import os, hashlib, binascii

from metadata_reader import read_metadata, ParseException
"""
Resources are the basic components of any given library.

The following types are supported:

* lib (files in lib/)
* shared (files in shared/)
* app (all other folders in root)
* root (files in root)

A resource has the following form:

{
    "app1": {"type": "app", "dependencies": ["lib/lib1.py"], "files": {"app1/main.py": "abcdef1234", "app1/nested/text.txt": "abcdef1234"}},
    "app2": {"type": "app", "dependencies": ["lib/lib2.py"], "files": {"app2/main.py": "abcdef1234", "app2/other_content.txt": "abcdef1234", "app2/some_binary_file.gif": "abcdef1234"}},
    "lib/lib1.py": {"type": "lib", "dependencies": ["lib/lib3.py"], "files": {"lib/lib1.py": "abcdef1234"}},
    "lib/lib2.py": {"type": "lib", "files": {"lib/lib2.py": "abcdef1234"}},
    "lib/lib3.py": {"type": "lib", "files": {"lib/lib3.py": "abcdef1234"}},
    "lib/lib4.py": {"type": "lib", "files": {"lib/lib4.py": "abcdef1234"}},
    "lib/lib5.py": {"type": "lib", "files": {"lib/lib5.py": "abcdef1234"}},
    "shared/foo.txt": {"type": "shared", "files": {"shared/foo.txt": "abcdef1234"}},
    "boot.py": {"type": "root", "files": {"boot.py": "abcdef1234"}}
}

Every resource can also contain other metadata fields, which are extracted from its
main Python file (in the case of lib or app resources).

This module has the following operations:

resources = get_resources(path) # Gets resources for a given path
add_hashes(path, resources) # Adds hashes to the file dict - not needed for testing
add_metadata(path, resources) # Adds metadata
validate(path, resources) # Runs basic validation
resolve_dependencies(resources) # Merges all dependencies into each resource's file dict
remove_upip(resources) # Removes upip resources from the dict again

This module encapsulates all the main operations the app library is expected to
perform on a given checkout. It's intentionally kept in one file to make it easier
to share between repositories. The only exception to this rule is metadata_reader
(because it's rather complex and I didn't want to make this file impossible to read).

Please make sure this file can be executed on any operating system running Python 3.
Don't include any external dependencies. It forms part of the local toolchain.
"""
"""
get_resources(path)

A resource scanner for the Tilda filesystem. Returns a {path: {type: <type>, files: {...}}} dict.

The following are ignored:

* dotfiles
* __pycache__
"""
def _scan_files(path, rel_path=""):
    result = []
    for element in os.listdir(path):
        if element.startswith(".") or element == "__pycache__":
            continue
        element_path = os.path.join(path, element)
        element_rel_path = os.path.join(rel_path, element)
        if os.path.isdir(element_path):
            result.extend(_scan_files(element_path, element_rel_path))
        else:
            result.append(element_rel_path)

    return result


def get_resources(path):
    result = {}
    for sub_path in os.listdir(path):
        if sub_path.startswith(".") or sub_path == "__pycache__":
            continue
        full_path = os.path.join(path, sub_path)
        if os.path.islink(full_path):
            continue
        if os.path.isfile(full_path):
            # Plain files in the checkout root become "root" resources
            result[sub_path] = {"type": "root", "files": {sub_path: None}}
            continue
        if sub_path in ["lib", "shared"]:
            # Every file in lib/ and shared/ is its own resource
            files = _scan_files(full_path, sub_path)
            for rel_path in files:
                result[rel_path] = {"type": sub_path, "files": {rel_path: None}}
        elif sub_path == "upip":
            # upip libraries can be a single file or a whole package directory
            for upip_lib in os.listdir(full_path):
                if upip_lib.startswith(".") or upip_lib == "__pycache__":
                    continue
                full_lib_path = os.path.join(full_path, upip_lib)
                rel_lib_path = os.path.join(sub_path, upip_lib)
                files = {}
                if os.path.isfile(full_lib_path):
                    files = {rel_lib_path: None}
                    upip_lib = upip_lib.rsplit('.', 1)[0]
                else:
                    for rel_path in _scan_files(full_lib_path, os.path.join(sub_path, upip_lib)):
                        files[rel_path] = None
                result["upip:%s" % upip_lib] = {"type": sub_path, "files": files}
        else:
            # All other folders in the root are treated as apps
            files = _scan_files(full_path, sub_path)
            result[sub_path] = {"type": "app", "files": {}}
            for rel_path in files:
                result[sub_path]["files"][rel_path] = None
    return result

"""
|
|
|
|
add_hashes(path, resource)
|
|
|
|
|
|
|
|
Adds the first 10 characters of SHA256 hashes to all elements in "files".
|
|
|
|
The hash is calcuated on the file content, not the file name.
|
|
|
|
"""
def add_hashes(path, resources):
    for resource in resources.values():
        for file_path in resource["files"]:
            resource["files"][file_path] = _hash_file(os.path.join(path, file_path))


def _hash_file(filename):
    """Calculates the (truncated) SHA256 hash of a file."""
    with open(filename, "rb") as file:
        sha256 = hashlib.sha256()
        # Read in small chunks to keep memory usage low
        buf = file.read(128)
        while len(buf) > 0:
            sha256.update(buf)
            buf = file.read(128)
        return str(binascii.hexlify(sha256.digest()), "utf8")[:10]

"""
|
|
|
|
add_metadata(path, resource)
|
|
|
|
|
|
|
|
Reads primary files for app and lib resources and extracts metadata information from its header
|
|
|
|
"""
def add_metadata(path, resources):
    for resource in resources.values():
        file = None
        if resource['type'] == "app":
            # Apps are described by the metadata in their main.py
            file = next(f for f in resource['files'] if os.path.basename(f) == "main.py")
        elif resource['type'] == "lib":
            file = next(iter(resource['files'].keys()))

        if file:
            try:
                with open(os.path.join(path, file), "r", encoding='utf8') as stream:
                    resource.update(_normalize_metadata(read_metadata(stream)))
            except ParseException as e:
                resource.setdefault("errors", []).append(file + ": " + str(e))


def _normalize_metadata(metadata):
    metadata['description'] = metadata.pop('doc')
    if 'dependencies' in metadata:
        metadata['dependencies'] = [normalize_dependency(l) for l in metadata.pop('dependencies')]

    return metadata

"""
|
|
|
|
resolve_dependencies(resources)
|
|
|
|
|
|
|
|
merges files from dependent resources into the original files dict
|
|
|
|
"""
def resolve_dependencies(resources):
    for file, resource in resources.items():
        if 'dependencies' in resource:
            already_added = [file]
            to_add = resource['dependencies'].copy()
            while len(to_add):
                r = to_add.pop()
                r = os.path.normpath(r)
                if r in already_added:
                    continue
                if r not in resources:
                    resource.setdefault("errors", []).append("Dependency %s not found" % r)
                    continue
                already_added.append(r)
                to_add.extend(resources[r].get("dependencies", []))
                resource['files'].update(resources[r]['files'])

"""
|
|
|
|
validate(path, resources)
|
|
|
|
|
|
|
|
does basic verification:
|
|
|
|
* Is it valid python?
|
|
|
|
* Are metadata fields missing
|
|
|
|
* TBD: Does it have imports that are not dependencies?
|
|
|
|
"""
def validate(path, resources):
    for resource in resources.values():
        if resource['type'] == "upip":
            continue
        _validate_resource(path, resource)


def _validate_resource(path, resource):
    # Compile
    for file in resource['files'].keys():
        if file.endswith(".py"):
            try:
                filename = os.path.join(path, file)
                with open(filename, 'r', encoding='utf8') as s:
                    compile(s.read() + '\n', filename, 'exec')
            except Exception as e:
                resource.setdefault("errors", []).append(str(e))

    # Metadata check
    if resource['type'] in ["app", "lib"]:
        pass  # todo: should we make license required?

    if resource['type'] == "app":
        if 'categories' not in resource or (not isinstance(resource['categories'], list)) or len(resource['categories']) == 0:
            resource.setdefault("errors", []).append("___categories___ list is required in main.py but not found")

"""
|
|
|
|
remove_upip(resources)
|
|
|
|
|
|
|
|
upip adds over a 100 resources to the list. Some of them have broken validation as well, so it's
|
|
|
|
useful to remove them after resolving dependencies.
|
|
|
|
"""
def remove_upip(resources):
    to_delete = []
    for key, resource in resources.items():
        if resource['type'] == "upip":
            to_delete.append(key)
    for key in to_delete:
        del resources[key]

"""
|
|
|
|
helpers
|
|
|
|
"""
def get_error_summary(resources):
    summary = ""
    for key, resource in resources.items():
        if "errors" in resource:
            summary += "--- %s ---\n" % key
            for error in resource['errors']:
                summary += error + "\n"
            summary += "\n"
    return summary.strip()


def pretty_print_resources(resources):
    import json
    return json.dumps(resources, indent=4)


def normalize_dependency(dependency):
    """lib dependencies can be shortened to just their module name"""
    if "." in dependency or os.sep in dependency or "upip:" in dependency:
        return dependency
    return os.path.join("lib", "%s.py" % dependency)