process_from_http.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import argparse
  2. import errno
  3. import hashlib
  4. import os
  5. import platform
  6. import shutil
  7. import sys
  8. class CliArgs:
  9. def __init__(self, resource_root, uri, out): # type: (str,str,str) -> None
  10. self.resource_root = resource_root
  11. self.uri = uri
  12. self.out = out
  13. def parse_args(): # type: () -> CliArgs
  14. parser = argparse.ArgumentParser()
  15. parser.add_argument("--resource-root", required=True)
  16. parser.add_argument("--uri", required=True)
  17. parser.add_argument("--out", required=True)
  18. return parser.parse_args()
  19. def print_err_msg(msg): # type: (str) -> None
  20. print("[[bad]]process_from_https: {}[[rst]]".format(msg), file=sys.stderr)
  21. def link_or_copy(src, dst): # type: (str,str) -> None
  22. try:
  23. if platform.system().lower() == "windows":
  24. shutil.copy(src, dst)
  25. else:
  26. os.link(src, dst)
  27. except OSError as e:
  28. if e.errno == errno.EEXIST:
  29. print_err_msg("destination file already exists: {}".format(dst))
  30. if e.errno == errno.ENOENT:
  31. print_err_msg("source file does not exists: {}".format(src))
  32. raise
  33. def md5_hex(string): # type: (str) -> str
  34. return hashlib.md5(string.encode()).hexdigest()
  35. def get_integrity_from_meta(meta_str): # type: (str) -> str | None
  36. pairs = meta_str.split("&")
  37. integrity_prefix = "integrity="
  38. for pair in pairs:
  39. if pair.startswith(integrity_prefix):
  40. return pair[len(integrity_prefix) :]
  41. return None
  42. def get_path_from_uri(resource_uri): # type: (str) -> str | None
  43. if not resource_uri.startswith("https://") and not resource_uri.startswith("http://"):
  44. print_err_msg("Uri has to start with 'https:' or 'http:', got {}".format(resource_uri))
  45. return None
  46. _, meta_str = resource_uri.split("#", 1)
  47. integrity = get_integrity_from_meta(meta_str)
  48. if not integrity:
  49. print_err_msg("Uri mate has to have integrity field, got {}".format(resource_uri))
  50. return None
  51. resource_id = md5_hex(integrity)
  52. return "http/{}/resource".format(resource_id)
  53. def main():
  54. args = parse_args()
  55. relative_resource_path = get_path_from_uri(args.uri)
  56. resource_path = os.path.join(args.resource_root, relative_resource_path)
  57. if not resource_path:
  58. print_err_msg("Cannot get filepath from uri")
  59. return 1
  60. if not os.path.exists(resource_path):
  61. print_err_msg("File {} not found in $(RESOURCE_ROOT)".format(relative_resource_path))
  62. return 1
  63. our_dirname = os.path.dirname(args.out)
  64. if our_dirname:
  65. os.makedirs(our_dirname, exist_ok=True)
  66. link_or_copy(resource_path, args.out)
  67. if __name__ == "__main__":
  68. sys.exit(main())