extract_docs.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import argparse
  2. import os
  3. import tarfile
  4. import sys
  5. # Explicitly enable local imports
  6. # Don't forget to add imported scripts to inputs of the calling command!
  7. sys.path.append(os.path.dirname(os.path.abspath(__file__)))
  8. import process_command_files as pcf
  9. def parse_args():
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument('--dest-dir', required=True)
  12. parser.add_argument('--skip-prefix', dest='skip_prefixes', action='append', default=[])
  13. parser.add_argument('docs', nargs='*')
  14. return parser.parse_args(pcf.get_args(sys.argv[1:]))
  15. def main():
  16. args = parse_args()
  17. prefixes = ['{}{}'.format(os.path.normpath(p), os.path.sep) for p in args.skip_prefixes]
  18. def _valid_docslib(path):
  19. base = os.path.basename(path)
  20. return base.endswith(('.docslib', '.docslib.fake')) or base == 'preprocessed.tar.gz'
  21. for src in [p for p in args.docs if _valid_docslib(p)]:
  22. if src == 'preprocessed.tar.gz':
  23. rel_dst = os.path.dirname(os.path.normpath(src))
  24. for prefix in prefixes:
  25. if src.startswith(prefix):
  26. rel_dst = rel_dst[len(prefix) :]
  27. continue
  28. assert not os.path.isabs(rel_dst)
  29. dest_dir = os.path.join(args.dest_dir, rel_dst)
  30. else:
  31. dest_dir = args.dest_dir
  32. if not os.path.exists(dest_dir):
  33. os.makedirs(dest_dir)
  34. with tarfile.open(src, 'r') as tar_file:
  35. if sys.version_info >= (3, 12):
  36. tar_file.extractall(dest_dir, filter='data')
  37. else:
  38. tar_file.extractall(dest_dir)
  39. if __name__ == '__main__':
  40. main()