# extract_docs.py (1.5 KB)

  1. import argparse
  2. import os
  3. import process_command_files as pcf
  4. import tarfile
  5. import sys
  6. def parse_args():
  7. parser = argparse.ArgumentParser()
  8. parser.add_argument('--dest-dir', required=True)
  9. parser.add_argument('--skip-prefix', dest='skip_prefixes', action='append', default=[])
  10. parser.add_argument('docs', nargs='*')
  11. return parser.parse_args(pcf.get_args(sys.argv[1:]))
  12. def main():
  13. args = parse_args()
  14. prefixes = ['{}{}'.format(os.path.normpath(p), os.path.sep) for p in args.skip_prefixes]
  15. def _valid_docslib(path):
  16. base = os.path.basename(path)
  17. return base.endswith(('.docslib', '.docslib.fake')) or base == 'preprocessed.tar.gz'
  18. for src in [p for p in args.docs if _valid_docslib(p)]:
  19. if src == 'preprocessed.tar.gz':
  20. rel_dst = os.path.dirname(os.path.normpath(src))
  21. for prefix in prefixes:
  22. if src.startswith(prefix):
  23. rel_dst = rel_dst[len(prefix) :]
  24. continue
  25. assert not os.path.isabs(rel_dst)
  26. dest_dir = os.path.join(args.dest_dir, rel_dst)
  27. else:
  28. dest_dir = args.dest_dir
  29. if not os.path.exists(dest_dir):
  30. os.makedirs(dest_dir)
  31. with tarfile.open(src, 'r') as tar_file:
  32. if sys.version_info >= (3, 12):
  33. tar_file.extractall(dest_dir, filter='data')
  34. else:
  35. tar_file.extractall(dest_dir)
  36. if __name__ == '__main__':
  37. main()