# filter_zip.py (2.1 KB)

import argparse
import os
import re
import shutil
import uuid
import zipfile
  6. def pattern_to_regexp(p):
  7. return re.compile(
  8. '^'
  9. + re.escape(p)
  10. .replace(r'\*\*\/', '[_DIR_]')
  11. .replace(r'\*', '[_FILE_]')
  12. .replace('[_DIR_]', '(.*/)?')
  13. .replace('[_FILE_]', '([^/]*)')
  14. + '$'
  15. )
  16. def is_deathman(positive_filter, negative_filter, candidate):
  17. remove = positive_filter
  18. for pf in positive_filter:
  19. if pf.match(candidate):
  20. remove = False
  21. break
  22. if not negative_filter or remove:
  23. return remove
  24. for nf in negative_filter:
  25. if nf.match(candidate):
  26. remove = True
  27. break
  28. return remove
  29. def just_do_it():
  30. parser = argparse.ArgumentParser()
  31. parser.add_argument('--positive', action='append', default=[])
  32. parser.add_argument('--negative', action='append', default=[])
  33. parser.add_argument('--file', action='store', required=True)
  34. args = parser.parse_args()
  35. if not args.positive and not args.negative:
  36. return
  37. pos = [pattern_to_regexp(i) for i in args.positive]
  38. neg = [pattern_to_regexp(i) for i in args.negative]
  39. temp_dirname = None
  40. for _ in range(10):
  41. candidate = '__unpacked_{}__'.format(uuid.uuid4())
  42. if not os.path.exists(candidate):
  43. temp_dirname = candidate
  44. os.makedirs(temp_dirname)
  45. if not temp_dirname:
  46. raise Exception("Can't generate name for temp dir")
  47. with zipfile.ZipFile(args.file, 'r') as zip_ref:
  48. zip_ref.extractall(temp_dirname)
  49. for root, _, files in os.walk(temp_dirname):
  50. for f in files:
  51. candidate = os.path.join(root, f).replace('\\', '/')
  52. if is_deathman(pos, neg, os.path.relpath(candidate, temp_dirname)):
  53. os.remove(candidate)
  54. with zipfile.ZipFile(args.file, 'w') as zip_ref:
  55. for root, _, files in os.walk(temp_dirname):
  56. for f in files:
  57. realname = os.path.join(root, f)
  58. zip_ref.write(realname, os.path.sep.join(os.path.normpath(realname).split(os.path.sep, 2)[1:]))
  59. if __name__ == '__main__':
  60. just_do_it()