decimal_md5.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function
  4. import hashlib
  5. import struct
  6. import sys
  7. import os
  8. import argparse
  9. def print_code(checksum, func_name):
  10. if len(func_name) == 0: # safe fallback for old ya.make files
  11. func_name = "DecimalMD5"
  12. print('const char* ' + func_name + '() {return "' + checksum + '";}')
  13. def ensure_paths_exist(paths):
  14. bad_paths = sorted(
  15. path for path in paths
  16. if not os.path.exists(path)
  17. )
  18. if bad_paths:
  19. print("decimal_md5 inputs do not exist:", file=sys.stderr)
  20. for path in bad_paths:
  21. print(path, file=sys.stderr)
  22. sys.exit(1)
  23. def _update_digest_with_file_contents(digest, path, block_size=65535):
  24. with open(path) as f:
  25. while True:
  26. block = f.read(block_size)
  27. if not block:
  28. break
  29. digest.update(block)
  30. def main():
  31. parser = argparse.ArgumentParser()
  32. parser.add_argument("--fixed-output", help="don not calculate md5, use this value instead")
  33. parser.add_argument("--lower-bits", help="use specified count of lower bits", type=int, default=32)
  34. parser.add_argument("--source-root", help="arcadia source root")
  35. parser.add_argument("--func-name", help="custom function name to be defined", default="DecimalMD5")
  36. parser.add_argument("targets", nargs='*', default=['.'])
  37. args = parser.parse_args()
  38. abs_paths = [
  39. os.path.join(args.source_root, target)
  40. for target in args.targets
  41. ]
  42. ensure_paths_exist(abs_paths)
  43. if args.fixed_output:
  44. try:
  45. bitmask = (1 << args.lower_bits) - 1
  46. fmt = '{:0%dd}' % len(str(bitmask))
  47. checksum = fmt.format(int(args.fixed_output) & bitmask)
  48. except ValueError:
  49. raise ValueError("decimal_md5: bad value passed via --fixed-output: %s" % args.fixed_output)
  50. print_code(str(checksum), func_name=args.func_name)
  51. return
  52. md5 = hashlib.md5()
  53. for path in abs_paths:
  54. _update_digest_with_file_contents(md5, path)
  55. md5_parts = struct.unpack('IIII', md5.digest())
  56. md5_int = sum(part << (32 * n) for n, part in enumerate(md5_parts))
  57. bitmask = (1 << args.lower_bits) - 1
  58. fmt = '{:0%dd}' % len(str(bitmask))
  59. checksum_str = fmt.format(md5_int & bitmask)
  60. print_code(checksum_str, func_name=args.func_name)
  61. if __name__ == "__main__":
  62. main()