decimal_md5.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import hashlib
  4. import struct
  5. import sys
  6. import os
  7. import argparse
  8. def print_code(checksum, func_name):
  9. if len(func_name) == 0: # safe fallback for old ya.make files
  10. func_name = "DecimalMD5"
  11. print('const char* ' + func_name + '() {return "' + checksum + '";}')
  12. def ensure_paths_exist(paths):
  13. bad_paths = sorted(
  14. path for path in paths
  15. if not os.path.exists(path)
  16. )
  17. if bad_paths:
  18. print >> sys.stderr, "decimal_md5 inputs do not exist:"
  19. for path in bad_paths:
  20. print >> sys.stderr, path
  21. sys.exit(1)
  22. def _update_digest_with_file_contents(digest, path, block_size=65535):
  23. with open(path) as f:
  24. while True:
  25. block = f.read(block_size)
  26. if not block:
  27. break
  28. digest.update(block)
  29. def main():
  30. parser = argparse.ArgumentParser()
  31. parser.add_argument("--fixed-output", help="don not calculate md5, use this value instead")
  32. parser.add_argument("--lower-bits", help="use specified count of lower bits", type=int, default=32)
  33. parser.add_argument("--source-root", help="arcadia source root")
  34. parser.add_argument("--func-name", help="custom function name to be defined", default="DecimalMD5")
  35. parser.add_argument("targets", nargs='*', default=['.'])
  36. args = parser.parse_args()
  37. abs_paths = [
  38. os.path.join(args.source_root, target)
  39. for target in args.targets
  40. ]
  41. ensure_paths_exist(abs_paths)
  42. if args.fixed_output:
  43. try:
  44. bitmask = (1 << args.lower_bits) - 1
  45. fmt = '{:0%dd}' % len(str(bitmask))
  46. checksum = fmt.format(int(args.fixed_output) & bitmask)
  47. except ValueError:
  48. raise ValueError("decimal_md5: bad value passed via --fixed-output: %s" % args.fixed_output)
  49. print_code(str(checksum), func_name=args.func_name)
  50. return
  51. md5 = hashlib.md5()
  52. for path in abs_paths:
  53. _update_digest_with_file_contents(md5, path)
  54. md5_parts = struct.unpack('IIII', md5.digest())
  55. md5_int = sum(part << (32 * n) for n, part in enumerate(md5_parts))
  56. bitmask = (1 << args.lower_bits) - 1
  57. fmt = '{:0%dd}' % len(str(bitmask))
  58. checksum_str = fmt.format(md5_int & bitmask)
  59. print_code(checksum_str, func_name=args.func_name)
  60. if __name__ == "__main__":
  61. main()