ch_64.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. package city
  2. import "encoding/binary"
  3. // Ref:
  4. // https://github.com/xzkostyan/python-cityhash/commit/f4091154ff2c6c0de11d5d6673b5007fdd6355ad
  5. const k3 uint64 = 0xc949d7c7509e6557
  6. func ch16(u, v uint64) uint64 {
  7. return hash128to64(U128{u, v})
  8. }
  9. // Return an 8-byte hash for 33 to 64 bytes.
  10. func ch33to64(s []byte, length int) uint64 {
  11. z := binary.LittleEndian.Uint64(s[24:])
  12. a := binary.LittleEndian.Uint64(s) + (uint64(length)+binary.LittleEndian.Uint64(s[length-16:]))*k0
  13. b := rot64(a+z, 52)
  14. c := rot64(a, 37)
  15. a += binary.LittleEndian.Uint64(s[8:])
  16. c += rot64(a, 7)
  17. a += binary.LittleEndian.Uint64(s[16:])
  18. vf := a + z
  19. vs := b + rot64(a, 31) + c
  20. a = binary.LittleEndian.Uint64(s[16:]) + binary.LittleEndian.Uint64(s[length-32:])
  21. z = binary.LittleEndian.Uint64(s[length-8:])
  22. b = rot64(a+z, 52)
  23. c = rot64(a, 37)
  24. a += binary.LittleEndian.Uint64(s[length-24:])
  25. c += rot64(a, 7)
  26. a += binary.LittleEndian.Uint64(s[length-16:])
  27. wf := a + z
  28. ws := b + rot64(a, 31) + c
  29. r := shiftMix((vf+ws)*k2 + (wf+vs)*k0)
  30. return shiftMix(r*k0+vs) * k2
  31. }
  32. func ch17to32(s []byte, length int) uint64 {
  33. a := binary.LittleEndian.Uint64(s) * k1
  34. b := binary.LittleEndian.Uint64(s[8:])
  35. c := binary.LittleEndian.Uint64(s[length-8:]) * k2
  36. d := binary.LittleEndian.Uint64(s[length-16:]) * k0
  37. return hash16(
  38. rot64(a-b, 43)+rot64(c, 30)+d,
  39. a+rot64(b^k3, 20)-c+uint64(length),
  40. )
  41. }
  42. func ch0to16(s []byte, length int) uint64 {
  43. if length > 8 {
  44. a := binary.LittleEndian.Uint64(s)
  45. b := binary.LittleEndian.Uint64(s[length-8:])
  46. return ch16(a, rot64(b+uint64(length), uint(length))) ^ b
  47. }
  48. if length >= 4 {
  49. a := uint64(fetch32(s))
  50. return ch16(uint64(length)+(a<<3), uint64(fetch32(s[length-4:])))
  51. }
  52. if length > 0 {
  53. a := s[0]
  54. b := s[length>>1]
  55. c := s[length-1]
  56. y := uint32(a) + (uint32(b) << 8)
  57. z := uint32(length) + (uint32(c) << 2)
  58. return shiftMix(uint64(y)*k2^uint64(z)*k3) * k2
  59. }
  60. return k2
  61. }
  62. // CH64 returns ClickHouse version of Hash64.
  63. func CH64(s []byte) uint64 {
  64. length := len(s)
  65. if length <= 16 {
  66. return ch0to16(s, length)
  67. }
  68. if length <= 32 {
  69. return ch17to32(s, length)
  70. }
  71. if length <= 64 {
  72. return ch33to64(s, length)
  73. }
  74. x := binary.LittleEndian.Uint64(s)
  75. y := binary.LittleEndian.Uint64(s[length-16:]) ^ k1
  76. z := binary.LittleEndian.Uint64(s[length-56:]) ^ k0
  77. v := weakHash32SeedsByte(s[length-64:], uint64(length), y)
  78. w := weakHash32SeedsByte(s[length-32:], uint64(length)*k1, k0)
  79. z += shiftMix(v.High) * k1
  80. x = rot64(z+x, 39) * k1
  81. y = rot64(y, 33) * k1
  82. // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
  83. s = s[:nearestMultiple64(s)]
  84. for len(s) > 0 {
  85. x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:]), 37) * k1
  86. y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
  87. x ^= w.High
  88. y ^= v.Low
  89. z = rot64(z^w.Low, 33)
  90. v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
  91. w = weakHash32SeedsByte(s[32:], z+w.High, y)
  92. z, x = x, z
  93. s = s[64:]
  94. }
  95. return ch16(
  96. ch16(v.Low, w.Low)+shiftMix(y)*k1+z,
  97. ch16(v.High, w.High)+x,
  98. )
  99. }