ch_128.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. package city
  2. import "encoding/binary"
  3. // A subroutine for CH128(). Returns a decent 128-bit hash for strings
  4. // of any length representable in signed long. Based on City and Mumur.
  5. func chMurmur(s []byte, seed U128) U128 {
  6. length := len(s)
  7. a := seed.Low
  8. b := seed.High
  9. c := uint64(0)
  10. d := uint64(0)
  11. l := length - 16
  12. if len(s) <= 16 { // length <= 16
  13. a = shiftMix(a*k1) * k1
  14. c = b*k1 + ch0to16(s, length)
  15. if length >= 8 {
  16. d = shiftMix(a + binary.LittleEndian.Uint64(s))
  17. } else {
  18. d = shiftMix(a + c)
  19. }
  20. } else { // length > 16
  21. c = ch16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
  22. d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
  23. a += d
  24. {
  25. a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
  26. a *= k1
  27. b ^= a
  28. c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
  29. c *= k1
  30. d ^= c
  31. s = s[16:]
  32. l -= 16
  33. }
  34. if l > 0 {
  35. for len(s) >= 16 {
  36. a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
  37. a *= k1
  38. b ^= a
  39. c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
  40. c *= k1
  41. d ^= c
  42. s = s[16:]
  43. l -= 16
  44. if l <= 0 {
  45. break
  46. }
  47. }
  48. }
  49. }
  50. a = ch16(a, c)
  51. b = ch16(d, b)
  52. return U128{a ^ b, ch16(b, a)}
  53. }
  54. // CH128 returns 128-bit ClickHouse CityHash.
  55. func CH128(s []byte) U128 {
  56. if len(s) >= 16 {
  57. return CH128Seed(s[16:], U128{
  58. Low: binary.LittleEndian.Uint64(s[0:8:8]) ^ k3,
  59. High: binary.LittleEndian.Uint64(s[8 : 8+8 : 8+8]),
  60. })
  61. }
  62. if len(s) >= 8 {
  63. l := uint64(len(s))
  64. return CH128Seed(nil, U128{
  65. Low: binary.LittleEndian.Uint64(s) ^ (l * k0),
  66. High: binary.LittleEndian.Uint64(s[l-8:]) ^ k1,
  67. })
  68. }
  69. return CH128Seed(s, U128{Low: k0, High: k1})
  70. }
  71. // CH128Seed returns 128-bit seeded ClickHouse CityHash.
  72. func CH128Seed(s []byte, seed U128) U128 {
  73. if len(s) < 128 {
  74. return chMurmur(s, seed)
  75. }
  76. // Saving initial input for tail hashing.
  77. t := s
  78. // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  79. // v, w, x, y and z.
  80. var v, w U128
  81. x := seed.Low
  82. y := seed.High
  83. z := uint64(len(s)) * k1
  84. {
  85. subSlice := (*[96]byte)(s[0:])
  86. v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(subSlice[0:])
  87. v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(subSlice[8:])
  88. w.Low = rot64(y+z, 35)*k1 + x
  89. w.High = rot64(x+binary.LittleEndian.Uint64(subSlice[88:]), 53) * k1
  90. }
  91. // This is the same inner loop as CH64(), manually unrolled.
  92. for len(s) >= 128 {
  93. // Roll 1.
  94. {
  95. x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:16+8:16+8]), 37) * k1
  96. y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:48+8:48+8]), 42) * k1
  97. x ^= w.High
  98. y ^= v.Low
  99. z = rot64(z^w.Low, 33)
  100. v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
  101. w = weakHash32SeedsByte(s[32:], z+w.High, y)
  102. z, x = x, z
  103. }
  104. // Roll 2.
  105. {
  106. const offset = 64
  107. x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:offset+16+8:offset+16+8]), 37) * k1
  108. y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:offset+48+8:offset+48+8]), 42) * k1
  109. x ^= w.High
  110. y ^= v.Low
  111. z = rot64(z^w.Low, 33)
  112. v = weakHash32SeedsByte(s[offset:], v.High*k1, x+w.Low)
  113. w = weakHash32SeedsByte(s[offset+32:], z+w.High, y)
  114. z, x = x, z
  115. }
  116. s = s[128:]
  117. }
  118. y += rot64(w.Low, 37)*k0 + z
  119. x += rot64(v.Low+z, 49) * k0
  120. // If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  121. for i := 0; i < len(s); {
  122. i += 32
  123. y = rot64(y-x, 42)*k0 + v.High
  124. w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
  125. x = rot64(x, 49)*k0 + w.Low
  126. w.Low += v.Low
  127. v = weakHash32SeedsByte(t[len(t)-i:], v.Low, v.High)
  128. }
  129. // At this point our 48 bytes of state should contain more than
  130. // enough information for a strong 128-bit hash. We use two
  131. // different 48-byte-to-8-byte hashes to get a 16-byte final result.
  132. x = ch16(x, v.Low)
  133. y = ch16(y, w.Low)
  134. return U128{
  135. Low: ch16(x+v.High, w.High) + y,
  136. High: ch16(x+w.High, y+v.High),
  137. }
  138. }