123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- package city
- import "encoding/binary"
- // A subroutine for CH128(). Returns a decent 128-bit hash for strings
- // of any length representable in signed long. Based on City and Mumur.
- func chMurmur(s []byte, seed U128) U128 {
- length := len(s)
- a := seed.Low
- b := seed.High
- c := uint64(0)
- d := uint64(0)
- l := length - 16
- if len(s) <= 16 { // length <= 16
- a = shiftMix(a*k1) * k1
- c = b*k1 + ch0to16(s, length)
- if length >= 8 {
- d = shiftMix(a + binary.LittleEndian.Uint64(s))
- } else {
- d = shiftMix(a + c)
- }
- } else { // length > 16
- c = ch16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
- d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
- a += d
- {
- a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
- a *= k1
- b ^= a
- c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
- c *= k1
- d ^= c
- s = s[16:]
- l -= 16
- }
- if l > 0 {
- for len(s) >= 16 {
- a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
- a *= k1
- b ^= a
- c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
- c *= k1
- d ^= c
- s = s[16:]
- l -= 16
- if l <= 0 {
- break
- }
- }
- }
- }
- a = ch16(a, c)
- b = ch16(d, b)
- return U128{a ^ b, ch16(b, a)}
- }
- // CH128 returns 128-bit ClickHouse CityHash.
- func CH128(s []byte) U128 {
- if len(s) >= 16 {
- return CH128Seed(s[16:], U128{
- Low: binary.LittleEndian.Uint64(s[0:8:8]) ^ k3,
- High: binary.LittleEndian.Uint64(s[8 : 8+8 : 8+8]),
- })
- }
- if len(s) >= 8 {
- l := uint64(len(s))
- return CH128Seed(nil, U128{
- Low: binary.LittleEndian.Uint64(s) ^ (l * k0),
- High: binary.LittleEndian.Uint64(s[l-8:]) ^ k1,
- })
- }
- return CH128Seed(s, U128{Low: k0, High: k1})
- }
- // CH128Seed returns 128-bit seeded ClickHouse CityHash.
- func CH128Seed(s []byte, seed U128) U128 {
- if len(s) < 128 {
- return chMurmur(s, seed)
- }
- // Saving initial input for tail hashing.
- t := s
- // We expect len >= 128 to be the common case. Keep 56 bytes of state:
- // v, w, x, y and z.
- var v, w U128
- x := seed.Low
- y := seed.High
- z := uint64(len(s)) * k1
- {
- subSlice := (*[96]byte)(s[0:])
- v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(subSlice[0:])
- v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(subSlice[8:])
- w.Low = rot64(y+z, 35)*k1 + x
- w.High = rot64(x+binary.LittleEndian.Uint64(subSlice[88:]), 53) * k1
- }
- // This is the same inner loop as CH64(), manually unrolled.
- for len(s) >= 128 {
- // Roll 1.
- {
- x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:16+8:16+8]), 37) * k1
- y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:48+8:48+8]), 42) * k1
- x ^= w.High
- y ^= v.Low
- z = rot64(z^w.Low, 33)
- v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
- w = weakHash32SeedsByte(s[32:], z+w.High, y)
- z, x = x, z
- }
- // Roll 2.
- {
- const offset = 64
- x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:offset+16+8:offset+16+8]), 37) * k1
- y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:offset+48+8:offset+48+8]), 42) * k1
- x ^= w.High
- y ^= v.Low
- z = rot64(z^w.Low, 33)
- v = weakHash32SeedsByte(s[offset:], v.High*k1, x+w.Low)
- w = weakHash32SeedsByte(s[offset+32:], z+w.High, y)
- z, x = x, z
- }
- s = s[128:]
- }
- y += rot64(w.Low, 37)*k0 + z
- x += rot64(v.Low+z, 49) * k0
- // If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
- for i := 0; i < len(s); {
- i += 32
- y = rot64(y-x, 42)*k0 + v.High
- w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
- x = rot64(x, 49)*k0 + w.Low
- w.Low += v.Low
- v = weakHash32SeedsByte(t[len(t)-i:], v.Low, v.High)
- }
- // At this point our 48 bytes of state should contain more than
- // enough information for a strong 128-bit hash. We use two
- // different 48-byte-to-8-byte hashes to get a 16-byte final result.
- x = ch16(x, v.Low)
- y = ch16(y, w.Low)
- return U128{
- Low: ch16(x+v.High, w.High) + y,
- High: ch16(x+w.High, y+v.High),
- }
- }
|