farmhashna.cc 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. #include "common.h"
  2. namespace farmhashna {
  3. #undef Fetch
  4. #define Fetch Fetch64
  5. #undef Rotate
  6. #define Rotate Rotate64
  7. #undef Bswap
  8. #define Bswap Bswap64
  9. STATIC_INLINE uint64_t ShiftMix(uint64_t val) {
  10. return val ^ (val >> 47);
  11. }
  12. STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) {
  13. return Hash128to64(Uint128(u, v));
  14. }
  15. STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  16. // Murmur-inspired hashing.
  17. uint64_t a = (u ^ v) * mul;
  18. a ^= (a >> 47);
  19. uint64_t b = (v ^ a) * mul;
  20. b ^= (b >> 47);
  21. b *= mul;
  22. return b;
  23. }
  24. STATIC_INLINE uint64_t HashLen0to16(const char *s, size_t len) {
  25. if (len >= 8) {
  26. uint64_t mul = k2 + len * 2;
  27. uint64_t a = Fetch(s) + k2;
  28. uint64_t b = Fetch(s + len - 8);
  29. uint64_t c = Rotate(b, 37) * mul + a;
  30. uint64_t d = (Rotate(a, 25) + b) * mul;
  31. return HashLen16(c, d, mul);
  32. }
  33. if (len >= 4) {
  34. uint64_t mul = k2 + len * 2;
  35. uint64_t a = Fetch32(s);
  36. return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
  37. }
  38. if (len > 0) {
  39. uint8_t a = s[0];
  40. uint8_t b = s[len >> 1];
  41. uint8_t c = s[len - 1];
  42. uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
  43. uint32_t z = len + (static_cast<uint32_t>(c) << 2);
  44. return ShiftMix(y * k2 ^ z * k0) * k2;
  45. }
  46. return k2;
  47. }
  48. // This probably works well for 16-byte strings as well, but it may be overkill
  49. // in that case.
  50. STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) {
  51. uint64_t mul = k2 + len * 2;
  52. uint64_t a = Fetch(s) * k1;
  53. uint64_t b = Fetch(s + 8);
  54. uint64_t c = Fetch(s + len - 8) * mul;
  55. uint64_t d = Fetch(s + len - 16) * k2;
  56. return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
  57. a + Rotate(b + k2, 18) + c, mul);
  58. }
  59. // Return a 16-byte hash for 48 bytes. Quick and dirty.
  60. // Callers do best to use "random-looking" values for a and b.
  61. STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
  62. uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  63. a += w;
  64. b = Rotate(b + a + z, 21);
  65. uint64_t c = a;
  66. a += x;
  67. a += y;
  68. b += Rotate(a, 44);
  69. return make_pair(a + z, b + c);
  70. }
  71. // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
  72. STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
  73. const char* s, uint64_t a, uint64_t b) {
  74. return WeakHashLen32WithSeeds(Fetch(s),
  75. Fetch(s + 8),
  76. Fetch(s + 16),
  77. Fetch(s + 24),
  78. a,
  79. b);
  80. }
  81. // Return an 8-byte hash for 33 to 64 bytes.
  82. STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) {
  83. uint64_t mul = k2 + len * 2;
  84. uint64_t a = Fetch(s) * k2;
  85. uint64_t b = Fetch(s + 8);
  86. uint64_t c = Fetch(s + len - 8) * mul;
  87. uint64_t d = Fetch(s + len - 16) * k2;
  88. uint64_t y = Rotate(a + b, 43) + Rotate(c, 30) + d;
  89. uint64_t z = HashLen16(y, a + Rotate(b + k2, 18) + c, mul);
  90. uint64_t e = Fetch(s + 16) * mul;
  91. uint64_t f = Fetch(s + 24);
  92. uint64_t g = (y + Fetch(s + len - 32)) * mul;
  93. uint64_t h = (z + Fetch(s + len - 24)) * mul;
  94. return HashLen16(Rotate(e + f, 43) + Rotate(g, 30) + h,
  95. e + Rotate(f + a, 18) + g, mul);
  96. }
  97. uint64_t Hash64(const char *s, size_t len) {
  98. const uint64_t seed = 81;
  99. if (len <= 32) {
  100. if (len <= 16) {
  101. return HashLen0to16(s, len);
  102. } else {
  103. return HashLen17to32(s, len);
  104. }
  105. } else if (len <= 64) {
  106. return HashLen33to64(s, len);
  107. }
  108. // For strings over 64 bytes we loop. Internal state consists of
  109. // 56 bytes: v, w, x, y, and z.
  110. uint64_t x = seed;
  111. uint64_t y = seed * k1 + 113;
  112. uint64_t z = ShiftMix(y * k2 + 113) * k2;
  113. pair<uint64_t, uint64_t> v = make_pair(0, 0);
  114. pair<uint64_t, uint64_t> w = make_pair(0, 0);
  115. x = x * k2 + Fetch(s);
  116. // Set end so that after the loop we have 1 to 64 bytes left to process.
  117. const char* end = s + ((len - 1) / 64) * 64;
  118. const char* last64 = end + ((len - 1) & 63) - 63;
  119. assert(s + len - 64 == last64);
  120. do {
  121. x = Rotate(x + y + v.first + Fetch(s + 8), 37) * k1;
  122. y = Rotate(y + v.second + Fetch(s + 48), 42) * k1;
  123. x ^= w.second;
  124. y += v.first + Fetch(s + 40);
  125. z = Rotate(z + w.first, 33) * k1;
  126. v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
  127. w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16));
  128. std::swap(z, x);
  129. s += 64;
  130. } while (s != end);
  131. uint64_t mul = k1 + ((z & 0xff) << 1);
  132. // Make s point to the last 64 bytes of input.
  133. s = last64;
  134. w.first += ((len - 1) & 63);
  135. v.first += w.first;
  136. w.first += v.first;
  137. x = Rotate(x + y + v.first + Fetch(s + 8), 37) * mul;
  138. y = Rotate(y + v.second + Fetch(s + 48), 42) * mul;
  139. x ^= w.second * 9;
  140. y += v.first * 9 + Fetch(s + 40);
  141. z = Rotate(z + w.first, 33) * mul;
  142. v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first);
  143. w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16));
  144. std::swap(z, x);
  145. return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z,
  146. HashLen16(v.second, w.second, mul) + x,
  147. mul);
  148. }
  149. uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1);
  150. uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) {
  151. return Hash64WithSeeds(s, len, k2, seed);
  152. }
  153. uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) {
  154. return HashLen16(Hash64(s, len) - seed0, seed1);
  155. }
  156. } // namespace farmhashna