farmhashsu.cc 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #include "common.h"
  2. namespace {
  3. #include "farmhashmk.cc"
  4. }
  5. namespace farmhashsu {
  6. #if !can_use_sse42 || !can_use_aesni
  7. uint32_t Hash32(const char *s, size_t len) {
  8. FARMHASH_DIE_IF_MISCONFIGURED;
  9. return s == NULL ? 0 : len;
  10. }
  11. uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  12. FARMHASH_DIE_IF_MISCONFIGURED;
  13. return seed + Hash32(s, len);
  14. }
  15. #else
  16. #undef Fetch
  17. #define Fetch Fetch32
  18. #undef Rotate
  19. #define Rotate Rotate32
  20. #undef Bswap
  21. #define Bswap Bswap32
  22. // Helpers for data-parallel operations (4x 32-bits).
  23. STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); }
  24. STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); }
  25. STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); }
  26. STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }
  27. STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); }
  28. STATIC_INLINE __m128i RotateLeft(__m128i x, int c) {
  29. return Or(_mm_slli_epi32(x, c),
  30. _mm_srli_epi32(x, 32 - c));
  31. }
  32. STATIC_INLINE __m128i Rol17(__m128i x) { return RotateLeft(x, 17); }
  33. STATIC_INLINE __m128i Rol19(__m128i x) { return RotateLeft(x, 19); }
  34. STATIC_INLINE __m128i Shuffle0321(__m128i x) {
  35. return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0));
  36. }
  37. uint32_t Hash32(const char *s, size_t len) {
  38. const uint32_t seed = 81;
  39. if (len <= 24) {
  40. return len <= 12 ?
  41. (len <= 4 ?
  42. farmhashmk::Hash32Len0to4(s, len) :
  43. farmhashmk::Hash32Len5to12(s, len)) :
  44. farmhashmk::Hash32Len13to24(s, len);
  45. }
  46. if (len < 40) {
  47. uint32_t a = len, b = seed * c2, c = a + b;
  48. a += Fetch(s + len - 4);
  49. b += Fetch(s + len - 20);
  50. c += Fetch(s + len - 16);
  51. uint32_t d = a;
  52. a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21);
  53. a = Mur(a, Mur(b, _mm_crc32_u32(c, d)));
  54. a += Fetch(s + len - 12);
  55. b += Fetch(s + len - 8);
  56. d += a;
  57. a += d;
  58. b = Mur(b, d) * c2;
  59. a = _mm_crc32_u32(a, b + c);
  60. return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b;
  61. }
  62. #undef Mulc1
  63. #define Mulc1(x) Mul((x), cc1)
  64. #undef Mulc2
  65. #define Mulc2(x) Mul((x), cc2)
  66. #undef Murk
  67. #define Murk(a, h) \
  68. Add(k, \
  69. Mul5( \
  70. Rol19( \
  71. Xor( \
  72. Mulc2( \
  73. Rol17( \
  74. Mulc1(a))), \
  75. (h)))))
  76. const __m128i cc1 = _mm_set1_epi32(c1);
  77. const __m128i cc2 = _mm_set1_epi32(c2);
  78. __m128i h = _mm_set1_epi32(seed);
  79. __m128i g = _mm_set1_epi32(c1 * seed);
  80. __m128i f = g;
  81. __m128i k = _mm_set1_epi32(0xe6546b64);
  82. __m128i q;
  83. if (len < 80) {
  84. __m128i a = Fetch128(s);
  85. __m128i b = Fetch128(s + 16);
  86. __m128i c = Fetch128(s + (len - 15) / 2);
  87. __m128i d = Fetch128(s + len - 32);
  88. __m128i e = Fetch128(s + len - 16);
  89. h = Add(h, a);
  90. g = Add(g, b);
  91. q = g;
  92. g = Shuffle0321(g);
  93. f = Add(f, c);
  94. __m128i be = Add(b, Mulc1(e));
  95. h = Add(h, f);
  96. f = Add(f, h);
  97. h = Add(Murk(d, h), e);
  98. k = Xor(k, _mm_shuffle_epi8(g, f));
  99. g = Add(Xor(c, g), a);
  100. f = Add(Xor(be, f), d);
  101. k = Add(k, be);
  102. k = Add(k, _mm_shuffle_epi8(f, h));
  103. f = Add(f, g);
  104. g = Add(g, f);
  105. g = Add(_mm_set1_epi32(len), Mulc1(g));
  106. } else {
  107. // len >= 80
  108. // The following is loosely modelled after farmhashmk::Hash32.
  109. size_t iters = (len - 1) / 80;
  110. len -= iters * 80;
  111. #undef Chunk
  112. #define Chunk() do { \
  113. __m128i a = Fetch128(s); \
  114. __m128i b = Fetch128(s + 16); \
  115. __m128i c = Fetch128(s + 32); \
  116. __m128i d = Fetch128(s + 48); \
  117. __m128i e = Fetch128(s + 64); \
  118. h = Add(h, a); \
  119. g = Add(g, b); \
  120. g = Shuffle0321(g); \
  121. f = Add(f, c); \
  122. __m128i be = Add(b, Mulc1(e)); \
  123. h = Add(h, f); \
  124. f = Add(f, h); \
  125. h = Add(h, d); \
  126. q = Add(q, e); \
  127. h = Rol17(h); \
  128. h = Mulc1(h); \
  129. k = Xor(k, _mm_shuffle_epi8(g, f)); \
  130. g = Add(Xor(c, g), a); \
  131. f = Add(Xor(be, f), d); \
  132. std::swap(f, q); \
  133. q = _mm_aesimc_si128(q); \
  134. k = Add(k, be); \
  135. k = Add(k, _mm_shuffle_epi8(f, h)); \
  136. f = Add(f, g); \
  137. g = Add(g, f); \
  138. f = Mulc1(f); \
  139. } while (0)
  140. q = g;
  141. while (iters-- != 0) {
  142. Chunk();
  143. s += 80;
  144. }
  145. if (len != 0) {
  146. h = Add(h, _mm_set1_epi32(len));
  147. s = s + len - 80;
  148. Chunk();
  149. }
  150. }
  151. g = Shuffle0321(g);
  152. k = Xor(k, g);
  153. k = Xor(k, q);
  154. h = Xor(h, q);
  155. f = Mulc1(f);
  156. k = Mulc2(k);
  157. g = Mulc1(g);
  158. h = Mulc2(h);
  159. k = Add(k, _mm_shuffle_epi8(g, f));
  160. h = Add(h, f);
  161. f = Add(f, h);
  162. g = Add(g, k);
  163. k = Add(k, g);
  164. k = Xor(k, _mm_shuffle_epi8(f, h));
  165. __m128i buf[4];
  166. buf[0] = f;
  167. buf[1] = g;
  168. buf[2] = k;
  169. buf[3] = h;
  170. s = reinterpret_cast<char*>(buf);
  171. uint32_t x = Fetch(s);
  172. uint32_t y = Fetch(s+4);
  173. uint32_t z = Fetch(s+8);
  174. x = _mm_crc32_u32(x, Fetch(s+12));
  175. y = _mm_crc32_u32(y, Fetch(s+16));
  176. z = _mm_crc32_u32(z * c1, Fetch(s+20));
  177. x = _mm_crc32_u32(x, Fetch(s+24));
  178. y = _mm_crc32_u32(y * c1, Fetch(s+28));
  179. uint32_t o = y;
  180. z = _mm_crc32_u32(z, Fetch(s+32));
  181. x = _mm_crc32_u32(x * c1, Fetch(s+36));
  182. y = _mm_crc32_u32(y, Fetch(s+40));
  183. z = _mm_crc32_u32(z * c1, Fetch(s+44));
  184. x = _mm_crc32_u32(x, Fetch(s+48));
  185. y = _mm_crc32_u32(y * c1, Fetch(s+52));
  186. z = _mm_crc32_u32(z, Fetch(s+56));
  187. x = _mm_crc32_u32(x, Fetch(s+60));
  188. return (o - x + y - z) * c1;
  189. }
  190. #undef Chunk
  191. #undef Murk
  192. #undef Mulc2
  193. #undef Mulc1
  194. uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  195. if (len <= 24) {
  196. if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1);
  197. else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed);
  198. else return farmhashmk::Hash32Len0to4(s, len, seed);
  199. }
  200. uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len);
  201. return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h);
  202. }
  203. #endif
  204. } // namespace farmhashsu