farmhashsa.cc 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. #include "common.h"
  2. namespace {
  3. #include "farmhashsu.cc"
  4. }
  5. namespace farmhashsa {
  6. #if !can_use_sse42
  7. uint32_t Hash32(const char *s, size_t len) {
  8. FARMHASH_DIE_IF_MISCONFIGURED;
  9. return s == NULL ? 0 : len;
  10. }
  11. uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  12. FARMHASH_DIE_IF_MISCONFIGURED;
  13. return seed + Hash32(s, len);
  14. }
  15. #else
  16. #undef Fetch
  17. #define Fetch Fetch32
  18. #undef Rotate
  19. #define Rotate Rotate32
  20. #undef Bswap
  21. #define Bswap Bswap32
  22. // Helpers for data-parallel operations (4x 32-bits).
  23. STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); }
  24. STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); }
  25. STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); }
  26. STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }
  27. STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); }
  28. STATIC_INLINE __m128i Rotate(__m128i x, int c) {
  29. return Or(_mm_slli_epi32(x, c),
  30. _mm_srli_epi32(x, 32 - c));
  31. }
  32. STATIC_INLINE __m128i Rot17(__m128i x) { return Rotate(x, 17); }
  33. STATIC_INLINE __m128i Rot19(__m128i x) { return Rotate(x, 19); }
  34. STATIC_INLINE __m128i Shuffle0321(__m128i x) {
  35. return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0));
  36. }
  37. uint32_t Hash32(const char *s, size_t len) {
  38. const uint32_t seed = 81;
  39. if (len <= 24) {
  40. return len <= 12 ?
  41. (len <= 4 ?
  42. farmhashmk::Hash32Len0to4(s, len) :
  43. farmhashmk::Hash32Len5to12(s, len)) :
  44. farmhashmk::Hash32Len13to24(s, len);
  45. }
  46. if (len < 40) {
  47. uint32_t a = len, b = seed * c2, c = a + b;
  48. a += Fetch(s + len - 4);
  49. b += Fetch(s + len - 20);
  50. c += Fetch(s + len - 16);
  51. uint32_t d = a;
  52. a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21);
  53. a = Mur(a, Mur(b, Mur(c, d)));
  54. a += Fetch(s + len - 12);
  55. b += Fetch(s + len - 8);
  56. d += a;
  57. a += d;
  58. b = Mur(b, d) * c2;
  59. a = _mm_crc32_u32(a, b + c);
  60. return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b;
  61. }
  62. #undef Mulc1
  63. #define Mulc1(x) Mul((x), cc1)
  64. #undef Mulc2
  65. #define Mulc2(x) Mul((x), cc2)
  66. #undef Murk
  67. #define Murk(a, h) \
  68. Add(k, \
  69. Mul5( \
  70. Rot19( \
  71. Xor( \
  72. Mulc2( \
  73. Rot17( \
  74. Mulc1(a))), \
  75. (h)))))
  76. const __m128i cc1 = _mm_set1_epi32(c1);
  77. const __m128i cc2 = _mm_set1_epi32(c2);
  78. __m128i h = _mm_set1_epi32(seed);
  79. __m128i g = _mm_set1_epi32(c1 * seed);
  80. __m128i f = g;
  81. __m128i k = _mm_set1_epi32(0xe6546b64);
  82. if (len < 80) {
  83. __m128i a = Fetch128(s);
  84. __m128i b = Fetch128(s + 16);
  85. __m128i c = Fetch128(s + (len - 15) / 2);
  86. __m128i d = Fetch128(s + len - 32);
  87. __m128i e = Fetch128(s + len - 16);
  88. h = Add(h, a);
  89. g = Add(g, b);
  90. g = Shuffle0321(g);
  91. f = Add(f, c);
  92. __m128i be = Add(b, Mulc1(e));
  93. h = Add(h, f);
  94. f = Add(f, h);
  95. h = Add(Murk(d, h), e);
  96. k = Xor(k, _mm_shuffle_epi8(g, f));
  97. g = Add(Xor(c, g), a);
  98. f = Add(Xor(be, f), d);
  99. k = Add(k, be);
  100. k = Add(k, _mm_shuffle_epi8(f, h));
  101. f = Add(f, g);
  102. g = Add(g, f);
  103. g = Add(_mm_set1_epi32(len), Mulc1(g));
  104. } else {
  105. // len >= 80
  106. // The following is loosely modelled after farmhashmk::Hash32.
  107. size_t iters = (len - 1) / 80;
  108. len -= iters * 80;
  109. #undef Chunk
  110. #define Chunk() do { \
  111. __m128i a = Fetch128(s); \
  112. __m128i b = Fetch128(s + 16); \
  113. __m128i c = Fetch128(s + 32); \
  114. __m128i d = Fetch128(s + 48); \
  115. __m128i e = Fetch128(s + 64); \
  116. h = Add(h, a); \
  117. g = Add(g, b); \
  118. g = Shuffle0321(g); \
  119. f = Add(f, c); \
  120. __m128i be = Add(b, Mulc1(e)); \
  121. h = Add(h, f); \
  122. f = Add(f, h); \
  123. h = Add(Murk(d, h), e); \
  124. k = Xor(k, _mm_shuffle_epi8(g, f)); \
  125. g = Add(Xor(c, g), a); \
  126. f = Add(Xor(be, f), d); \
  127. k = Add(k, be); \
  128. k = Add(k, _mm_shuffle_epi8(f, h)); \
  129. f = Add(f, g); \
  130. g = Add(g, f); \
  131. f = Mulc1(f); \
  132. } while (0)
  133. while (iters-- != 0) {
  134. Chunk();
  135. s += 80;
  136. }
  137. if (len != 0) {
  138. h = Add(h, _mm_set1_epi32(len));
  139. s = s + len - 80;
  140. Chunk();
  141. }
  142. }
  143. g = Shuffle0321(g);
  144. k = Xor(k, g);
  145. f = Mulc1(f);
  146. k = Mulc2(k);
  147. g = Mulc1(g);
  148. h = Mulc2(h);
  149. k = Add(k, _mm_shuffle_epi8(g, f));
  150. h = Add(h, f);
  151. f = Add(f, h);
  152. g = Add(g, k);
  153. k = Add(k, g);
  154. k = Xor(k, _mm_shuffle_epi8(f, h));
  155. __m128i buf[4];
  156. buf[0] = f;
  157. buf[1] = g;
  158. buf[2] = k;
  159. buf[3] = h;
  160. s = reinterpret_cast<char*>(buf);
  161. uint32_t x = Fetch(s);
  162. uint32_t y = Fetch(s+4);
  163. uint32_t z = Fetch(s+8);
  164. x = _mm_crc32_u32(x, Fetch(s+12));
  165. y = _mm_crc32_u32(y, Fetch(s+16));
  166. z = _mm_crc32_u32(z * c1, Fetch(s+20));
  167. x = _mm_crc32_u32(x, Fetch(s+24));
  168. y = _mm_crc32_u32(y * c1, Fetch(s+28));
  169. uint32_t o = y;
  170. z = _mm_crc32_u32(z, Fetch(s+32));
  171. x = _mm_crc32_u32(x * c1, Fetch(s+36));
  172. y = _mm_crc32_u32(y, Fetch(s+40));
  173. z = _mm_crc32_u32(z * c1, Fetch(s+44));
  174. x = _mm_crc32_u32(x, Fetch(s+48));
  175. y = _mm_crc32_u32(y * c1, Fetch(s+52));
  176. z = _mm_crc32_u32(z, Fetch(s+56));
  177. x = _mm_crc32_u32(x, Fetch(s+60));
  178. return (o - x + y - z) * c1;
  179. }
  180. #undef Chunk
  181. #undef Murk
  182. #undef Mulc2
  183. #undef Mulc1
  184. uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  185. if (len <= 24) {
  186. if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1);
  187. else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed);
  188. else return farmhashmk::Hash32Len0to4(s, len, seed);
  189. }
  190. uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len);
  191. return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h);
  192. }
  193. #endif
  194. } // namespace farmhashsa