#include "common.h" namespace { #include "farmhashmk.cc" } namespace farmhashsu { #if !can_use_sse42 || !can_use_aesni uint32_t Hash32(const char *s, size_t len) { FARMHASH_DIE_IF_MISCONFIGURED; return s == NULL ? 0 : len; } uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { FARMHASH_DIE_IF_MISCONFIGURED; return seed + Hash32(s, len); } #else #undef Fetch #define Fetch Fetch32 #undef Rotate #define Rotate Rotate32 #undef Bswap #define Bswap Bswap32 // Helpers for data-parallel operations (4x 32-bits). STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); } STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); } STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); } STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); } STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); } STATIC_INLINE __m128i RotateLeft(__m128i x, int c) { return Or(_mm_slli_epi32(x, c), _mm_srli_epi32(x, 32 - c)); } STATIC_INLINE __m128i Rol17(__m128i x) { return RotateLeft(x, 17); } STATIC_INLINE __m128i Rol19(__m128i x) { return RotateLeft(x, 19); } STATIC_INLINE __m128i Shuffle0321(__m128i x) { return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)); } uint32_t Hash32(const char *s, size_t len) { const uint32_t seed = 81; if (len <= 24) { return len <= 12 ? (len <= 4 ? farmhashmk::Hash32Len0to4(s, len) : farmhashmk::Hash32Len5to12(s, len)) : farmhashmk::Hash32Len13to24(s, len); } if (len < 40) { uint32_t a = len, b = seed * c2, c = a + b; a += Fetch(s + len - 4); b += Fetch(s + len - 20); c += Fetch(s + len - 16); uint32_t d = a; a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21); a = Mur(a, Mur(b, _mm_crc32_u32(c, d))); a += Fetch(s + len - 12); b += Fetch(s + len - 8); d += a; a += d; b = Mur(b, d) * c2; a = _mm_crc32_u32(a, b + c); return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; } #undef Mulc1 #define Mulc1(x) Mul((x), cc1) #undef Mulc2 #define Mulc2(x) Mul((x), cc2) #undef Murk #define Murk(a, h) \ Add(k, \ Mul5( \ Rol19( \ Xor( \ Mulc2( \ Rol17( \ Mulc1(a))), \ (h))))) const __m128i cc1 = _mm_set1_epi32(c1); const __m128i cc2 = _mm_set1_epi32(c2); __m128i h = _mm_set1_epi32(seed); __m128i g = _mm_set1_epi32(c1 * seed); __m128i f = g; __m128i k = _mm_set1_epi32(0xe6546b64); __m128i q; if (len < 80) { __m128i a = Fetch128(s); __m128i b = Fetch128(s + 16); __m128i c = Fetch128(s + (len - 15) / 2); __m128i d = Fetch128(s + len - 32); __m128i e = Fetch128(s + len - 16); h = Add(h, a); g = Add(g, b); q = g; g = Shuffle0321(g); f = Add(f, c); __m128i be = Add(b, Mulc1(e)); h = Add(h, f); f = Add(f, h); h = Add(Murk(d, h), e); k = Xor(k, _mm_shuffle_epi8(g, f)); g = Add(Xor(c, g), a); f = Add(Xor(be, f), d); k = Add(k, be); k = Add(k, _mm_shuffle_epi8(f, h)); f = Add(f, g); g = Add(g, f); g = Add(_mm_set1_epi32(len), Mulc1(g)); } else { // len >= 80 // The following is loosely modelled after farmhashmk::Hash32. size_t iters = (len - 1) / 80; len -= iters * 80; #undef Chunk #define Chunk() do { \ __m128i a = Fetch128(s); \ __m128i b = Fetch128(s + 16); \ __m128i c = Fetch128(s + 32); \ __m128i d = Fetch128(s + 48); \ __m128i e = Fetch128(s + 64); \ h = Add(h, a); \ g = Add(g, b); \ g = Shuffle0321(g); \ f = Add(f, c); \ __m128i be = Add(b, Mulc1(e)); \ h = Add(h, f); \ f = Add(f, h); \ h = Add(h, d); \ q = Add(q, e); \ h = Rol17(h); \ h = Mulc1(h); \ k = Xor(k, _mm_shuffle_epi8(g, f)); \ g = Add(Xor(c, g), a); \ f = Add(Xor(be, f), d); \ std::swap(f, q); \ q = _mm_aesimc_si128(q); \ k = Add(k, be); \ k = Add(k, _mm_shuffle_epi8(f, h)); \ f = Add(f, g); \ g = Add(g, f); \ f = Mulc1(f); \ } while (0) q = g; while (iters-- != 0) { Chunk(); s += 80; } if (len != 0) { h = Add(h, _mm_set1_epi32(len)); s = s + len - 80; Chunk(); } } g = Shuffle0321(g); k = Xor(k, g); k = Xor(k, q); h = Xor(h, q); f = Mulc1(f); k = Mulc2(k); g = Mulc1(g); h = Mulc2(h); k = Add(k, _mm_shuffle_epi8(g, f)); h = Add(h, f); f = Add(f, h); g = Add(g, k); k = Add(k, g); k = Xor(k, _mm_shuffle_epi8(f, h)); __m128i buf[4]; buf[0] = f; buf[1] = g; buf[2] = k; buf[3] = h; s = reinterpret_cast(buf); uint32_t x = Fetch(s); uint32_t y = Fetch(s+4); uint32_t z = Fetch(s+8); x = _mm_crc32_u32(x, Fetch(s+12)); y = _mm_crc32_u32(y, Fetch(s+16)); z = _mm_crc32_u32(z * c1, Fetch(s+20)); x = _mm_crc32_u32(x, Fetch(s+24)); y = _mm_crc32_u32(y * c1, Fetch(s+28)); uint32_t o = y; z = _mm_crc32_u32(z, Fetch(s+32)); x = _mm_crc32_u32(x * c1, Fetch(s+36)); y = _mm_crc32_u32(y, Fetch(s+40)); z = _mm_crc32_u32(z * c1, Fetch(s+44)); x = _mm_crc32_u32(x, Fetch(s+48)); y = _mm_crc32_u32(y * c1, Fetch(s+52)); z = _mm_crc32_u32(z, Fetch(s+56)); x = _mm_crc32_u32(x, Fetch(s+60)); return (o - x + y - z) * c1; } #undef Chunk #undef Murk #undef Mulc2 #undef Mulc1 uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { if (len <= 24) { if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); else return farmhashmk::Hash32Len0to4(s, len, seed); } uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h); } #endif } // namespace farmhashsu