MurmurHash2.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. //-----------------------------------------------------------------------------
  2. // MurmurHash2 was written by Austin Appleby, and is placed in the public
  3. // domain. The author hereby disclaims copyright to this source code.
  4. // Note - This code makes a few assumptions about how your machine behaves -
  5. // 1. We can read a 4-byte value from any address without crashing
  6. // 2. sizeof(int) == 4
  7. // And it has a few limitations -
  8. // 1. It will not work incrementally.
  9. // 2. It will not produce the same results on little-endian and big-endian
  10. // machines.
  11. #include "MurmurHash2.h"
  12. #include <string.h>
  13. //-----------------------------------------------------------------------------
  14. // Platform-specific functions and macros
  15. // Microsoft Visual Studio
  16. #if defined(_MSC_VER)
  17. #define FORCE_INLINE __forceinline
  18. #define BIG_CONSTANT(x) (x)
  19. // Other compilers
  20. #else // defined(_MSC_VER)
  21. #define FORCE_INLINE inline __attribute__((always_inline))
  22. #define BIG_CONSTANT(x) (x##LLU)
  23. #endif // !defined(_MSC_VER)
  24. //-----------------------------------------------------------------------------
  25. // Block read - if your platform needs to do endian-swapping or can only
  26. // handle aligned reads, do the conversion here
  27. FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
  28. {
  29. uint32_t res;
  30. memcpy(&res, p + i, sizeof(res));
  31. return res;
  32. }
  33. FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
  34. {
  35. uint64_t res;
  36. memcpy(&res, p + i, sizeof(res));
  37. return res;
  38. }
  39. uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
  40. {
  41. // 'm' and 'r' are mixing constants generated offline.
  42. // They're not really 'magic', they just happen to work well.
  43. const uint32_t m = 0x5bd1e995;
  44. const int r = 24;
  45. // Initialize the hash to a 'random' value
  46. uint32_t h = seed ^ len;
  47. // Mix 4 bytes at a time into the hash
  48. const unsigned char * data = (const unsigned char *)key;
  49. while(len >= 4)
  50. {
  51. uint32_t k = getblock32((const uint32_t*)data, 0);
  52. k *= m;
  53. k ^= k >> r;
  54. k *= m;
  55. h *= m;
  56. h ^= k;
  57. data += 4;
  58. len -= 4;
  59. }
  60. // Handle the last few bytes of the input array
  61. switch(len)
  62. {
  63. case 3: h ^= data[2] << 16;
  64. case 2: h ^= data[1] << 8;
  65. case 1: h ^= data[0];
  66. h *= m;
  67. };
  68. // Do a few final mixes of the hash to ensure the last few
  69. // bytes are well-incorporated.
  70. h ^= h >> 13;
  71. h *= m;
  72. h ^= h >> 15;
  73. return h;
  74. }
  75. //-----------------------------------------------------------------------------
  76. // MurmurHash2, 64-bit versions, by Austin Appleby
  77. // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
  78. // and endian-ness issues if used across multiple platforms.
  79. // 64-bit hash for 64-bit platforms
  80. uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
  81. {
  82. const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
  83. const int r = 47;
  84. uint64_t h = seed ^ (len * m);
  85. const uint64_t * data = (const uint64_t *)key;
  86. const uint64_t * end = data + (len/8);
  87. while(data != end)
  88. {
  89. uint64_t k = getblock64(data, 0);
  90. data++;
  91. k *= m;
  92. k ^= k >> r;
  93. k *= m;
  94. h ^= k;
  95. h *= m;
  96. }
  97. const unsigned char * data2 = (const unsigned char*)data;
  98. switch(len & 7)
  99. {
  100. case 7: h ^= uint64_t(data2[6]) << 48;
  101. case 6: h ^= uint64_t(data2[5]) << 40;
  102. case 5: h ^= uint64_t(data2[4]) << 32;
  103. case 4: h ^= uint64_t(data2[3]) << 24;
  104. case 3: h ^= uint64_t(data2[2]) << 16;
  105. case 2: h ^= uint64_t(data2[1]) << 8;
  106. case 1: h ^= uint64_t(data2[0]);
  107. h *= m;
  108. };
  109. h ^= h >> r;
  110. h *= m;
  111. h ^= h >> r;
  112. return h;
  113. }
  114. // 64-bit hash for 32-bit platforms
  115. uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
  116. {
  117. const uint32_t m = 0x5bd1e995;
  118. const int r = 24;
  119. uint32_t h1 = uint32_t(seed) ^ len;
  120. uint32_t h2 = uint32_t(seed >> 32);
  121. const uint32_t * data = (const uint32_t *)key;
  122. while(len >= 8)
  123. {
  124. uint32_t k1 = *data++;
  125. k1 *= m; k1 ^= k1 >> r; k1 *= m;
  126. h1 *= m; h1 ^= k1;
  127. len -= 4;
  128. uint32_t k2 = *data++;
  129. k2 *= m; k2 ^= k2 >> r; k2 *= m;
  130. h2 *= m; h2 ^= k2;
  131. len -= 4;
  132. }
  133. if(len >= 4)
  134. {
  135. uint32_t k1 = *data++;
  136. k1 *= m; k1 ^= k1 >> r; k1 *= m;
  137. h1 *= m; h1 ^= k1;
  138. len -= 4;
  139. }
  140. switch(len)
  141. {
  142. case 3: h2 ^= ((unsigned char*)data)[2] << 16;
  143. case 2: h2 ^= ((unsigned char*)data)[1] << 8;
  144. case 1: h2 ^= ((unsigned char*)data)[0];
  145. h2 *= m;
  146. };
  147. h1 ^= h2 >> 18; h1 *= m;
  148. h2 ^= h1 >> 22; h2 *= m;
  149. h1 ^= h2 >> 17; h1 *= m;
  150. h2 ^= h1 >> 19; h2 *= m;
  151. uint64_t h = h1;
  152. h = (h << 32) | h2;
  153. return h;
  154. }
  155. //-----------------------------------------------------------------------------
  156. // MurmurHash2A, by Austin Appleby
  157. // This is a variant of MurmurHash2 modified to use the Merkle-Damgard
  158. // construction. Bulk speed should be identical to Murmur2, small-key speed
  159. // will be 10%-20% slower due to the added overhead at the end of the hash.
  160. // This variant fixes a minor issue where null keys were more likely to
  161. // collide with each other than expected, and also makes the function
  162. // more amenable to incremental implementations.
  163. #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
  164. uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
  165. {
  166. const uint32_t m = 0x5bd1e995;
  167. const int r = 24;
  168. uint32_t l = len;
  169. const unsigned char * data = (const unsigned char *)key;
  170. uint32_t h = seed;
  171. while(len >= 4)
  172. {
  173. uint32_t k = *(uint32_t*)data;
  174. mmix(h,k);
  175. data += 4;
  176. len -= 4;
  177. }
  178. uint32_t t = 0;
  179. switch(len)
  180. {
  181. case 3: t ^= data[2] << 16;
  182. case 2: t ^= data[1] << 8;
  183. case 1: t ^= data[0];
  184. };
  185. mmix(h,t);
  186. mmix(h,l);
  187. h ^= h >> 13;
  188. h *= m;
  189. h ^= h >> 15;
  190. return h;
  191. }
  192. //-----------------------------------------------------------------------------
  193. // CMurmurHash2A, by Austin Appleby
  194. // This is a sample implementation of MurmurHash2A designed to work
  195. // incrementally.
  196. // Usage -
  197. // CMurmurHash2A hasher
  198. // hasher.Begin(seed);
  199. // hasher.Add(data1,size1);
  200. // hasher.Add(data2,size2);
  201. // ...
  202. // hasher.Add(dataN,sizeN);
  203. // uint32_t hash = hasher.End()
  204. class CMurmurHash2A
  205. {
  206. public:
  207. void Begin ( uint32_t seed = 0 )
  208. {
  209. m_hash = seed;
  210. m_tail = 0;
  211. m_count = 0;
  212. m_size = 0;
  213. }
  214. void Add ( const unsigned char * data, size_t len )
  215. {
  216. m_size += len;
  217. MixTail(data,len);
  218. while(len >= 4)
  219. {
  220. uint32_t k = *(uint32_t*)data;
  221. mmix(m_hash,k);
  222. data += 4;
  223. len -= 4;
  224. }
  225. MixTail(data,len);
  226. }
  227. uint32_t End ( void )
  228. {
  229. mmix(m_hash,m_tail);
  230. mmix(m_hash,m_size);
  231. m_hash ^= m_hash >> 13;
  232. m_hash *= m;
  233. m_hash ^= m_hash >> 15;
  234. return m_hash;
  235. }
  236. private:
  237. static const uint32_t m = 0x5bd1e995;
  238. static const int r = 24;
  239. void MixTail ( const unsigned char * & data, size_t & len )
  240. {
  241. while( len && ((len<4) || m_count) )
  242. {
  243. m_tail |= (*data++) << (m_count * 8);
  244. m_count++;
  245. len--;
  246. if(m_count == 4)
  247. {
  248. mmix(m_hash,m_tail);
  249. m_tail = 0;
  250. m_count = 0;
  251. }
  252. }
  253. }
  254. uint32_t m_hash;
  255. uint32_t m_tail;
  256. uint32_t m_count;
  257. uint32_t m_size;
  258. };
  259. //-----------------------------------------------------------------------------
  260. // MurmurHashNeutral2, by Austin Appleby
  261. // Same as MurmurHash2, but endian- and alignment-neutral.
  262. // Half the speed though, alas.
  263. uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed )
  264. {
  265. const uint32_t m = 0x5bd1e995;
  266. const int r = 24;
  267. uint32_t h = seed ^ len;
  268. const unsigned char * data = (const unsigned char *)key;
  269. while(len >= 4)
  270. {
  271. uint32_t k;
  272. k = data[0];
  273. k |= data[1] << 8;
  274. k |= data[2] << 16;
  275. k |= data[3] << 24;
  276. k *= m;
  277. k ^= k >> r;
  278. k *= m;
  279. h *= m;
  280. h ^= k;
  281. data += 4;
  282. len -= 4;
  283. }
  284. switch(len)
  285. {
  286. case 3: h ^= data[2] << 16;
  287. case 2: h ^= data[1] << 8;
  288. case 1: h ^= data[0];
  289. h *= m;
  290. };
  291. h ^= h >> 13;
  292. h *= m;
  293. h ^= h >> 15;
  294. return h;
  295. }
  296. //-----------------------------------------------------------------------------
  297. // MurmurHashAligned2, by Austin Appleby
  298. // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
  299. // on certain platforms.
  300. // Performance will be lower than MurmurHash2
  301. #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
  302. uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
  303. {
  304. const uint32_t m = 0x5bd1e995;
  305. const int r = 24;
  306. const unsigned char * data = (const unsigned char *)key;
  307. uint32_t h = seed ^ len;
  308. size_t align = (uint64_t)data & 3;
  309. if(align && (len >= 4))
  310. {
  311. // Pre-load the temp registers
  312. uint32_t t = 0, d = 0;
  313. switch(align)
  314. {
  315. case 1: t |= data[2] << 16;
  316. case 2: t |= data[1] << 8;
  317. case 3: t |= data[0];
  318. }
  319. t <<= (8 * align);
  320. data += 4-align;
  321. len -= 4-align;
  322. int sl = 8 * (4-align);
  323. int sr = 8 * align;
  324. // Mix
  325. while(len >= 4)
  326. {
  327. d = *(uint32_t *)data;
  328. t = (t >> sr) | (d << sl);
  329. uint32_t k = t;
  330. MIX(h,k,m);
  331. t = d;
  332. data += 4;
  333. len -= 4;
  334. }
  335. // Handle leftover data in temp registers
  336. d = 0;
  337. if(len >= align)
  338. {
  339. switch(align)
  340. {
  341. case 3: d |= data[2] << 16;
  342. case 2: d |= data[1] << 8;
  343. case 1: d |= data[0];
  344. }
  345. uint32_t k = (t >> sr) | (d << sl);
  346. MIX(h,k,m);
  347. data += align;
  348. len -= align;
  349. //----------
  350. // Handle tail bytes
  351. switch(len)
  352. {
  353. case 3: h ^= data[2] << 16;
  354. case 2: h ^= data[1] << 8;
  355. case 1: h ^= data[0];
  356. h *= m;
  357. };
  358. }
  359. else
  360. {
  361. switch(len)
  362. {
  363. case 3: d |= data[2] << 16;
  364. case 2: d |= data[1] << 8;
  365. case 1: d |= data[0];
  366. case 0: h ^= (t >> sr) | (d << sl);
  367. h *= m;
  368. }
  369. }
  370. h ^= h >> 13;
  371. h *= m;
  372. h ^= h >> 15;
  373. return h;
  374. }
  375. else
  376. {
  377. while(len >= 4)
  378. {
  379. uint32_t k = *(uint32_t *)data;
  380. MIX(h,k,m);
  381. data += 4;
  382. len -= 4;
  383. }
  384. //----------
  385. // Handle tail bytes
  386. switch(len)
  387. {
  388. case 3: h ^= data[2] << 16;
  389. case 2: h ^= data[1] << 8;
  390. case 1: h ^= data[0];
  391. h *= m;
  392. };
  393. h ^= h >> 13;
  394. h *= m;
  395. h ^= h >> 15;
  396. return h;
  397. }
  398. }
  399. //-----------------------------------------------------------------------------