farmhash.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. // Copyright (c) 2014 Google, Inc.
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to deal
  5. // in the Software without restriction, including without limitation the rights
  6. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. // copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. // THE SOFTWARE.
  20. //
  21. // FarmHash, by Geoff Pike
  22. //
  23. // http://code.google.com/p/farmhash/
  24. //
  25. // This file provides a few functions for hashing strings and other
  26. // data. All of them are high-quality functions in the sense that
  27. // they do well on standard tests such as Austin Appleby's SMHasher.
  28. // They're also fast. FarmHash is the successor to CityHash.
  29. //
  30. // Functions in the FarmHash family are not suitable for cryptography.
  31. //
  32. // WARNING: This code has been only lightly tested on big-endian platforms!
  33. // It is known to work well on little-endian platforms that have a small penalty
  34. // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
  35. // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
  36. // bug reports are welcome.
  37. //
  38. // By the way, for some hash functions, given strings a and b, the hash
  39. // of a+b is easily derived from the hashes of a and b. This property
  40. // doesn't hold for any hash functions in this file.
  41. #ifndef FARM_HASH_H_
  42. #define FARM_HASH_H_
  43. #include <assert.h>
  44. #include <stdint.h>
  45. #include <stdlib.h>
  46. #include <string.h> // for memcpy and memset
  47. #include <utility>
  48. #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
  49. #define NAMESPACE_FOR_HASH_FUNCTIONS util
  50. #endif
  51. namespace NAMESPACE_FOR_HASH_FUNCTIONS {
  52. #if defined(FARMHASH_UINT128_T_DEFINED)
  53. #if defined(__clang__)
  54. #if !defined(uint128_t)
  55. #define uint128_t __uint128_t
  56. #endif
  57. #endif
  58. inline uint64_t Uint128Low64(const uint128_t x) {
  59. return static_cast<uint64_t>(x);
  60. }
  61. inline uint64_t Uint128High64(const uint128_t x) {
  62. return static_cast<uint64_t>(x >> 64);
  63. }
  64. inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
  65. return lo + (((uint128_t)hi) << 64);
  66. }
  67. #else
  68. typedef std::pair<uint64_t, uint64_t> uint128_t;
  69. inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
  70. inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
  71. inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
  72. #endif
  73. // BASIC STRING HASHING
  74. // Hash function for a byte array.
  75. // May change from time to time, may differ on different platforms, may differ
  76. // depending on NDEBUG.
  77. size_t Hash(const char* s, size_t len);
  78. // Hash function for a byte array. Most useful in 32-bit binaries.
  79. // May change from time to time, may differ on different platforms, may differ
  80. // depending on NDEBUG.
  81. uint32_t Hash32(const char* s, size_t len);
  82. // Hash function for a byte array. For convenience, a 32-bit seed is also
  83. // hashed into the result.
  84. // May change from time to time, may differ on different platforms, may differ
  85. // depending on NDEBUG.
  86. uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
  87. // Hash function for a byte array.
  88. // May change from time to time, may differ on different platforms, may differ
  89. // depending on NDEBUG.
  90. uint64_t Hash64(const char* s, size_t len);
  91. // Hash function for a byte array. For convenience, a 64-bit seed is also
  92. // hashed into the result.
  93. // May change from time to time, may differ on different platforms, may differ
  94. // depending on NDEBUG.
  95. uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
  96. // Hash function for a byte array. For convenience, two seeds are also
  97. // hashed into the result.
  98. // May change from time to time, may differ on different platforms, may differ
  99. // depending on NDEBUG.
  100. uint64_t Hash64WithSeeds(const char* s, size_t len,
  101. uint64_t seed0, uint64_t seed1);
  102. // Hash function for a byte array.
  103. // May change from time to time, may differ on different platforms, may differ
  104. // depending on NDEBUG.
  105. uint128_t Hash128(const char* s, size_t len);
  106. // Hash function for a byte array. For convenience, a 128-bit seed is also
  107. // hashed into the result.
  108. // May change from time to time, may differ on different platforms, may differ
  109. // depending on NDEBUG.
  110. uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
  111. // BASIC NON-STRING HASHING
  112. // Hash 128 input bits down to 64 bits of output.
  113. // This is intended to be a reasonably good hash function.
  114. // May change from time to time, may differ on different platforms, may differ
  115. // depending on NDEBUG.
  116. inline uint64_t Hash128to64(uint128_t x) {
  117. // Murmur-inspired hashing.
  118. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  119. uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  120. a ^= (a >> 47);
  121. uint64_t b = (Uint128High64(x) ^ a) * kMul;
  122. b ^= (b >> 47);
  123. b *= kMul;
  124. return b;
  125. }
  126. // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
  127. // Fingerprint function for a byte array. Most useful in 32-bit binaries.
  128. uint32_t Fingerprint32(const char* s, size_t len);
  129. // Fingerprint function for a byte array.
  130. uint64_t Fingerprint64(const char* s, size_t len);
  131. // Fingerprint function for a byte array.
  132. uint128_t Fingerprint128(const char* s, size_t len);
  133. // This is intended to be a good fingerprinting primitive.
  134. // See below for more overloads.
  135. inline uint64_t Fingerprint(uint128_t x) {
  136. // Murmur-inspired hashing.
  137. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  138. uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  139. a ^= (a >> 47);
  140. uint64_t b = (Uint128High64(x) ^ a) * kMul;
  141. b ^= (b >> 44);
  142. b *= kMul;
  143. b ^= (b >> 41);
  144. b *= kMul;
  145. return b;
  146. }
  147. // This is intended to be a good fingerprinting primitive.
  148. inline uint64_t Fingerprint(uint64_t x) {
  149. // Murmur-inspired hashing.
  150. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  151. uint64_t b = x * kMul;
  152. b ^= (b >> 44);
  153. b *= kMul;
  154. b ^= (b >> 41);
  155. b *= kMul;
  156. return b;
  157. }
  158. #ifndef FARMHASH_NO_CXX_STRING
  159. // Convenience functions to hash or fingerprint C++ strings.
  160. // These require that Str::data() return a pointer to the first char
  161. // (as a const char*) and that Str::length() return the string's length;
  162. // they work with std::string, for example.
  163. // Hash function for a byte array.
  164. // May change from time to time, may differ on different platforms, may differ
  165. // depending on NDEBUG.
  166. template <typename Str>
  167. inline size_t Hash(const Str& s) {
  168. assert(sizeof(s[0]) == 1);
  169. return Hash(s.data(), s.length());
  170. }
  171. // Hash function for a byte array. Most useful in 32-bit binaries.
  172. // May change from time to time, may differ on different platforms, may differ
  173. // depending on NDEBUG.
  174. template <typename Str>
  175. inline uint32_t Hash32(const Str& s) {
  176. assert(sizeof(s[0]) == 1);
  177. return Hash32(s.data(), s.length());
  178. }
  179. // Hash function for a byte array. For convenience, a 32-bit seed is also
  180. // hashed into the result.
  181. // May change from time to time, may differ on different platforms, may differ
  182. // depending on NDEBUG.
  183. template <typename Str>
  184. inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
  185. assert(sizeof(s[0]) == 1);
  186. return Hash32WithSeed(s.data(), s.length(), seed);
  187. }
  188. // Hash 128 input bits down to 64 bits of output.
  189. // Hash function for a byte array.
  190. // May change from time to time, may differ on different platforms, may differ
  191. // depending on NDEBUG.
  192. template <typename Str>
  193. inline uint64_t Hash64(const Str& s) {
  194. assert(sizeof(s[0]) == 1);
  195. return Hash64(s.data(), s.length());
  196. }
  197. // Hash function for a byte array. For convenience, a 64-bit seed is also
  198. // hashed into the result.
  199. // May change from time to time, may differ on different platforms, may differ
  200. // depending on NDEBUG.
  201. template <typename Str>
  202. inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
  203. assert(sizeof(s[0]) == 1);
  204. return Hash64WithSeed(s.data(), s.length(), seed);
  205. }
  206. // Hash function for a byte array. For convenience, two seeds are also
  207. // hashed into the result.
  208. // May change from time to time, may differ on different platforms, may differ
  209. // depending on NDEBUG.
  210. template <typename Str>
  211. inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
  212. assert(sizeof(s[0]) == 1);
  213. return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
  214. }
  215. // Hash function for a byte array.
  216. // May change from time to time, may differ on different platforms, may differ
  217. // depending on NDEBUG.
  218. template <typename Str>
  219. inline uint128_t Hash128(const Str& s) {
  220. assert(sizeof(s[0]) == 1);
  221. return Hash128(s.data(), s.length());
  222. }
  223. // Hash function for a byte array. For convenience, a 128-bit seed is also
  224. // hashed into the result.
  225. // May change from time to time, may differ on different platforms, may differ
  226. // depending on NDEBUG.
  227. template <typename Str>
  228. inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
  229. assert(sizeof(s[0]) == 1);
  230. return Hash128(s.data(), s.length(), seed);
  231. }
  232. // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
  233. // Fingerprint function for a byte array. Most useful in 32-bit binaries.
  234. template <typename Str>
  235. inline uint32_t Fingerprint32(const Str& s) {
  236. assert(sizeof(s[0]) == 1);
  237. return Fingerprint32(s.data(), s.length());
  238. }
  239. // Fingerprint 128 input bits down to 64 bits of output.
  240. // Fingerprint function for a byte array.
  241. template <typename Str>
  242. inline uint64_t Fingerprint64(const Str& s) {
  243. assert(sizeof(s[0]) == 1);
  244. return Fingerprint64(s.data(), s.length());
  245. }
  246. // Fingerprint function for a byte array.
  247. template <typename Str>
  248. inline uint128_t Fingerprint128(const Str& s) {
  249. assert(sizeof(s[0]) == 1);
  250. return Fingerprint128(s.data(), s.length());
  251. }
  252. #endif
  253. } // namespace NAMESPACE_FOR_HASH_FUNCTIONS
  254. /* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */
  255. #if defined(__BIG_ENDIAN__)
  256. #if !defined(FARMHASH_BIG_ENDIAN)
  257. #define FARMHASH_BIG_ENDIAN
  258. #endif
  259. #elif defined(__LITTLE_ENDIAN__)
  260. // nothing for little-endian
  261. #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__)
  262. // nothing for little-endian
  263. #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__)
  264. #if !defined(FARMHASH_BIG_ENDIAN)
  265. #define FARMHASH_BIG_ENDIAN
  266. #endif
  267. #elif defined(__linux__) || defined(__CYGWIN__) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
  268. #include <endian.h> // libc6-dev, GLIBC
  269. #if BYTE_ORDER == BIG_ENDIAN
  270. #if !defined(FARMHASH_BIG_ENDIAN)
  271. #define FARMHASH_BIG_ENDIAN
  272. #endif
  273. #endif
  274. #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
  275. #include <sys/endian.h>
  276. #if BYTE_ORDER == BIG_ENDIAN
  277. #if !defined(FARMHASH_BIG_ENDIAN)
  278. #define FARMHASH_BIG_ENDIAN
  279. #endif
  280. #endif
  281. #elif defined(_WIN32)
  282. // Windows is (currently) little-endian
  283. #else
  284. #error "Unable to determine endianness!"
  285. #endif /* __BIG_ENDIAN__ */
  286. #endif // FARM_HASH_H_