hash.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. // Copyright 2018 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // -----------------------------------------------------------------------------
  16. // File: hash.h
  17. // -----------------------------------------------------------------------------
  18. //
  19. // This header file defines the Abseil `hash` library and the Abseil hashing
  20. // framework. This framework consists of the following:
  21. //
  22. // * The `absl::Hash` functor, which is used to invoke the hasher within the
  23. // Abseil hashing framework. `absl::Hash<T>` supports most basic types and
  24. // a number of Abseil types out of the box.
  25. // * `AbslHashValue`, an extension point that allows you to extend types to
  26. // support Abseil hashing without requiring you to define a hashing
  27. // algorithm.
  28. // * `HashState`, a type-erased class which implements the manipulation of the
  29. // hash state (H) itself; contains member functions `combine()`,
  30. // `combine_contiguous()`, and `combine_unordered()`; and which you can use
  31. // to contribute to an existing hash state when hashing your types.
  32. //
  33. // Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework
  34. // provides most of its utility by abstracting away the hash algorithm (and its
  35. // implementation) entirely. Instead, a type invokes the Abseil hashing
  36. // framework by simply combining its state with the state of known, hashable
  37. // types. Hashing of that combined state is separately done by `absl::Hash`.
  38. //
  39. // One should assume that a hash algorithm is chosen randomly at the start of
  40. // each process. E.g., `absl::Hash<int>{}(9)` in one process and
  41. // `absl::Hash<int>{}(9)` in another process are likely to differ.
  42. //
  43. // `absl::Hash` may also produce different values from different dynamically
  44. // loaded libraries. For this reason, `absl::Hash` values must never cross
  45. // boundaries in dynamically loaded libraries (including when used in types like
  46. // hash containers.)
  47. //
  48. // `absl::Hash` is intended to strongly mix input bits with a target of passing
  49. // an [Avalanche Test](https://en.wikipedia.org/wiki/Avalanche_effect).
  50. //
  51. // Example:
  52. //
  53. // // Suppose we have a class `Circle` for which we want to add hashing:
  54. // class Circle {
  55. // public:
  56. // ...
  57. // private:
  58. // std::pair<int, int> center_;
  59. // int radius_;
  60. // };
  61. //
  62. // // To add hashing support to `Circle`, we simply need to add a free
  63. // // (non-member) function `AbslHashValue()`, and return the combined hash
  64. // // state of the existing hash state and the class state. You can add such a
  65. // // free function using a friend declaration within the body of the class:
  66. // class Circle {
  67. // public:
  68. // ...
  69. // template <typename H>
  70. // friend H AbslHashValue(H h, const Circle& c) {
  71. // return H::combine(std::move(h), c.center_, c.radius_);
  72. // }
  73. // ...
  74. // };
  75. //
  76. // For more information, see Adding Type Support to `absl::Hash` below.
  77. //
  78. #ifndef ABSL_HASH_HASH_H_
  79. #define ABSL_HASH_HASH_H_
  80. #include <cstddef>
  81. #include <cstdint>
  82. #include <tuple>
  83. #include <type_traits>
  84. #include <utility>
  85. #include "absl/base/config.h"
  86. #include "absl/functional/function_ref.h"
  87. #include "absl/hash/internal/hash.h"
  88. #include "absl/meta/type_traits.h"
  89. namespace absl {
  90. ABSL_NAMESPACE_BEGIN
  91. // -----------------------------------------------------------------------------
  92. // `absl::Hash`
  93. // -----------------------------------------------------------------------------
  94. //
  95. // `absl::Hash<T>` is a convenient general-purpose hash functor for any type `T`
  96. // satisfying any of the following conditions (in order):
  97. //
  98. // * T is an arithmetic or pointer type
  99. // * T defines an overload for `AbslHashValue(H, const T&)` for an arbitrary
  100. // hash state `H`.
  101. // - T defines a specialization of `std::hash<T>`
  102. //
  103. // `absl::Hash` intrinsically supports the following types:
  104. //
  105. // * All integral types (including bool)
  106. // * All enum types
  107. // * All floating-point types (although hashing them is discouraged)
  108. // * All pointer types, including nullptr_t
  109. // * std::pair<T1, T2>, if T1 and T2 are hashable
  110. // * std::tuple<Ts...>, if all the Ts... are hashable
  111. // * std::unique_ptr and std::shared_ptr
  112. // * All string-like types including:
  113. // * absl::Cord
  114. // * std::string (as well as any instance of std::basic_string that
  115. // uses one of {char, wchar_t, char16_t, char32_t} and its associated
  116. // std::char_traits)
  117. // * std::string_view (as well as any instance of std::basic_string_view
  118. // that uses one of {char, wchar_t, char16_t, char32_t} and its associated
  119. // std::char_traits)
  120. // * All the standard sequence containers (provided the elements are hashable)
  121. // * All the standard associative containers (provided the elements are
  122. // hashable)
  123. // * absl types such as the following:
  124. // * absl::string_view
  125. // * absl::uint128
  126. // * absl::Time, absl::Duration, and absl::TimeZone
  127. // * absl containers (provided the elements are hashable) such as the
  128. // following:
  129. // * absl::flat_hash_set, absl::node_hash_set, absl::btree_set
  130. // * absl::flat_hash_map, absl::node_hash_map, absl::btree_map
  131. // * absl::btree_multiset, absl::btree_multimap
  132. // * absl::InlinedVector
  133. // * absl::FixedArray
  134. //
  135. // When absl::Hash is used to hash an unordered container with a custom hash
  136. // functor, the elements are hashed using default absl::Hash semantics, not
  137. // the custom hash functor. This is consistent with the behavior of
  138. // operator==() on unordered containers, which compares elements pairwise with
  139. // operator==() rather than the custom equality functor. It is usually a
  140. // mistake to use either operator==() or absl::Hash on unordered collections
  141. // that use functors incompatible with operator==() equality.
  142. //
  143. // Note: the list above is not meant to be exhaustive. Additional type support
  144. // may be added, in which case the above list will be updated.
  145. //
  146. // -----------------------------------------------------------------------------
  147. // absl::Hash Invocation Evaluation
  148. // -----------------------------------------------------------------------------
  149. //
  150. // When invoked, `absl::Hash<T>` searches for supplied hash functions in the
  151. // following order:
  152. //
  153. // * Natively supported types out of the box (see above)
  154. // * Types for which an `AbslHashValue()` overload is provided (such as
  155. // user-defined types). See "Adding Type Support to `absl::Hash`" below.
  156. // * Types which define a `std::hash<T>` specialization
  157. //
  158. // The fallback to legacy hash functions exists mainly for backwards
  159. // compatibility. If you have a choice, prefer defining an `AbslHashValue`
  160. // overload instead of specializing any legacy hash functors.
  161. //
  162. // -----------------------------------------------------------------------------
  163. // The Hash State Concept, and using `HashState` for Type Erasure
  164. // -----------------------------------------------------------------------------
  165. //
  166. // The `absl::Hash` framework relies on the Concept of a "hash state." Such a
  167. // hash state is used in several places:
  168. //
  169. // * Within existing implementations of `absl::Hash<T>` to store the hashed
  170. // state of an object. Note that it is up to the implementation how it stores
  171. // such state. A hash table, for example, may mix the state to produce an
  172. // integer value; a testing framework may simply hold a vector of that state.
  173. // * Within implementations of `AbslHashValue()` used to extend user-defined
  174. // types. (See "Adding Type Support to absl::Hash" below.)
  175. // * Inside a `HashState`, providing type erasure for the concept of a hash
  176. // state, which you can use to extend the `absl::Hash` framework for types
  177. // that are otherwise difficult to extend using `AbslHashValue()`. (See the
  178. // `HashState` class below.)
  179. //
  180. // The "hash state" concept contains three member functions for mixing hash
  181. // state:
  182. //
  183. // * `H::combine(state, values...)`
  184. //
  185. // Combines an arbitrary number of values into a hash state, returning the
  186. // updated state. Note that the existing hash state is move-only and must be
  187. // passed by value.
  188. //
  189. // Each of the value types T must be hashable by H.
  190. //
  191. // NOTE:
  192. //
  193. // state = H::combine(std::move(state), value1, value2, value3);
  194. //
  195. // must be guaranteed to produce the same hash expansion as
  196. //
  197. // state = H::combine(std::move(state), value1);
  198. // state = H::combine(std::move(state), value2);
  199. // state = H::combine(std::move(state), value3);
  200. //
  201. // * `H::combine_contiguous(state, data, size)`
  202. //
  203. // Combines a contiguous array of `size` elements into a hash state,
  204. // returning the updated state. Note that the existing hash state is
  205. // move-only and must be passed by value.
  206. //
  207. // NOTE:
  208. //
  209. // state = H::combine_contiguous(std::move(state), data, size);
  210. //
  211. // need NOT be guaranteed to produce the same hash expansion as a loop
  212. // (it may perform internal optimizations). If you need this guarantee, use a
  213. // loop instead.
  214. //
  215. // * `H::combine_unordered(state, begin, end)`
  216. //
  217. // Combines a set of elements denoted by an iterator pair into a hash
  218. // state, returning the updated state. Note that the existing hash
  219. // state is move-only and must be passed by value.
  220. //
  221. // Unlike the other two methods, the hashing is order-independent.
  222. // This can be used to hash unordered collections.
  223. //
  224. // -----------------------------------------------------------------------------
  225. // Adding Type Support to `absl::Hash`
  226. // -----------------------------------------------------------------------------
  227. //
  228. // To add support for your user-defined type, add a proper `AbslHashValue()`
  229. // overload as a free (non-member) function. The overload will take an
  230. // existing hash state and should combine that state with state from the type.
  231. //
  232. // Example:
  233. //
  234. // template <typename H>
  235. // H AbslHashValue(H state, const MyType& v) {
  236. // return H::combine(std::move(state), v.field1, ..., v.fieldN);
  237. // }
  238. //
  239. // where `(field1, ..., fieldN)` are the members you would use on your
  240. // `operator==` to define equality.
  241. //
  242. // Notice that `AbslHashValue` is not a class member, but an ordinary function.
  243. // An `AbslHashValue` overload for a type should only be declared in the same
  244. // file and namespace as said type. The proper `AbslHashValue` implementation
  245. // for a given type will be discovered via ADL.
  246. //
  247. // Note: unlike `std::hash', `absl::Hash` should never be specialized. It must
  248. // only be extended by adding `AbslHashValue()` overloads.
  249. //
  250. template <typename T>
  251. using Hash = absl::hash_internal::Hash<T>;
  252. // HashOf
  253. //
  254. // absl::HashOf() is a helper that generates a hash from the values of its
  255. // arguments. It dispatches to absl::Hash directly, as follows:
  256. // * HashOf(t) == absl::Hash<T>{}(t)
  257. // * HashOf(a, b, c) == HashOf(std::make_tuple(a, b, c))
  258. //
  259. // HashOf(a1, a2, ...) == HashOf(b1, b2, ...) is guaranteed when
  260. // * The argument lists have pairwise identical C++ types
  261. // * a1 == b1 && a2 == b2 && ...
  262. //
  263. // The requirement that the arguments match in both type and value is critical.
  264. // It means that `a == b` does not necessarily imply `HashOf(a) == HashOf(b)` if
  265. // `a` and `b` have different types. For example, `HashOf(2) != HashOf(2.0)`.
  266. template <int&... ExplicitArgumentBarrier, typename... Types>
  267. size_t HashOf(const Types&... values) {
  268. auto tuple = std::tie(values...);
  269. return absl::Hash<decltype(tuple)>{}(tuple);
  270. }
  271. // HashState
  272. //
  273. // A type erased version of the hash state concept, for use in user-defined
  274. // `AbslHashValue` implementations that can't use templates (such as PImpl
  275. // classes, virtual functions, etc.). The type erasure adds overhead so it
  276. // should be avoided unless necessary.
  277. //
  278. // Note: This wrapper will only erase calls to
  279. // combine_contiguous(H, const unsigned char*, size_t)
  280. // RunCombineUnordered(H, CombinerF)
  281. //
  282. // All other calls will be handled internally and will not invoke overloads
  283. // provided by the wrapped class.
  284. //
  285. // Users of this class should still define a template `AbslHashValue` function,
  286. // but can use `absl::HashState::Create(&state)` to erase the type of the hash
  287. // state and dispatch to their private hashing logic.
  288. //
  289. // This state can be used like any other hash state. In particular, you can call
  290. // `HashState::combine()` and `HashState::combine_contiguous()` on it.
  291. //
  292. // Example:
  293. //
  294. // class Interface {
  295. // public:
  296. // template <typename H>
  297. // friend H AbslHashValue(H state, const Interface& value) {
  298. // state = H::combine(std::move(state), std::type_index(typeid(*this)));
  299. // value.HashValue(absl::HashState::Create(&state));
  300. // return state;
  301. // }
  302. // private:
  303. // virtual void HashValue(absl::HashState state) const = 0;
  304. // };
  305. //
  306. // class Impl : Interface {
  307. // private:
  308. // void HashValue(absl::HashState state) const override {
  309. // absl::HashState::combine(std::move(state), v1_, v2_);
  310. // }
  311. // int v1_;
  312. // std::string v2_;
  313. // };
  314. class HashState : public hash_internal::HashStateBase<HashState> {
  315. public:
  316. // HashState::Create()
  317. //
  318. // Create a new `HashState` instance that wraps `state`. All calls to
  319. // `combine()` and `combine_contiguous()` on the new instance will be
  320. // redirected to the original `state` object. The `state` object must outlive
  321. // the `HashState` instance. `T` must be a subclass of `HashStateBase<T>` -
  322. // users should not define their own HashState types.
  323. template <
  324. typename T,
  325. absl::enable_if_t<
  326. std::is_base_of<hash_internal::HashStateBase<T>, T>::value, int> = 0>
  327. static HashState Create(T* state) {
  328. HashState s;
  329. s.Init(state);
  330. return s;
  331. }
  332. HashState(const HashState&) = delete;
  333. HashState& operator=(const HashState&) = delete;
  334. HashState(HashState&&) = default;
  335. HashState& operator=(HashState&&) = default;
  336. // HashState::combine()
  337. //
  338. // Combines an arbitrary number of values into a hash state, returning the
  339. // updated state.
  340. using HashState::HashStateBase::combine;
  341. // HashState::combine_contiguous()
  342. //
  343. // Combines a contiguous array of `size` elements into a hash state, returning
  344. // the updated state.
  345. static HashState combine_contiguous(HashState hash_state,
  346. const unsigned char* first, size_t size) {
  347. hash_state.combine_contiguous_(hash_state.state_, first, size);
  348. return hash_state;
  349. }
  350. using HashState::HashStateBase::combine_contiguous;
  351. private:
  352. HashState() = default;
  353. friend class HashState::HashStateBase;
  354. friend struct hash_internal::CombineRaw;
  355. template <typename T>
  356. static void CombineContiguousImpl(void* p, const unsigned char* first,
  357. size_t size) {
  358. T& state = *static_cast<T*>(p);
  359. state = T::combine_contiguous(std::move(state), first, size);
  360. }
  361. static HashState combine_raw(HashState hash_state, uint64_t value) {
  362. hash_state.combine_raw_(hash_state.state_, value);
  363. return hash_state;
  364. }
  365. template <typename T>
  366. static void CombineRawImpl(void* p, uint64_t value) {
  367. T& state = *static_cast<T*>(p);
  368. state = hash_internal::CombineRaw()(std::move(state), value);
  369. }
  370. template <typename T>
  371. void Init(T* state) {
  372. state_ = state;
  373. combine_contiguous_ = &CombineContiguousImpl<T>;
  374. combine_raw_ = &CombineRawImpl<T>;
  375. run_combine_unordered_ = &RunCombineUnorderedImpl<T>;
  376. }
  377. template <typename HS>
  378. struct CombineUnorderedInvoker {
  379. template <typename T, typename ConsumerT>
  380. void operator()(T inner_state, ConsumerT inner_cb) {
  381. f(HashState::Create(&inner_state),
  382. [&](HashState& inner_erased) { inner_cb(inner_erased.Real<T>()); });
  383. }
  384. absl::FunctionRef<void(HS, absl::FunctionRef<void(HS&)>)> f;
  385. };
  386. template <typename T>
  387. static HashState RunCombineUnorderedImpl(
  388. HashState state,
  389. absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>
  390. f) {
  391. // Note that this implementation assumes that inner_state and outer_state
  392. // are the same type. This isn't true in the SpyHash case, but SpyHash
  393. // types are move-convertible to each other, so this still works.
  394. T& real_state = state.Real<T>();
  395. real_state = T::RunCombineUnordered(
  396. std::move(real_state), CombineUnorderedInvoker<HashState>{f});
  397. return state;
  398. }
  399. template <typename CombinerT>
  400. static HashState RunCombineUnordered(HashState state, CombinerT combiner) {
  401. auto* run = state.run_combine_unordered_;
  402. return run(std::move(state), std::ref(combiner));
  403. }
  404. // Do not erase an already erased state.
  405. void Init(HashState* state) {
  406. state_ = state->state_;
  407. combine_contiguous_ = state->combine_contiguous_;
  408. combine_raw_ = state->combine_raw_;
  409. run_combine_unordered_ = state->run_combine_unordered_;
  410. }
  411. template <typename T>
  412. T& Real() {
  413. return *static_cast<T*>(state_);
  414. }
  415. void* state_;
  416. void (*combine_contiguous_)(void*, const unsigned char*, size_t);
  417. void (*combine_raw_)(void*, uint64_t);
  418. HashState (*run_combine_unordered_)(
  419. HashState state,
  420. absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>);
  421. };
  422. ABSL_NAMESPACE_END
  423. } // namespace absl
  424. #endif // ABSL_HASH_HASH_H_