HashBuilder.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file implements an interface allowing to conveniently build hashes of
  15. // various data types, without relying on the underlying hasher type to know
  16. // about hashed data types.
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #ifndef LLVM_SUPPORT_HASHBUILDER_H
  20. #define LLVM_SUPPORT_HASHBUILDER_H
  21. #include "llvm/ADT/ArrayRef.h"
  22. #include "llvm/ADT/Hashing.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/StringRef.h"
  25. #include "llvm/Support/Endian.h"
  26. #include "llvm/Support/type_traits.h"
  27. #include <iterator>
  28. #include <utility>
  29. namespace llvm {
  30. namespace hashbuilder_detail {
  31. /// Trait to indicate whether a type's bits can be hashed directly (after
  32. /// endianness correction).
  33. template <typename U>
  34. struct IsHashableData
  35. : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
  36. } // namespace hashbuilder_detail
  37. /// Declares the hasher member, and functions forwarding directly to the hasher.
  38. template <typename HasherT> class HashBuilderBase {
  39. public:
  40. HasherT &getHasher() { return Hasher; }
  41. /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
  42. ///
  43. /// This may not take the size of `Data` into account.
  44. /// Users of this function should pay attention to respect endianness
  45. /// contraints.
  46. void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
  47. /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
  48. ///
  49. /// This may not take the size of `Data` into account.
  50. /// Users of this function should pay attention to respect endianness
  51. /// contraints.
  52. void update(StringRef Data) {
  53. update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()),
  54. Data.size()));
  55. }
  56. /// Forward to `HasherT::final()` if available.
  57. template <typename HasherT_ = HasherT> StringRef final() {
  58. return this->getHasher().final();
  59. }
  60. /// Forward to `HasherT::result()` if available.
  61. template <typename HasherT_ = HasherT> StringRef result() {
  62. return this->getHasher().result();
  63. }
  64. protected:
  65. explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
  66. template <typename... ArgTypes>
  67. explicit HashBuilderBase(ArgTypes &&...Args)
  68. : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
  69. Hasher(*OptionalHasher) {}
  70. private:
  71. Optional<HasherT> OptionalHasher;
  72. HasherT &Hasher;
  73. };
  74. /// Implementation of the `HashBuilder` interface.
  75. ///
  76. /// `support::endianness::native` is not supported. `HashBuilder` is
  77. /// expected to canonicalize `support::endianness::native` to one of
  78. /// `support::endianness::big` or `support::endianness::little`.
  79. template <typename HasherT, support::endianness Endianness>
  80. class HashBuilderImpl : public HashBuilderBase<HasherT> {
  81. static_assert(Endianness != support::endianness::native,
  82. "HashBuilder should canonicalize endianness");
  83. public:
  84. explicit HashBuilderImpl(HasherT &Hasher)
  85. : HashBuilderBase<HasherT>(Hasher) {}
  86. template <typename... ArgTypes>
  87. explicit HashBuilderImpl(ArgTypes &&...Args)
  88. : HashBuilderBase<HasherT>(Args...) {}
  89. /// Implement hashing for hashable data types, e.g. integral or enum values.
  90. template <typename T>
  91. std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
  92. HashBuilderImpl &>
  93. add(T Value) {
  94. return adjustForEndiannessAndAdd(Value);
  95. }
  96. /// Support hashing `ArrayRef`.
  97. ///
  98. /// `Value.size()` is taken into account to ensure cases like
  99. /// ```
  100. /// builder.add({1});
  101. /// builder.add({2, 3});
  102. /// ```
  103. /// and
  104. /// ```
  105. /// builder.add({1, 2});
  106. /// builder.add({3});
  107. /// ```
  108. /// do not collide.
  109. template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
  110. // As of implementation time, simply calling `addRange(Value)` would also go
  111. // through the `update` fast path. But that would rely on the implementation
  112. // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
  113. // `update` to guarantee the fast path.
  114. add(Value.size());
  115. if (hashbuilder_detail::IsHashableData<T>::value &&
  116. Endianness == support::endian::system_endianness()) {
  117. this->update(
  118. makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
  119. Value.size() * sizeof(T)));
  120. } else {
  121. for (auto &V : Value)
  122. add(V);
  123. }
  124. return *this;
  125. }
  126. /// Support hashing `StringRef`.
  127. ///
  128. /// `Value.size()` is taken into account to ensure cases like
  129. /// ```
  130. /// builder.add("a");
  131. /// builder.add("bc");
  132. /// ```
  133. /// and
  134. /// ```
  135. /// builder.add("ab");
  136. /// builder.add("c");
  137. /// ```
  138. /// do not collide.
  139. HashBuilderImpl &add(StringRef Value) {
  140. // As of implementation time, simply calling `addRange(Value)` would also go
  141. // through `update`. But that would rely on the implementation of
  142. // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
  143. // guarantee the fast path.
  144. add(Value.size());
  145. this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
  146. Value.size()));
  147. return *this;
  148. }
  149. template <typename T>
  150. using HasAddHashT =
  151. decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
  152. /// Implement hashing for user-defined `struct`s.
  153. ///
  154. /// Any user-define `struct` can participate in hashing via `HashBuilder` by
  155. /// providing a `addHash` templated function.
  156. ///
  157. /// ```
  158. /// template <typename HasherT, support::endianness Endianness>
  159. /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
  160. /// const UserDefinedStruct &Value);
  161. /// ```
  162. ///
  163. /// For example:
  164. /// ```
  165. /// struct SimpleStruct {
  166. /// char c;
  167. /// int i;
  168. /// };
  169. ///
  170. /// template <typename HasherT, support::endianness Endianness>
  171. /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  172. /// const SimpleStruct &Value) {
  173. /// HBuilder.add(Value.c);
  174. /// HBuilder.add(Value.i);
  175. /// }
  176. /// ```
  177. ///
  178. /// To avoid endianness issues, specializations of `addHash` should
  179. /// generally rely on exising `add`, `addRange`, and `addRangeElements`
  180. /// functions. If directly using `update`, an implementation must correctly
  181. /// handle endianness.
  182. ///
  183. /// ```
  184. /// struct __attribute__ ((packed)) StructWithFastHash {
  185. /// int I;
  186. /// char C;
  187. ///
  188. /// // If possible, we want to hash both `I` and `C` in a single
  189. /// // `update` call for performance concerns.
  190. /// template <typename HasherT, support::endianness Endianness>
  191. /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  192. /// const StructWithFastHash &Value) {
  193. /// if (Endianness == support::endian::system_endianness()) {
  194. /// HBuilder.update(makeArrayRef(
  195. /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
  196. /// } else {
  197. /// // Rely on existing `add` methods to handle endianness.
  198. /// HBuilder.add(Value.I);
  199. /// HBuilder.add(Value.C);
  200. /// }
  201. /// }
  202. /// };
  203. /// ```
  204. ///
  205. /// To avoid collisions, specialization of `addHash` for variable-size
  206. /// types must take the size into account.
  207. ///
  208. /// For example:
  209. /// ```
  210. /// struct CustomContainer {
  211. /// private:
  212. /// size_t Size;
  213. /// int Elements[100];
  214. ///
  215. /// public:
  216. /// CustomContainer(size_t Size) : Size(Size) {
  217. /// for (size_t I = 0; I != Size; ++I)
  218. /// Elements[I] = I;
  219. /// }
  220. /// template <typename HasherT, support::endianness Endianness>
  221. /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  222. /// const CustomContainer &Value) {
  223. /// if (Endianness == support::endian::system_endianness()) {
  224. /// HBuilder.update(makeArrayRef(
  225. /// reinterpret_cast<const uint8_t *>(&Value.Size),
  226. /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
  227. /// } else {
  228. /// // `addRange` will take care of encoding the size.
  229. /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
  230. /// Value.Size);
  231. /// }
  232. /// }
  233. /// };
  234. /// ```
  235. template <typename T>
  236. std::enable_if_t<is_detected<HasAddHashT, T>::value &&
  237. !hashbuilder_detail::IsHashableData<T>::value,
  238. HashBuilderImpl &>
  239. add(const T &Value) {
  240. addHash(*this, Value);
  241. return *this;
  242. }
  243. template <typename T1, typename T2>
  244. HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
  245. add(Value.first);
  246. add(Value.second);
  247. return *this;
  248. }
  249. template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
  250. return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
  251. }
  252. /// A convenenience variadic helper.
  253. /// It simply iterates over its arguments, in order.
  254. /// ```
  255. /// add(Arg1, Arg2);
  256. /// ```
  257. /// is equivalent to
  258. /// ```
  259. /// add(Arg1)
  260. /// add(Arg2)
  261. /// ```
  262. template <typename T, typename... Ts>
  263. typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type
  264. add(const T &FirstArg, const Ts &...Args) {
  265. add(FirstArg);
  266. add(Args...);
  267. return *this;
  268. }
  269. template <typename ForwardIteratorT>
  270. HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
  271. add(std::distance(First, Last));
  272. return addRangeElements(First, Last);
  273. }
  274. template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
  275. return addRange(adl_begin(Range), adl_end(Range));
  276. }
  277. template <typename ForwardIteratorT>
  278. HashBuilderImpl &addRangeElements(ForwardIteratorT First,
  279. ForwardIteratorT Last) {
  280. return addRangeElementsImpl(
  281. First, Last,
  282. typename std::iterator_traits<ForwardIteratorT>::iterator_category());
  283. }
  284. template <typename RangeT>
  285. HashBuilderImpl &addRangeElements(const RangeT &Range) {
  286. return addRangeElements(adl_begin(Range), adl_end(Range));
  287. }
  288. template <typename T>
  289. using HasByteSwapT = decltype(support::endian::byte_swap(
  290. std::declval<T &>(), support::endianness::little));
  291. /// Adjust `Value` for the target endianness and add it to the hash.
  292. template <typename T>
  293. std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
  294. adjustForEndiannessAndAdd(const T &Value) {
  295. T SwappedValue = support::endian::byte_swap(Value, Endianness);
  296. this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
  297. sizeof(SwappedValue)));
  298. return *this;
  299. }
  300. private:
  301. template <typename... Ts, std::size_t... Indices>
  302. HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
  303. std::index_sequence<Indices...>) {
  304. add(std::get<Indices>(Arg)...);
  305. return *this;
  306. }
  307. // FIXME: Once available, specialize this function for `contiguous_iterator`s,
  308. // and use it for `ArrayRef` and `StringRef`.
  309. template <typename ForwardIteratorT>
  310. HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
  311. ForwardIteratorT Last,
  312. std::forward_iterator_tag) {
  313. for (auto It = First; It != Last; ++It)
  314. add(*It);
  315. return *this;
  316. }
  317. template <typename T>
  318. std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
  319. Endianness == support::endian::system_endianness(),
  320. HashBuilderImpl &>
  321. addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
  322. this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
  323. (Last - First) * sizeof(T)));
  324. return *this;
  325. }
  326. };
  327. /// Interface to help hash various types through a hasher type.
  328. ///
  329. /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
  330. /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
  331. /// without requiring any knowledge of hashed types from the hasher type.
  332. ///
  333. /// The only method expected from the templated hasher type `HasherT` is:
  334. /// * void update(ArrayRef<uint8_t> Data)
  335. ///
  336. /// Additionally, the following methods will be forwarded to the hasher type:
  337. /// * decltype(std::declval<HasherT &>().final()) final()
  338. /// * decltype(std::declval<HasherT &>().result()) result()
  339. ///
  340. /// From a user point of view, the interface provides the following:
  341. /// * `template<typename T> add(const T &Value)`
  342. /// The `add` function implements hashing of various types.
  343. /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
  344. /// The `addRange` function is designed to aid hashing a range of values.
  345. /// It explicitly adds the size of the range in the hash.
  346. /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
  347. /// The `addRangeElements` function is also designed to aid hashing a range of
  348. /// values. In contrast to `addRange`, it **ignores** the size of the range,
  349. /// behaving as if elements were added one at a time with `add`.
  350. ///
  351. /// User-defined `struct` types can participate in this interface by providing
  352. /// an `addHash` templated function. See the associated template specialization
  353. /// for details.
  354. ///
  355. /// This interface does not impose requirements on the hasher
  356. /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
  357. /// variable-size types; for example for
  358. /// ```
  359. /// builder.add({1});
  360. /// builder.add({2, 3});
  361. /// ```
  362. /// and
  363. /// ```
  364. /// builder.add({1, 2});
  365. /// builder.add({3});
  366. /// ```
  367. /// . Thus, specializations of `add` and `addHash` for variable-size types must
  368. /// not assume that the hasher type considers the size as part of the hash; they
  369. /// must explicitly add the size to the hash. See for example specializations
  370. /// for `ArrayRef` and `StringRef`.
  371. ///
  372. /// Additionally, since types are eventually forwarded to the hasher's
  373. /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
  374. /// computation (for example when computing `add((int)123)`).
  375. /// Specifiying a non-`native` `Endianness` template parameter allows to compute
  376. /// stable hash across platforms with different endianness.
  377. template <class HasherT, support::endianness Endianness>
  378. using HashBuilder =
  379. HashBuilderImpl<HasherT, (Endianness == support::endianness::native
  380. ? support::endian::system_endianness()
  381. : Endianness)>;
  382. namespace hashbuilder_detail {
  383. class HashCodeHasher {
  384. public:
  385. HashCodeHasher() : Code(0) {}
  386. void update(ArrayRef<uint8_t> Data) {
  387. hash_code DataCode = hash_value(Data);
  388. Code = hash_combine(Code, DataCode);
  389. }
  390. hash_code Code;
  391. };
  392. using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
  393. support::endianness::native>;
  394. } // namespace hashbuilder_detail
  395. /// Provide a default implementation of `hash_value` when `addHash(const T &)`
  396. /// is supported.
  397. template <typename T>
  398. std::enable_if_t<
  399. is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
  400. hash_code>
  401. hash_value(const T &Value) {
  402. hashbuilder_detail::HashCodeHashBuilder HBuilder;
  403. HBuilder.add(Value);
  404. return HBuilder.getHasher().Code;
  405. }
  406. } // end namespace llvm
  407. #endif // LLVM_SUPPORT_HASHBUILDER_H
  408. #ifdef __GNUC__
  409. #pragma GCC diagnostic pop
  410. #endif