HashBuilder.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file implements an interface allowing to conveniently build hashes of
  15. // various data types, without relying on the underlying hasher type to know
  16. // about hashed data types.
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #ifndef LLVM_SUPPORT_HASHBUILDER_H
  20. #define LLVM_SUPPORT_HASHBUILDER_H
  21. #include "llvm/ADT/ArrayRef.h"
  22. #include "llvm/ADT/Hashing.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/StringRef.h"
  25. #include "llvm/Support/Endian.h"
  26. #include "llvm/Support/type_traits.h"
  27. #include <iterator>
  28. #include <optional>
  29. #include <utility>
  30. namespace llvm {
  31. namespace hashbuilder_detail {
  32. /// Trait to indicate whether a type's bits can be hashed directly (after
  33. /// endianness correction).
  34. template <typename U>
  35. struct IsHashableData
  36. : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
  37. } // namespace hashbuilder_detail
  38. /// Declares the hasher member, and functions forwarding directly to the hasher.
  39. template <typename HasherT> class HashBuilderBase {
  40. public:
  41. template <typename HasherT_ = HasherT>
  42. using HashResultTy = decltype(std::declval<HasherT_ &>().final());
  43. HasherT &getHasher() { return Hasher; }
  44. /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
  45. ///
  46. /// This may not take the size of `Data` into account.
  47. /// Users of this function should pay attention to respect endianness
  48. /// contraints.
  49. void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
  50. /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
  51. ///
  52. /// This may not take the size of `Data` into account.
  53. /// Users of this function should pay attention to respect endianness
  54. /// contraints.
  55. void update(StringRef Data) {
  56. update(
  57. ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
  58. }
  59. /// Forward to `HasherT::final()` if available.
  60. template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
  61. return this->getHasher().final();
  62. }
  63. /// Forward to `HasherT::result()` if available.
  64. template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
  65. return this->getHasher().result();
  66. }
  67. protected:
  68. explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
  69. template <typename... ArgTypes>
  70. explicit HashBuilderBase(ArgTypes &&...Args)
  71. : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...),
  72. Hasher(*OptionalHasher) {}
  73. private:
  74. std::optional<HasherT> OptionalHasher;
  75. HasherT &Hasher;
  76. };
  77. /// Implementation of the `HashBuilder` interface.
  78. ///
  79. /// `support::endianness::native` is not supported. `HashBuilder` is
  80. /// expected to canonicalize `support::endianness::native` to one of
  81. /// `support::endianness::big` or `support::endianness::little`.
  82. template <typename HasherT, support::endianness Endianness>
  83. class HashBuilderImpl : public HashBuilderBase<HasherT> {
  84. static_assert(Endianness != support::endianness::native,
  85. "HashBuilder should canonicalize endianness");
  86. public:
  87. explicit HashBuilderImpl(HasherT &Hasher)
  88. : HashBuilderBase<HasherT>(Hasher) {}
  89. template <typename... ArgTypes>
  90. explicit HashBuilderImpl(ArgTypes &&...Args)
  91. : HashBuilderBase<HasherT>(Args...) {}
  92. /// Implement hashing for hashable data types, e.g. integral or enum values.
  93. template <typename T>
  94. std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
  95. HashBuilderImpl &>
  96. add(T Value) {
  97. return adjustForEndiannessAndAdd(Value);
  98. }
  99. /// Support hashing `ArrayRef`.
  100. ///
  101. /// `Value.size()` is taken into account to ensure cases like
  102. /// ```
  103. /// builder.add({1});
  104. /// builder.add({2, 3});
  105. /// ```
  106. /// and
  107. /// ```
  108. /// builder.add({1, 2});
  109. /// builder.add({3});
  110. /// ```
  111. /// do not collide.
  112. template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
  113. // As of implementation time, simply calling `addRange(Value)` would also go
  114. // through the `update` fast path. But that would rely on the implementation
  115. // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
  116. // `update` to guarantee the fast path.
  117. add(Value.size());
  118. if (hashbuilder_detail::IsHashableData<T>::value &&
  119. Endianness == support::endian::system_endianness()) {
  120. this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
  121. Value.size() * sizeof(T)));
  122. } else {
  123. for (auto &V : Value)
  124. add(V);
  125. }
  126. return *this;
  127. }
  128. /// Support hashing `StringRef`.
  129. ///
  130. /// `Value.size()` is taken into account to ensure cases like
  131. /// ```
  132. /// builder.add("a");
  133. /// builder.add("bc");
  134. /// ```
  135. /// and
  136. /// ```
  137. /// builder.add("ab");
  138. /// builder.add("c");
  139. /// ```
  140. /// do not collide.
  141. HashBuilderImpl &add(StringRef Value) {
  142. // As of implementation time, simply calling `addRange(Value)` would also go
  143. // through `update`. But that would rely on the implementation of
  144. // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
  145. // guarantee the fast path.
  146. add(Value.size());
  147. this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
  148. Value.size()));
  149. return *this;
  150. }
  151. template <typename T>
  152. using HasAddHashT =
  153. decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
  154. /// Implement hashing for user-defined `struct`s.
  155. ///
  156. /// Any user-define `struct` can participate in hashing via `HashBuilder` by
  157. /// providing a `addHash` templated function.
  158. ///
  159. /// ```
  160. /// template <typename HasherT, support::endianness Endianness>
  161. /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
  162. /// const UserDefinedStruct &Value);
  163. /// ```
  164. ///
  165. /// For example:
  166. /// ```
  167. /// struct SimpleStruct {
  168. /// char c;
  169. /// int i;
  170. /// };
  171. ///
  172. /// template <typename HasherT, support::endianness Endianness>
  173. /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  174. /// const SimpleStruct &Value) {
  175. /// HBuilder.add(Value.c);
  176. /// HBuilder.add(Value.i);
  177. /// }
  178. /// ```
  179. ///
  180. /// To avoid endianness issues, specializations of `addHash` should
  181. /// generally rely on exising `add`, `addRange`, and `addRangeElements`
  182. /// functions. If directly using `update`, an implementation must correctly
  183. /// handle endianness.
  184. ///
  185. /// ```
  186. /// struct __attribute__ ((packed)) StructWithFastHash {
  187. /// int I;
  188. /// char C;
  189. ///
  190. /// // If possible, we want to hash both `I` and `C` in a single
  191. /// // `update` call for performance concerns.
  192. /// template <typename HasherT, support::endianness Endianness>
  193. /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  194. /// const StructWithFastHash &Value) {
  195. /// if (Endianness == support::endian::system_endianness()) {
  196. /// HBuilder.update(ArrayRef(
  197. /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
  198. /// } else {
  199. /// // Rely on existing `add` methods to handle endianness.
  200. /// HBuilder.add(Value.I);
  201. /// HBuilder.add(Value.C);
  202. /// }
  203. /// }
  204. /// };
  205. /// ```
  206. ///
  207. /// To avoid collisions, specialization of `addHash` for variable-size
  208. /// types must take the size into account.
  209. ///
  210. /// For example:
  211. /// ```
  212. /// struct CustomContainer {
  213. /// private:
  214. /// size_t Size;
  215. /// int Elements[100];
  216. ///
  217. /// public:
  218. /// CustomContainer(size_t Size) : Size(Size) {
  219. /// for (size_t I = 0; I != Size; ++I)
  220. /// Elements[I] = I;
  221. /// }
  222. /// template <typename HasherT, support::endianness Endianness>
  223. /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
  224. /// const CustomContainer &Value) {
  225. /// if (Endianness == support::endian::system_endianness()) {
  226. /// HBuilder.update(ArrayRef(
  227. /// reinterpret_cast<const uint8_t *>(&Value.Size),
  228. /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
  229. /// } else {
  230. /// // `addRange` will take care of encoding the size.
  231. /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
  232. /// Value.Size);
  233. /// }
  234. /// }
  235. /// };
  236. /// ```
  237. template <typename T>
  238. std::enable_if_t<is_detected<HasAddHashT, T>::value &&
  239. !hashbuilder_detail::IsHashableData<T>::value,
  240. HashBuilderImpl &>
  241. add(const T &Value) {
  242. addHash(*this, Value);
  243. return *this;
  244. }
  245. template <typename T1, typename T2>
  246. HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
  247. return add(Value.first, Value.second);
  248. }
  249. template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
  250. std::apply([this](const auto &...Args) { this->add(Args...); }, Arg);
  251. return *this;
  252. }
  253. /// A convenenience variadic helper.
  254. /// It simply iterates over its arguments, in order.
  255. /// ```
  256. /// add(Arg1, Arg2);
  257. /// ```
  258. /// is equivalent to
  259. /// ```
  260. /// add(Arg1)
  261. /// add(Arg2)
  262. /// ```
  263. template <typename... Ts>
  264. std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &>
  265. add(const Ts &...Args) {
  266. return (add(Args), ...);
  267. }
  268. template <typename ForwardIteratorT>
  269. HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
  270. add(std::distance(First, Last));
  271. return addRangeElements(First, Last);
  272. }
  273. template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
  274. return addRange(adl_begin(Range), adl_end(Range));
  275. }
  276. template <typename ForwardIteratorT>
  277. HashBuilderImpl &addRangeElements(ForwardIteratorT First,
  278. ForwardIteratorT Last) {
  279. return addRangeElementsImpl(
  280. First, Last,
  281. typename std::iterator_traits<ForwardIteratorT>::iterator_category());
  282. }
  283. template <typename RangeT>
  284. HashBuilderImpl &addRangeElements(const RangeT &Range) {
  285. return addRangeElements(adl_begin(Range), adl_end(Range));
  286. }
  287. template <typename T>
  288. using HasByteSwapT = decltype(support::endian::byte_swap(
  289. std::declval<T &>(), support::endianness::little));
  290. /// Adjust `Value` for the target endianness and add it to the hash.
  291. template <typename T>
  292. std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
  293. adjustForEndiannessAndAdd(const T &Value) {
  294. T SwappedValue = support::endian::byte_swap(Value, Endianness);
  295. this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
  296. sizeof(SwappedValue)));
  297. return *this;
  298. }
  299. private:
  300. // FIXME: Once available, specialize this function for `contiguous_iterator`s,
  301. // and use it for `ArrayRef` and `StringRef`.
  302. template <typename ForwardIteratorT>
  303. HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
  304. ForwardIteratorT Last,
  305. std::forward_iterator_tag) {
  306. for (auto It = First; It != Last; ++It)
  307. add(*It);
  308. return *this;
  309. }
  310. template <typename T>
  311. std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
  312. Endianness == support::endian::system_endianness(),
  313. HashBuilderImpl &>
  314. addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
  315. this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
  316. (Last - First) * sizeof(T)));
  317. return *this;
  318. }
  319. };
  320. /// Interface to help hash various types through a hasher type.
  321. ///
  322. /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
  323. /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
  324. /// without requiring any knowledge of hashed types from the hasher type.
  325. ///
  326. /// The only method expected from the templated hasher type `HasherT` is:
  327. /// * void update(ArrayRef<uint8_t> Data)
  328. ///
  329. /// Additionally, the following methods will be forwarded to the hasher type:
  330. /// * decltype(std::declval<HasherT &>().final()) final()
  331. /// * decltype(std::declval<HasherT &>().result()) result()
  332. ///
  333. /// From a user point of view, the interface provides the following:
  334. /// * `template<typename T> add(const T &Value)`
  335. /// The `add` function implements hashing of various types.
  336. /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
  337. /// The `addRange` function is designed to aid hashing a range of values.
  338. /// It explicitly adds the size of the range in the hash.
  339. /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
  340. /// The `addRangeElements` function is also designed to aid hashing a range of
  341. /// values. In contrast to `addRange`, it **ignores** the size of the range,
  342. /// behaving as if elements were added one at a time with `add`.
  343. ///
  344. /// User-defined `struct` types can participate in this interface by providing
  345. /// an `addHash` templated function. See the associated template specialization
  346. /// for details.
  347. ///
  348. /// This interface does not impose requirements on the hasher
  349. /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
  350. /// variable-size types; for example for
  351. /// ```
  352. /// builder.add({1});
  353. /// builder.add({2, 3});
  354. /// ```
  355. /// and
  356. /// ```
  357. /// builder.add({1, 2});
  358. /// builder.add({3});
  359. /// ```
  360. /// . Thus, specializations of `add` and `addHash` for variable-size types must
  361. /// not assume that the hasher type considers the size as part of the hash; they
  362. /// must explicitly add the size to the hash. See for example specializations
  363. /// for `ArrayRef` and `StringRef`.
  364. ///
  365. /// Additionally, since types are eventually forwarded to the hasher's
  366. /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
  367. /// computation (for example when computing `add((int)123)`).
  368. /// Specifiying a non-`native` `Endianness` template parameter allows to compute
  369. /// stable hash across platforms with different endianness.
  370. template <class HasherT, support::endianness Endianness>
  371. using HashBuilder =
  372. HashBuilderImpl<HasherT, (Endianness == support::endianness::native
  373. ? support::endian::system_endianness()
  374. : Endianness)>;
  375. namespace hashbuilder_detail {
  376. class HashCodeHasher {
  377. public:
  378. HashCodeHasher() : Code(0) {}
  379. void update(ArrayRef<uint8_t> Data) {
  380. hash_code DataCode = hash_value(Data);
  381. Code = hash_combine(Code, DataCode);
  382. }
  383. hash_code Code;
  384. };
  385. using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
  386. support::endianness::native>;
  387. } // namespace hashbuilder_detail
  388. /// Provide a default implementation of `hash_value` when `addHash(const T &)`
  389. /// is supported.
  390. template <typename T>
  391. std::enable_if_t<
  392. is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
  393. hash_code>
  394. hash_value(const T &Value) {
  395. hashbuilder_detail::HashCodeHashBuilder HBuilder;
  396. HBuilder.add(Value);
  397. return HBuilder.getHasher().Code;
  398. }
  399. } // end namespace llvm
  400. #endif // LLVM_SUPPORT_HASHBUILDER_H
  401. #ifdef __GNUC__
  402. #pragma GCC diagnostic pop
  403. #endif