HashTable.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  14. #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  15. #include "llvm/ADT/SparseBitVector.h"
  16. #include "llvm/ADT/iterator.h"
  17. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  18. #include "llvm/Support/BinaryStreamReader.h"
  19. #include "llvm/Support/BinaryStreamWriter.h"
  20. #include "llvm/Support/Endian.h"
  21. #include "llvm/Support/Error.h"
  22. #include <cstdint>
  23. #include <iterator>
  24. #include <utility>
  25. #include <vector>
  26. namespace llvm {
  27. namespace pdb {
  28. Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
  29. Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
  30. template <typename ValueT> class HashTable;
  31. template <typename ValueT>
  32. class HashTableIterator
  33. : public iterator_facade_base<HashTableIterator<ValueT>,
  34. std::forward_iterator_tag,
  35. const std::pair<uint32_t, ValueT>> {
  36. using BaseT = typename HashTableIterator::iterator_facade_base;
  37. friend HashTable<ValueT>;
  38. HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
  39. bool IsEnd)
  40. : Map(&Map), Index(Index), IsEnd(IsEnd) {}
  41. public:
  42. HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
  43. int I = Map.Present.find_first();
  44. if (I == -1) {
  45. Index = 0;
  46. IsEnd = true;
  47. } else {
  48. Index = static_cast<uint32_t>(I);
  49. IsEnd = false;
  50. }
  51. }
  52. HashTableIterator(const HashTableIterator &R) = default;
  53. HashTableIterator &operator=(const HashTableIterator &R) {
  54. Map = R.Map;
  55. return *this;
  56. }
  57. bool operator==(const HashTableIterator &R) const {
  58. if (IsEnd && R.IsEnd)
  59. return true;
  60. if (IsEnd != R.IsEnd)
  61. return false;
  62. return (Map == R.Map) && (Index == R.Index);
  63. }
  64. const std::pair<uint32_t, ValueT> &operator*() const {
  65. assert(Map->Present.test(Index));
  66. return Map->Buckets[Index];
  67. }
  68. // Implement postfix op++ in terms of prefix op++ by using the superclass
  69. // implementation.
  70. using BaseT::operator++;
  71. HashTableIterator &operator++() {
  72. while (Index < Map->Buckets.size()) {
  73. ++Index;
  74. if (Map->Present.test(Index))
  75. return *this;
  76. }
  77. IsEnd = true;
  78. return *this;
  79. }
  80. private:
  81. bool isEnd() const { return IsEnd; }
  82. uint32_t index() const { return Index; }
  83. const HashTable<ValueT> *Map;
  84. uint32_t Index;
  85. bool IsEnd;
  86. };
  87. template <typename ValueT>
  88. class HashTable {
  89. struct Header {
  90. support::ulittle32_t Size;
  91. support::ulittle32_t Capacity;
  92. };
  93. using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
  94. public:
  95. using const_iterator = HashTableIterator<ValueT>;
  96. friend const_iterator;
  97. HashTable() { Buckets.resize(8); }
  98. explicit HashTable(uint32_t Capacity) {
  99. Buckets.resize(Capacity);
  100. }
  101. Error load(BinaryStreamReader &Stream) {
  102. const Header *H;
  103. if (auto EC = Stream.readObject(H))
  104. return EC;
  105. if (H->Capacity == 0)
  106. return make_error<RawError>(raw_error_code::corrupt_file,
  107. "Invalid Hash Table Capacity");
  108. if (H->Size > maxLoad(H->Capacity))
  109. return make_error<RawError>(raw_error_code::corrupt_file,
  110. "Invalid Hash Table Size");
  111. Buckets.resize(H->Capacity);
  112. if (auto EC = readSparseBitVector(Stream, Present))
  113. return EC;
  114. if (Present.count() != H->Size)
  115. return make_error<RawError>(raw_error_code::corrupt_file,
  116. "Present bit vector does not match size!");
  117. if (auto EC = readSparseBitVector(Stream, Deleted))
  118. return EC;
  119. if (Present.intersects(Deleted))
  120. return make_error<RawError>(raw_error_code::corrupt_file,
  121. "Present bit vector intersects deleted!");
  122. for (uint32_t P : Present) {
  123. if (auto EC = Stream.readInteger(Buckets[P].first))
  124. return EC;
  125. const ValueT *Value;
  126. if (auto EC = Stream.readObject(Value))
  127. return EC;
  128. Buckets[P].second = *Value;
  129. }
  130. return Error::success();
  131. }
  132. uint32_t calculateSerializedLength() const {
  133. uint32_t Size = sizeof(Header);
  134. constexpr int BitsPerWord = 8 * sizeof(uint32_t);
  135. int NumBitsP = Present.find_last() + 1;
  136. int NumBitsD = Deleted.find_last() + 1;
  137. uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
  138. uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
  139. // Present bit set number of words (4 bytes), followed by that many actual
  140. // words (4 bytes each).
  141. Size += sizeof(uint32_t);
  142. Size += NumWordsP * sizeof(uint32_t);
  143. // Deleted bit set number of words (4 bytes), followed by that many actual
  144. // words (4 bytes each).
  145. Size += sizeof(uint32_t);
  146. Size += NumWordsD * sizeof(uint32_t);
  147. // One (Key, ValueT) pair for each entry Present.
  148. Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
  149. return Size;
  150. }
  151. Error commit(BinaryStreamWriter &Writer) const {
  152. Header H;
  153. H.Size = size();
  154. H.Capacity = capacity();
  155. if (auto EC = Writer.writeObject(H))
  156. return EC;
  157. if (auto EC = writeSparseBitVector(Writer, Present))
  158. return EC;
  159. if (auto EC = writeSparseBitVector(Writer, Deleted))
  160. return EC;
  161. for (const auto &Entry : *this) {
  162. if (auto EC = Writer.writeInteger(Entry.first))
  163. return EC;
  164. if (auto EC = Writer.writeObject(Entry.second))
  165. return EC;
  166. }
  167. return Error::success();
  168. }
  169. void clear() {
  170. Buckets.resize(8);
  171. Present.clear();
  172. Deleted.clear();
  173. }
  174. bool empty() const { return size() == 0; }
  175. uint32_t capacity() const { return Buckets.size(); }
  176. uint32_t size() const { return Present.count(); }
  177. const_iterator begin() const { return const_iterator(*this); }
  178. const_iterator end() const { return const_iterator(*this, 0, true); }
  179. /// Find the entry whose key has the specified hash value, using the specified
  180. /// traits defining hash function and equality.
  181. template <typename Key, typename TraitsT>
  182. const_iterator find_as(const Key &K, TraitsT &Traits) const {
  183. uint32_t H = Traits.hashLookupKey(K) % capacity();
  184. uint32_t I = H;
  185. std::optional<uint32_t> FirstUnused;
  186. do {
  187. if (isPresent(I)) {
  188. if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
  189. return const_iterator(*this, I, false);
  190. } else {
  191. if (!FirstUnused)
  192. FirstUnused = I;
  193. // Insertion occurs via linear probing from the slot hint, and will be
  194. // inserted at the first empty / deleted location. Therefore, if we are
  195. // probing and find a location that is neither present nor deleted, then
  196. // nothing must have EVER been inserted at this location, and thus it is
  197. // not possible for a matching value to occur later.
  198. if (!isDeleted(I))
  199. break;
  200. }
  201. I = (I + 1) % capacity();
  202. } while (I != H);
  203. // The only way FirstUnused would not be set is if every single entry in the
  204. // table were Present. But this would violate the load factor constraints
  205. // that we impose, so it should never happen.
  206. assert(FirstUnused);
  207. return const_iterator(*this, *FirstUnused, true);
  208. }
  209. /// Set the entry using a key type that the specified Traits can convert
  210. /// from a real key to an internal key.
  211. template <typename Key, typename TraitsT>
  212. bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
  213. return set_as_internal(K, std::move(V), Traits, std::nullopt);
  214. }
  215. template <typename Key, typename TraitsT>
  216. ValueT get(const Key &K, TraitsT &Traits) const {
  217. auto Iter = find_as(K, Traits);
  218. assert(Iter != end());
  219. return (*Iter).second;
  220. }
  221. protected:
  222. bool isPresent(uint32_t K) const { return Present.test(K); }
  223. bool isDeleted(uint32_t K) const { return Deleted.test(K); }
  224. BucketList Buckets;
  225. mutable SparseBitVector<> Present;
  226. mutable SparseBitVector<> Deleted;
  227. private:
  228. /// Set the entry using a key type that the specified Traits can convert
  229. /// from a real key to an internal key.
  230. template <typename Key, typename TraitsT>
  231. bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
  232. std::optional<uint32_t> InternalKey) {
  233. auto Entry = find_as(K, Traits);
  234. if (Entry != end()) {
  235. assert(isPresent(Entry.index()));
  236. assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
  237. // We're updating, no need to do anything special.
  238. Buckets[Entry.index()].second = V;
  239. return false;
  240. }
  241. auto &B = Buckets[Entry.index()];
  242. assert(!isPresent(Entry.index()));
  243. assert(Entry.isEnd());
  244. B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
  245. B.second = V;
  246. Present.set(Entry.index());
  247. Deleted.reset(Entry.index());
  248. grow(Traits);
  249. assert((find_as(K, Traits)) != end());
  250. return true;
  251. }
  252. static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
  253. template <typename TraitsT>
  254. void grow(TraitsT &Traits) {
  255. uint32_t S = size();
  256. uint32_t MaxLoad = maxLoad(capacity());
  257. if (S < maxLoad(capacity()))
  258. return;
  259. assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
  260. uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
  261. // Growing requires rebuilding the table and re-hashing every item. Make a
  262. // copy with a larger capacity, insert everything into the copy, then swap
  263. // it in.
  264. HashTable NewMap(NewCapacity);
  265. for (auto I : Present) {
  266. auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
  267. NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
  268. Buckets[I].first);
  269. }
  270. Buckets.swap(NewMap.Buckets);
  271. std::swap(Present, NewMap.Present);
  272. std::swap(Deleted, NewMap.Deleted);
  273. assert(capacity() == NewCapacity);
  274. assert(size() == S);
  275. }
  276. };
  277. } // end namespace pdb
  278. } // end namespace llvm
  279. #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  280. #ifdef __GNUC__
  281. #pragma GCC diagnostic pop
  282. #endif