FoldingSet.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements a hash set that can be used to remove duplication of
  10. // nodes in a graph.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ADT/FoldingSet.h"
  14. #include "llvm/ADT/StringRef.h"
  15. #include "llvm/Support/Allocator.h"
  16. #include "llvm/Support/ErrorHandling.h"
  17. #include "llvm/Support/MathExtras.h"
  18. #include "llvm/Support/SwapByteOrder.h"
  19. #include <cassert>
  20. #include <cstring>
  21. using namespace llvm;
  22. //===----------------------------------------------------------------------===//
  23. // FoldingSetNodeIDRef Implementation
  24. bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
  25. if (Size != RHS.Size) return false;
  26. return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
  27. }
  28. /// Used to compare the "ordering" of two nodes as defined by the
  29. /// profiled bits and their ordering defined by memcmp().
  30. bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
  31. if (Size != RHS.Size)
  32. return Size < RHS.Size;
  33. return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
  34. }
  35. //===----------------------------------------------------------------------===//
  36. // FoldingSetNodeID Implementation
  37. /// Add* - Add various data types to Bit data.
  38. ///
  39. void FoldingSetNodeID::AddString(StringRef String) {
  40. unsigned Size = String.size();
  41. unsigned NumInserts = 1 + divideCeil(Size, 4);
  42. Bits.reserve(Bits.size() + NumInserts);
  43. Bits.push_back(Size);
  44. if (!Size) return;
  45. unsigned Units = Size / 4;
  46. unsigned Pos = 0;
  47. const unsigned *Base = (const unsigned*) String.data();
  48. // If the string is aligned do a bulk transfer.
  49. if (!((intptr_t)Base & 3)) {
  50. Bits.append(Base, Base + Units);
  51. Pos = (Units + 1) * 4;
  52. } else {
  53. // Otherwise do it the hard way.
  54. // To be compatible with above bulk transfer, we need to take endianness
  55. // into account.
  56. static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
  57. "Unexpected host endianness");
  58. if (sys::IsBigEndianHost) {
  59. for (Pos += 4; Pos <= Size; Pos += 4) {
  60. unsigned V = ((unsigned char)String[Pos - 4] << 24) |
  61. ((unsigned char)String[Pos - 3] << 16) |
  62. ((unsigned char)String[Pos - 2] << 8) |
  63. (unsigned char)String[Pos - 1];
  64. Bits.push_back(V);
  65. }
  66. } else { // Little-endian host
  67. for (Pos += 4; Pos <= Size; Pos += 4) {
  68. unsigned V = ((unsigned char)String[Pos - 1] << 24) |
  69. ((unsigned char)String[Pos - 2] << 16) |
  70. ((unsigned char)String[Pos - 3] << 8) |
  71. (unsigned char)String[Pos - 4];
  72. Bits.push_back(V);
  73. }
  74. }
  75. }
  76. // With the leftover bits.
  77. unsigned V = 0;
  78. // Pos will have overshot size by 4 - #bytes left over.
  79. // No need to take endianness into account here - this is always executed.
  80. switch (Pos - Size) {
  81. case 1: V = (V << 8) | (unsigned char)String[Size - 3]; [[fallthrough]];
  82. case 2: V = (V << 8) | (unsigned char)String[Size - 2]; [[fallthrough]];
  83. case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
  84. default: return; // Nothing left.
  85. }
  86. Bits.push_back(V);
  87. }
  88. // AddNodeID - Adds the Bit data of another ID to *this.
  89. void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
  90. Bits.append(ID.Bits.begin(), ID.Bits.end());
  91. }
  92. /// operator== - Used to compare two nodes to each other.
  93. ///
  94. bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
  95. return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
  96. }
  97. /// operator== - Used to compare two nodes to each other.
  98. ///
  99. bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
  100. return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
  101. }
  102. /// Used to compare the "ordering" of two nodes as defined by the
  103. /// profiled bits and their ordering defined by memcmp().
  104. bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
  105. return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
  106. }
  107. bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
  108. return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
  109. }
  110. /// Intern - Copy this node's data to a memory region allocated from the
  111. /// given allocator and return a FoldingSetNodeIDRef describing the
  112. /// interned data.
  113. FoldingSetNodeIDRef
  114. FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
  115. unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
  116. std::uninitialized_copy(Bits.begin(), Bits.end(), New);
  117. return FoldingSetNodeIDRef(New, Bits.size());
  118. }
  119. //===----------------------------------------------------------------------===//
  120. /// Helper functions for FoldingSetBase.
  121. /// GetNextPtr - In order to save space, each bucket is a
  122. /// singly-linked-list. In order to make deletion more efficient, we make
  123. /// the list circular, so we can delete a node without computing its hash.
  124. /// The problem with this is that the start of the hash buckets are not
  125. /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
  126. /// use GetBucketPtr when this happens.
  127. static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
  128. // The low bit is set if this is the pointer back to the bucket.
  129. if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
  130. return nullptr;
  131. return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
  132. }
  133. /// testing.
  134. static void **GetBucketPtr(void *NextInBucketPtr) {
  135. intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
  136. assert((Ptr & 1) && "Not a bucket pointer");
  137. return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
  138. }
  139. /// GetBucketFor - Hash the specified node ID and return the hash bucket for
  140. /// the specified ID.
  141. static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
  142. // NumBuckets is always a power of 2.
  143. unsigned BucketNum = Hash & (NumBuckets-1);
  144. return Buckets + BucketNum;
  145. }
  146. /// AllocateBuckets - Allocated initialized bucket memory.
  147. static void **AllocateBuckets(unsigned NumBuckets) {
  148. void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1,
  149. sizeof(void*)));
  150. // Set the very last bucket to be a non-null "pointer".
  151. Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
  152. return Buckets;
  153. }
  154. //===----------------------------------------------------------------------===//
  155. // FoldingSetBase Implementation
  156. FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) {
  157. assert(5 < Log2InitSize && Log2InitSize < 32 &&
  158. "Initial hash table size out of range");
  159. NumBuckets = 1 << Log2InitSize;
  160. Buckets = AllocateBuckets(NumBuckets);
  161. NumNodes = 0;
  162. }
  163. FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
  164. : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
  165. Arg.Buckets = nullptr;
  166. Arg.NumBuckets = 0;
  167. Arg.NumNodes = 0;
  168. }
  169. FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) {
  170. free(Buckets); // This may be null if the set is in a moved-from state.
  171. Buckets = RHS.Buckets;
  172. NumBuckets = RHS.NumBuckets;
  173. NumNodes = RHS.NumNodes;
  174. RHS.Buckets = nullptr;
  175. RHS.NumBuckets = 0;
  176. RHS.NumNodes = 0;
  177. return *this;
  178. }
  179. FoldingSetBase::~FoldingSetBase() {
  180. free(Buckets);
  181. }
  182. void FoldingSetBase::clear() {
  183. // Set all but the last bucket to null pointers.
  184. memset(Buckets, 0, NumBuckets*sizeof(void*));
  185. // Set the very last bucket to be a non-null "pointer".
  186. Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
  187. // Reset the node count to zero.
  188. NumNodes = 0;
  189. }
  190. void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
  191. const FoldingSetInfo &Info) {
  192. assert((NewBucketCount > NumBuckets) &&
  193. "Can't shrink a folding set with GrowBucketCount");
  194. assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
  195. void **OldBuckets = Buckets;
  196. unsigned OldNumBuckets = NumBuckets;
  197. // Clear out new buckets.
  198. Buckets = AllocateBuckets(NewBucketCount);
  199. // Set NumBuckets only if allocation of new buckets was successful.
  200. NumBuckets = NewBucketCount;
  201. NumNodes = 0;
  202. // Walk the old buckets, rehashing nodes into their new place.
  203. FoldingSetNodeID TempID;
  204. for (unsigned i = 0; i != OldNumBuckets; ++i) {
  205. void *Probe = OldBuckets[i];
  206. if (!Probe) continue;
  207. while (Node *NodeInBucket = GetNextPtr(Probe)) {
  208. // Figure out the next link, remove NodeInBucket from the old link.
  209. Probe = NodeInBucket->getNextInBucket();
  210. NodeInBucket->SetNextInBucket(nullptr);
  211. // Insert the node into the new bucket, after recomputing the hash.
  212. InsertNode(NodeInBucket,
  213. GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
  214. Buckets, NumBuckets),
  215. Info);
  216. TempID.clear();
  217. }
  218. }
  219. free(OldBuckets);
  220. }
  221. /// GrowHashTable - Double the size of the hash table and rehash everything.
  222. ///
  223. void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
  224. GrowBucketCount(NumBuckets * 2, Info);
  225. }
  226. void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
  227. // This will give us somewhere between EltCount / 2 and
  228. // EltCount buckets. This puts us in the load factor
  229. // range of 1.0 - 2.0.
  230. if(EltCount < capacity())
  231. return;
  232. GrowBucketCount(PowerOf2Floor(EltCount), Info);
  233. }
  234. /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
  235. /// return it. If not, return the insertion token that will make insertion
  236. /// faster.
  237. FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos(
  238. const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) {
  239. unsigned IDHash = ID.ComputeHash();
  240. void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
  241. void *Probe = *Bucket;
  242. InsertPos = nullptr;
  243. FoldingSetNodeID TempID;
  244. while (Node *NodeInBucket = GetNextPtr(Probe)) {
  245. if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID))
  246. return NodeInBucket;
  247. TempID.clear();
  248. Probe = NodeInBucket->getNextInBucket();
  249. }
  250. // Didn't find the node, return null with the bucket as the InsertPos.
  251. InsertPos = Bucket;
  252. return nullptr;
  253. }
  254. /// InsertNode - Insert the specified node into the folding set, knowing that it
  255. /// is not already in the map. InsertPos must be obtained from
  256. /// FindNodeOrInsertPos.
  257. void FoldingSetBase::InsertNode(Node *N, void *InsertPos,
  258. const FoldingSetInfo &Info) {
  259. assert(!N->getNextInBucket());
  260. // Do we need to grow the hashtable?
  261. if (NumNodes+1 > capacity()) {
  262. GrowHashTable(Info);
  263. FoldingSetNodeID TempID;
  264. InsertPos = GetBucketFor(Info.ComputeNodeHash(this, N, TempID), Buckets,
  265. NumBuckets);
  266. }
  267. ++NumNodes;
  268. /// The insert position is actually a bucket pointer.
  269. void **Bucket = static_cast<void**>(InsertPos);
  270. void *Next = *Bucket;
  271. // If this is the first insertion into this bucket, its next pointer will be
  272. // null. Pretend as if it pointed to itself, setting the low bit to indicate
  273. // that it is a pointer to the bucket.
  274. if (!Next)
  275. Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
  276. // Set the node's next pointer, and make the bucket point to the node.
  277. N->SetNextInBucket(Next);
  278. *Bucket = N;
  279. }
  280. /// RemoveNode - Remove a node from the folding set, returning true if one was
  281. /// removed or false if the node was not in the folding set.
  282. bool FoldingSetBase::RemoveNode(Node *N) {
  283. // Because each bucket is a circular list, we don't need to compute N's hash
  284. // to remove it.
  285. void *Ptr = N->getNextInBucket();
  286. if (!Ptr) return false; // Not in folding set.
  287. --NumNodes;
  288. N->SetNextInBucket(nullptr);
  289. // Remember what N originally pointed to, either a bucket or another node.
  290. void *NodeNextPtr = Ptr;
  291. // Chase around the list until we find the node (or bucket) which points to N.
  292. while (true) {
  293. if (Node *NodeInBucket = GetNextPtr(Ptr)) {
  294. // Advance pointer.
  295. Ptr = NodeInBucket->getNextInBucket();
  296. // We found a node that points to N, change it to point to N's next node,
  297. // removing N from the list.
  298. if (Ptr == N) {
  299. NodeInBucket->SetNextInBucket(NodeNextPtr);
  300. return true;
  301. }
  302. } else {
  303. void **Bucket = GetBucketPtr(Ptr);
  304. Ptr = *Bucket;
  305. // If we found that the bucket points to N, update the bucket to point to
  306. // whatever is next.
  307. if (Ptr == N) {
  308. *Bucket = NodeNextPtr;
  309. return true;
  310. }
  311. }
  312. }
  313. }
  314. /// GetOrInsertNode - If there is an existing simple Node exactly
  315. /// equal to the specified node, return it. Otherwise, insert 'N' and it
  316. /// instead.
  317. FoldingSetBase::Node *
  318. FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N,
  319. const FoldingSetInfo &Info) {
  320. FoldingSetNodeID ID;
  321. Info.GetNodeProfile(this, N, ID);
  322. void *IP;
  323. if (Node *E = FindNodeOrInsertPos(ID, IP, Info))
  324. return E;
  325. InsertNode(N, IP, Info);
  326. return N;
  327. }
  328. //===----------------------------------------------------------------------===//
  329. // FoldingSetIteratorImpl Implementation
  330. FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
  331. // Skip to the first non-null non-self-cycle bucket.
  332. while (*Bucket != reinterpret_cast<void*>(-1) &&
  333. (!*Bucket || !GetNextPtr(*Bucket)))
  334. ++Bucket;
  335. NodePtr = static_cast<FoldingSetNode*>(*Bucket);
  336. }
  337. void FoldingSetIteratorImpl::advance() {
  338. // If there is another link within this bucket, go to it.
  339. void *Probe = NodePtr->getNextInBucket();
  340. if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
  341. NodePtr = NextNodeInBucket;
  342. else {
  343. // Otherwise, this is the last link in this bucket.
  344. void **Bucket = GetBucketPtr(Probe);
  345. // Skip to the next non-null non-self-cycle bucket.
  346. do {
  347. ++Bucket;
  348. } while (*Bucket != reinterpret_cast<void*>(-1) &&
  349. (!*Bucket || !GetNextPtr(*Bucket)));
  350. NodePtr = static_cast<FoldingSetNode*>(*Bucket);
  351. }
  352. }
  353. //===----------------------------------------------------------------------===//
  354. // FoldingSetBucketIteratorImpl Implementation
  355. FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
  356. Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
  357. }