FoldingSet.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements a hash set that can be used to remove duplication of
  10. // nodes in a graph.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ADT/FoldingSet.h"
  14. #include "llvm/ADT/Hashing.h"
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/Support/Allocator.h"
  17. #include "llvm/Support/ErrorHandling.h"
  18. #include "llvm/Support/Host.h"
  19. #include "llvm/Support/MathExtras.h"
  20. #include <cassert>
  21. #include <cstring>
  22. using namespace llvm;
  23. //===----------------------------------------------------------------------===//
  24. // FoldingSetNodeIDRef Implementation
  25. /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
  26. /// used to lookup the node in the FoldingSetBase.
  27. unsigned FoldingSetNodeIDRef::ComputeHash() const {
  28. return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
  29. }
  30. bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
  31. if (Size != RHS.Size) return false;
  32. return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
  33. }
  34. /// Used to compare the "ordering" of two nodes as defined by the
  35. /// profiled bits and their ordering defined by memcmp().
  36. bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
  37. if (Size != RHS.Size)
  38. return Size < RHS.Size;
  39. return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
  40. }
  41. //===----------------------------------------------------------------------===//
  42. // FoldingSetNodeID Implementation
  43. /// Add* - Add various data types to Bit data.
  44. ///
  45. void FoldingSetNodeID::AddPointer(const void *Ptr) {
  46. // Note: this adds pointers to the hash using sizes and endianness that
  47. // depend on the host. It doesn't matter, however, because hashing on
  48. // pointer values is inherently unstable. Nothing should depend on the
  49. // ordering of nodes in the folding set.
  50. static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
  51. "unexpected pointer size");
  52. AddInteger(reinterpret_cast<uintptr_t>(Ptr));
  53. }
  54. void FoldingSetNodeID::AddInteger(signed I) {
  55. Bits.push_back(I);
  56. }
  57. void FoldingSetNodeID::AddInteger(unsigned I) {
  58. Bits.push_back(I);
  59. }
  60. void FoldingSetNodeID::AddInteger(long I) {
  61. AddInteger((unsigned long)I);
  62. }
  63. void FoldingSetNodeID::AddInteger(unsigned long I) {
  64. if (sizeof(long) == sizeof(int))
  65. AddInteger(unsigned(I));
  66. else if (sizeof(long) == sizeof(long long)) {
  67. AddInteger((unsigned long long)I);
  68. } else {
  69. llvm_unreachable("unexpected sizeof(long)");
  70. }
  71. }
  72. void FoldingSetNodeID::AddInteger(long long I) {
  73. AddInteger((unsigned long long)I);
  74. }
  75. void FoldingSetNodeID::AddInteger(unsigned long long I) {
  76. AddInteger(unsigned(I));
  77. AddInteger(unsigned(I >> 32));
  78. }
  79. void FoldingSetNodeID::AddString(StringRef String) {
  80. unsigned Size = String.size();
  81. unsigned NumInserts = 1 + divideCeil(Size, 4);
  82. Bits.reserve(Bits.size() + NumInserts);
  83. Bits.push_back(Size);
  84. if (!Size) return;
  85. unsigned Units = Size / 4;
  86. unsigned Pos = 0;
  87. const unsigned *Base = (const unsigned*) String.data();
  88. // If the string is aligned do a bulk transfer.
  89. if (!((intptr_t)Base & 3)) {
  90. Bits.append(Base, Base + Units);
  91. Pos = (Units + 1) * 4;
  92. } else {
  93. // Otherwise do it the hard way.
  94. // To be compatible with above bulk transfer, we need to take endianness
  95. // into account.
  96. static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
  97. "Unexpected host endianness");
  98. if (sys::IsBigEndianHost) {
  99. for (Pos += 4; Pos <= Size; Pos += 4) {
  100. unsigned V = ((unsigned char)String[Pos - 4] << 24) |
  101. ((unsigned char)String[Pos - 3] << 16) |
  102. ((unsigned char)String[Pos - 2] << 8) |
  103. (unsigned char)String[Pos - 1];
  104. Bits.push_back(V);
  105. }
  106. } else { // Little-endian host
  107. for (Pos += 4; Pos <= Size; Pos += 4) {
  108. unsigned V = ((unsigned char)String[Pos - 1] << 24) |
  109. ((unsigned char)String[Pos - 2] << 16) |
  110. ((unsigned char)String[Pos - 3] << 8) |
  111. (unsigned char)String[Pos - 4];
  112. Bits.push_back(V);
  113. }
  114. }
  115. }
  116. // With the leftover bits.
  117. unsigned V = 0;
  118. // Pos will have overshot size by 4 - #bytes left over.
  119. // No need to take endianness into account here - this is always executed.
  120. switch (Pos - Size) {
  121. case 1: V = (V << 8) | (unsigned char)String[Size - 3]; LLVM_FALLTHROUGH;
  122. case 2: V = (V << 8) | (unsigned char)String[Size - 2]; LLVM_FALLTHROUGH;
  123. case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
  124. default: return; // Nothing left.
  125. }
  126. Bits.push_back(V);
  127. }
  128. // AddNodeID - Adds the Bit data of another ID to *this.
  129. void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
  130. Bits.append(ID.Bits.begin(), ID.Bits.end());
  131. }
  132. /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
  133. /// lookup the node in the FoldingSetBase.
  134. unsigned FoldingSetNodeID::ComputeHash() const {
  135. return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
  136. }
  137. /// operator== - Used to compare two nodes to each other.
  138. ///
  139. bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
  140. return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
  141. }
  142. /// operator== - Used to compare two nodes to each other.
  143. ///
  144. bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
  145. return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
  146. }
  147. /// Used to compare the "ordering" of two nodes as defined by the
  148. /// profiled bits and their ordering defined by memcmp().
  149. bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
  150. return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
  151. }
  152. bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
  153. return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
  154. }
  155. /// Intern - Copy this node's data to a memory region allocated from the
  156. /// given allocator and return a FoldingSetNodeIDRef describing the
  157. /// interned data.
  158. FoldingSetNodeIDRef
  159. FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
  160. unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
  161. std::uninitialized_copy(Bits.begin(), Bits.end(), New);
  162. return FoldingSetNodeIDRef(New, Bits.size());
  163. }
  164. //===----------------------------------------------------------------------===//
  165. /// Helper functions for FoldingSetBase.
  166. /// GetNextPtr - In order to save space, each bucket is a
  167. /// singly-linked-list. In order to make deletion more efficient, we make
  168. /// the list circular, so we can delete a node without computing its hash.
  169. /// The problem with this is that the start of the hash buckets are not
  170. /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
  171. /// use GetBucketPtr when this happens.
  172. static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
  173. // The low bit is set if this is the pointer back to the bucket.
  174. if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
  175. return nullptr;
  176. return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
  177. }
  /// GetBucketPtr - Recover the bucket address from a link that points back to
  /// its bucket. Such links carry a set low bit as a tag (asserted here); the
  /// tag is cleared and the remaining value is returned as the bucket pointer.
  static void **GetBucketPtr(void *NextInBucketPtr) {
    const intptr_t Tagged = reinterpret_cast<intptr_t>(NextInBucketPtr);
    assert((Tagged & 1) && "Not a bucket pointer");
    const intptr_t Untagged = Tagged & ~intptr_t(1);
    return reinterpret_cast<void **>(Untagged);
  }
  /// GetBucketFor - Map a hash value to its bucket: returns the address of the
  /// slot in Buckets selected by the low bits of Hash.
  static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
    // NumBuckets is always a power of 2, so masking is a cheap modulo.
    const unsigned Mask = NumBuckets - 1;
    return &Buckets[Hash & Mask];
  }
  191. /// AllocateBuckets - Allocated initialized bucket memory.
  192. static void **AllocateBuckets(unsigned NumBuckets) {
  193. void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1,
  194. sizeof(void*)));
  195. // Set the very last bucket to be a non-null "pointer".
  196. Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
  197. return Buckets;
  198. }
  199. //===----------------------------------------------------------------------===//
  200. // FoldingSetBase Implementation
  201. FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) {
  202. assert(5 < Log2InitSize && Log2InitSize < 32 &&
  203. "Initial hash table size out of range");
  204. NumBuckets = 1 << Log2InitSize;
  205. Buckets = AllocateBuckets(NumBuckets);
  206. NumNodes = 0;
  207. }
  208. FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
  209. : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
  210. Arg.Buckets = nullptr;
  211. Arg.NumBuckets = 0;
  212. Arg.NumNodes = 0;
  213. }
  214. FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) {
  215. free(Buckets); // This may be null if the set is in a moved-from state.
  216. Buckets = RHS.Buckets;
  217. NumBuckets = RHS.NumBuckets;
  218. NumNodes = RHS.NumNodes;
  219. RHS.Buckets = nullptr;
  220. RHS.NumBuckets = 0;
  221. RHS.NumNodes = 0;
  222. return *this;
  223. }
  224. FoldingSetBase::~FoldingSetBase() {
  225. free(Buckets);
  226. }
  227. void FoldingSetBase::clear() {
  228. // Set all but the last bucket to null pointers.
  229. memset(Buckets, 0, NumBuckets*sizeof(void*));
  230. // Set the very last bucket to be a non-null "pointer".
  231. Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
  232. // Reset the node count to zero.
  233. NumNodes = 0;
  234. }
  235. void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
  236. const FoldingSetInfo &Info) {
  237. assert((NewBucketCount > NumBuckets) &&
  238. "Can't shrink a folding set with GrowBucketCount");
  239. assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
  240. void **OldBuckets = Buckets;
  241. unsigned OldNumBuckets = NumBuckets;
  242. // Clear out new buckets.
  243. Buckets = AllocateBuckets(NewBucketCount);
  244. // Set NumBuckets only if allocation of new buckets was successful.
  245. NumBuckets = NewBucketCount;
  246. NumNodes = 0;
  247. // Walk the old buckets, rehashing nodes into their new place.
  248. FoldingSetNodeID TempID;
  249. for (unsigned i = 0; i != OldNumBuckets; ++i) {
  250. void *Probe = OldBuckets[i];
  251. if (!Probe) continue;
  252. while (Node *NodeInBucket = GetNextPtr(Probe)) {
  253. // Figure out the next link, remove NodeInBucket from the old link.
  254. Probe = NodeInBucket->getNextInBucket();
  255. NodeInBucket->SetNextInBucket(nullptr);
  256. // Insert the node into the new bucket, after recomputing the hash.
  257. InsertNode(NodeInBucket,
  258. GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
  259. Buckets, NumBuckets),
  260. Info);
  261. TempID.clear();
  262. }
  263. }
  264. free(OldBuckets);
  265. }
  266. /// GrowHashTable - Double the size of the hash table and rehash everything.
  267. ///
  268. void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
  269. GrowBucketCount(NumBuckets * 2, Info);
  270. }
  271. void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
  272. // This will give us somewhere between EltCount / 2 and
  273. // EltCount buckets. This puts us in the load factor
  274. // range of 1.0 - 2.0.
  275. if(EltCount < capacity())
  276. return;
  277. GrowBucketCount(PowerOf2Floor(EltCount), Info);
  278. }
  279. /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
  280. /// return it. If not, return the insertion token that will make insertion
  281. /// faster.
  282. FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos(
  283. const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) {
  284. unsigned IDHash = ID.ComputeHash();
  285. void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
  286. void *Probe = *Bucket;
  287. InsertPos = nullptr;
  288. FoldingSetNodeID TempID;
  289. while (Node *NodeInBucket = GetNextPtr(Probe)) {
  290. if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID))
  291. return NodeInBucket;
  292. TempID.clear();
  293. Probe = NodeInBucket->getNextInBucket();
  294. }
  295. // Didn't find the node, return null with the bucket as the InsertPos.
  296. InsertPos = Bucket;
  297. return nullptr;
  298. }
  299. /// InsertNode - Insert the specified node into the folding set, knowing that it
  300. /// is not already in the map. InsertPos must be obtained from
  301. /// FindNodeOrInsertPos.
  302. void FoldingSetBase::InsertNode(Node *N, void *InsertPos,
  303. const FoldingSetInfo &Info) {
  304. assert(!N->getNextInBucket());
  305. // Do we need to grow the hashtable?
  306. if (NumNodes+1 > capacity()) {
  307. GrowHashTable(Info);
  308. FoldingSetNodeID TempID;
  309. InsertPos = GetBucketFor(Info.ComputeNodeHash(this, N, TempID), Buckets,
  310. NumBuckets);
  311. }
  312. ++NumNodes;
  313. /// The insert position is actually a bucket pointer.
  314. void **Bucket = static_cast<void**>(InsertPos);
  315. void *Next = *Bucket;
  316. // If this is the first insertion into this bucket, its next pointer will be
  317. // null. Pretend as if it pointed to itself, setting the low bit to indicate
  318. // that it is a pointer to the bucket.
  319. if (!Next)
  320. Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
  321. // Set the node's next pointer, and make the bucket point to the node.
  322. N->SetNextInBucket(Next);
  323. *Bucket = N;
  324. }
  325. /// RemoveNode - Remove a node from the folding set, returning true if one was
  326. /// removed or false if the node was not in the folding set.
  327. bool FoldingSetBase::RemoveNode(Node *N) {
  328. // Because each bucket is a circular list, we don't need to compute N's hash
  329. // to remove it.
  330. void *Ptr = N->getNextInBucket();
  331. if (!Ptr) return false; // Not in folding set.
  332. --NumNodes;
  333. N->SetNextInBucket(nullptr);
  334. // Remember what N originally pointed to, either a bucket or another node.
  335. void *NodeNextPtr = Ptr;
  336. // Chase around the list until we find the node (or bucket) which points to N.
  337. while (true) {
  338. if (Node *NodeInBucket = GetNextPtr(Ptr)) {
  339. // Advance pointer.
  340. Ptr = NodeInBucket->getNextInBucket();
  341. // We found a node that points to N, change it to point to N's next node,
  342. // removing N from the list.
  343. if (Ptr == N) {
  344. NodeInBucket->SetNextInBucket(NodeNextPtr);
  345. return true;
  346. }
  347. } else {
  348. void **Bucket = GetBucketPtr(Ptr);
  349. Ptr = *Bucket;
  350. // If we found that the bucket points to N, update the bucket to point to
  351. // whatever is next.
  352. if (Ptr == N) {
  353. *Bucket = NodeNextPtr;
  354. return true;
  355. }
  356. }
  357. }
  358. }
  359. /// GetOrInsertNode - If there is an existing simple Node exactly
  360. /// equal to the specified node, return it. Otherwise, insert 'N' and it
  361. /// instead.
  362. FoldingSetBase::Node *
  363. FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N,
  364. const FoldingSetInfo &Info) {
  365. FoldingSetNodeID ID;
  366. Info.GetNodeProfile(this, N, ID);
  367. void *IP;
  368. if (Node *E = FindNodeOrInsertPos(ID, IP, Info))
  369. return E;
  370. InsertNode(N, IP, Info);
  371. return N;
  372. }
  373. //===----------------------------------------------------------------------===//
  374. // FoldingSetIteratorImpl Implementation
  375. FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
  376. // Skip to the first non-null non-self-cycle bucket.
  377. while (*Bucket != reinterpret_cast<void*>(-1) &&
  378. (!*Bucket || !GetNextPtr(*Bucket)))
  379. ++Bucket;
  380. NodePtr = static_cast<FoldingSetNode*>(*Bucket);
  381. }
  382. void FoldingSetIteratorImpl::advance() {
  383. // If there is another link within this bucket, go to it.
  384. void *Probe = NodePtr->getNextInBucket();
  385. if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
  386. NodePtr = NextNodeInBucket;
  387. else {
  388. // Otherwise, this is the last link in this bucket.
  389. void **Bucket = GetBucketPtr(Probe);
  390. // Skip to the next non-null non-self-cycle bucket.
  391. do {
  392. ++Bucket;
  393. } while (*Bucket != reinterpret_cast<void*>(-1) &&
  394. (!*Bucket || !GetNextPtr(*Bucket)));
  395. NodePtr = static_cast<FoldingSetNode*>(*Bucket);
  396. }
  397. }
  398. //===----------------------------------------------------------------------===//
  399. // FoldingSetBucketIteratorImpl Implementation
  400. FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
  401. Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
  402. }