DifferenceEngine.cpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038
  1. //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This header defines the implementation of the LLVM difference
  10. // engine, which structurally compares global values within a module.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "DifferenceEngine.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/SmallString.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/StringSet.h"
  19. #include "llvm/IR/BasicBlock.h"
  20. #include "llvm/IR/CFG.h"
  21. #include "llvm/IR/Constants.h"
  22. #include "llvm/IR/Function.h"
  23. #include "llvm/IR/Instructions.h"
  24. #include "llvm/IR/Module.h"
  25. #include "llvm/Support/ErrorHandling.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include "llvm/Support/type_traits.h"
  28. #include <utility>
  29. using namespace llvm;
  30. namespace {
  31. /// A priority queue, implemented as a heap.
  32. template <class T, class Sorter, unsigned InlineCapacity>
  33. class PriorityQueue {
  34. Sorter Precedes;
  35. llvm::SmallVector<T, InlineCapacity> Storage;
  36. public:
  37. PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
  38. /// Checks whether the heap is empty.
  39. bool empty() const { return Storage.empty(); }
  40. /// Insert a new value on the heap.
  41. void insert(const T &V) {
  42. unsigned Index = Storage.size();
  43. Storage.push_back(V);
  44. if (Index == 0) return;
  45. T *data = Storage.data();
  46. while (true) {
  47. unsigned Target = (Index + 1) / 2 - 1;
  48. if (!Precedes(data[Index], data[Target])) return;
  49. std::swap(data[Index], data[Target]);
  50. if (Target == 0) return;
  51. Index = Target;
  52. }
  53. }
  54. /// Remove the minimum value in the heap. Only valid on a non-empty heap.
  55. T remove_min() {
  56. assert(!empty());
  57. T tmp = Storage[0];
  58. unsigned NewSize = Storage.size() - 1;
  59. if (NewSize) {
  60. // Move the slot at the end to the beginning.
  61. if (std::is_trivially_copyable<T>::value)
  62. Storage[0] = Storage[NewSize];
  63. else
  64. std::swap(Storage[0], Storage[NewSize]);
  65. // Bubble the root up as necessary.
  66. unsigned Index = 0;
  67. while (true) {
  68. // With a 1-based index, the children would be Index*2 and Index*2+1.
  69. unsigned R = (Index + 1) * 2;
  70. unsigned L = R - 1;
  71. // If R is out of bounds, we're done after this in any case.
  72. if (R >= NewSize) {
  73. // If L is also out of bounds, we're done immediately.
  74. if (L >= NewSize) break;
  75. // Otherwise, test whether we should swap L and Index.
  76. if (Precedes(Storage[L], Storage[Index]))
  77. std::swap(Storage[L], Storage[Index]);
  78. break;
  79. }
  80. // Otherwise, we need to compare with the smaller of L and R.
  81. // Prefer R because it's closer to the end of the array.
  82. unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
  83. // If Index is >= the min of L and R, then heap ordering is restored.
  84. if (!Precedes(Storage[IndexToTest], Storage[Index]))
  85. break;
  86. // Otherwise, keep bubbling up.
  87. std::swap(Storage[IndexToTest], Storage[Index]);
  88. Index = IndexToTest;
  89. }
  90. }
  91. Storage.pop_back();
  92. return tmp;
  93. }
  94. };
  95. /// A function-scope difference engine.
  96. class FunctionDifferenceEngine {
  97. DifferenceEngine &Engine;
  98. // Some initializers may reference the variable we're currently checking. This
  99. // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
  100. // recursing.
  101. const Value *SavedLHS;
  102. const Value *SavedRHS;
  103. // The current mapping from old local values to new local values.
  104. DenseMap<const Value *, const Value *> Values;
  105. // The current mapping from old blocks to new blocks.
  106. DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
  107. // The tentative mapping from old local values while comparing a pair of
  108. // basic blocks. Once the pair has been processed, the tentative mapping is
  109. // committed to the Values map.
  110. DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
  111. // Equivalence Assumptions
  112. //
  113. // For basic blocks in loops, some values in phi nodes may depend on
  114. // values from not yet processed basic blocks in the loop. When encountering
  115. // such values, we optimistically asssume their equivalence and store this
  116. // assumption in a BlockDiffCandidate for the pair of compared BBs.
  117. //
  118. // Once we have diffed all BBs, for every BlockDiffCandidate, we check all
  119. // stored assumptions using the Values map that stores proven equivalences
  120. // between the old and new values, and report a diff if an assumption cannot
  121. // be proven to be true.
  122. //
  123. // Note that after having made an assumption, all further determined
  124. // equivalences implicitly depend on that assumption. These will not be
  125. // reverted or reported if the assumption proves to be false, because these
  126. // are considered indirect diffs caused by earlier direct diffs.
  127. //
  128. // We aim to avoid false negatives in llvm-diff, that is, ensure that
  129. // whenever no diff is reported, the functions are indeed equal. If
  130. // assumptions were made, this is not entirely clear, because in principle we
  131. // could end up with a circular proof where the proof of equivalence of two
  132. // nodes is depending on the assumption of their equivalence.
  133. //
  134. // To see that assumptions do not add false negatives, note that if we do not
  135. // report a diff, this means that there is an equivalence mapping between old
  136. // and new values that is consistent with all assumptions made. The circular
  137. // dependency that exists on an IR value level does not exist at run time,
  138. // because the values selected by the phi nodes must always already have been
  139. // computed. Hence, we can prove equivalence of the old and new functions by
  140. // considering step-wise parallel execution, and incrementally proving
  141. // equivalence of every new computed value. Another way to think about it is
  142. // to imagine cloning the loop BBs for every iteration, turning the loops
  143. // into (possibly infinite) DAGs, and proving equivalence by induction on the
  144. // iteration, using the computed value mapping.
  145. // The class BlockDiffCandidate stores pairs which either have already been
  146. // proven to differ, or pairs whose equivalence depends on assumptions to be
  147. // verified later.
  148. struct BlockDiffCandidate {
  149. const BasicBlock *LBB;
  150. const BasicBlock *RBB;
  151. // Maps old values to assumed-to-be-equivalent new values
  152. SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions;
  153. // If set, we already know the blocks differ.
  154. bool KnownToDiffer;
  155. };
  156. // List of block diff candidates in the order found by processing.
  157. // We generate reports in this order.
  158. // For every LBB, there may only be one corresponding RBB.
  159. SmallVector<BlockDiffCandidate> BlockDiffCandidates;
  160. // Maps LBB to the index of its BlockDiffCandidate, if existing.
  161. DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices;
  162. // Note: Every LBB must always be queried together with the same RBB.
  163. // The returned reference is not permanently valid and should not be stored.
  164. BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB,
  165. const BasicBlock *RBB) {
  166. auto It = BlockDiffCandidateIndices.find(LBB);
  167. // Check if LBB already has a diff candidate
  168. if (It == BlockDiffCandidateIndices.end()) {
  169. // Add new one
  170. BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size();
  171. BlockDiffCandidates.push_back(
  172. {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false});
  173. return BlockDiffCandidates.back();
  174. }
  175. // Use existing one
  176. BlockDiffCandidate &Result = BlockDiffCandidates[It->second];
  177. assert(Result.RBB == RBB && "Inconsistent basic block pairing!");
  178. return Result;
  179. }
  180. // Optionally passed to equivalence checker functions, so these can add
  181. // assumptions in BlockDiffCandidates. Its presence controls whether
  182. // assumptions are generated.
  183. struct AssumptionContext {
  184. // The two basic blocks that need the two compared values to be equivalent.
  185. const BasicBlock *LBB;
  186. const BasicBlock *RBB;
  187. };
  188. unsigned getUnprocPredCount(const BasicBlock *Block) const {
  189. unsigned Count = 0;
  190. for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
  191. ++I)
  192. if (!Blocks.count(*I)) Count++;
  193. return Count;
  194. }
  195. typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
  196. /// A type which sorts a priority queue by the number of unprocessed
  197. /// predecessor blocks it has remaining.
  198. ///
  199. /// This is actually really expensive to calculate.
  200. struct QueueSorter {
  201. const FunctionDifferenceEngine &fde;
  202. explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
  203. bool operator()(BlockPair &Old, BlockPair &New) {
  204. return fde.getUnprocPredCount(Old.first)
  205. < fde.getUnprocPredCount(New.first);
  206. }
  207. };
  208. /// A queue of unified blocks to process.
  209. PriorityQueue<BlockPair, QueueSorter, 20> Queue;
  210. /// Try to unify the given two blocks. Enqueues them for processing
  211. /// if they haven't already been processed.
  212. ///
  213. /// Returns true if there was a problem unifying them.
  214. bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
  215. const BasicBlock *&Ref = Blocks[L];
  216. if (Ref) {
  217. if (Ref == R) return false;
  218. Engine.logf("successor %l cannot be equivalent to %r; "
  219. "it's already equivalent to %r")
  220. << L << R << Ref;
  221. return true;
  222. }
  223. Ref = R;
  224. Queue.insert(BlockPair(L, R));
  225. return false;
  226. }
  227. /// Unifies two instructions, given that they're known not to have
  228. /// structural differences.
  229. void unify(const Instruction *L, const Instruction *R) {
  230. DifferenceEngine::Context C(Engine, L, R);
  231. bool Result = diff(L, R, true, true, true);
  232. assert(!Result && "structural differences second time around?");
  233. (void) Result;
  234. if (!L->use_empty())
  235. Values[L] = R;
  236. }
  237. void processQueue() {
  238. while (!Queue.empty()) {
  239. BlockPair Pair = Queue.remove_min();
  240. diff(Pair.first, Pair.second);
  241. }
  242. }
  243. void checkAndReportDiffCandidates() {
  244. for (BlockDiffCandidate &BDC : BlockDiffCandidates) {
  245. // Check assumptions
  246. for (const auto &[L, R] : BDC.EquivalenceAssumptions) {
  247. auto It = Values.find(L);
  248. if (It == Values.end() || It->second != R) {
  249. BDC.KnownToDiffer = true;
  250. break;
  251. }
  252. }
  253. // Run block diff if the BBs differ
  254. if (BDC.KnownToDiffer) {
  255. DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB);
  256. runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin());
  257. }
  258. }
  259. }
  260. void diff(const BasicBlock *L, const BasicBlock *R) {
  261. DifferenceEngine::Context C(Engine, L, R);
  262. BasicBlock::const_iterator LI = L->begin(), LE = L->end();
  263. BasicBlock::const_iterator RI = R->begin();
  264. do {
  265. assert(LI != LE && RI != R->end());
  266. const Instruction *LeftI = &*LI, *RightI = &*RI;
  267. // If the instructions differ, start the more sophisticated diff
  268. // algorithm at the start of the block.
  269. if (diff(LeftI, RightI, false, false, true)) {
  270. TentativeValues.clear();
  271. // Register (L, R) as diffing pair. Note that we could directly emit a
  272. // block diff here, but this way we ensure all diffs are emitted in one
  273. // consistent order, independent of whether the diffs were detected
  274. // immediately or via invalid assumptions.
  275. getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true;
  276. return;
  277. }
  278. // Otherwise, tentatively unify them.
  279. if (!LeftI->use_empty())
  280. TentativeValues.insert(std::make_pair(LeftI, RightI));
  281. ++LI;
  282. ++RI;
  283. } while (LI != LE); // This is sufficient: we can't get equality of
  284. // terminators if there are residual instructions.
  285. // Unify everything in the block, non-tentatively this time.
  286. TentativeValues.clear();
  287. for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
  288. unify(&*LI, &*RI);
  289. }
  290. bool matchForBlockDiff(const Instruction *L, const Instruction *R);
  291. void runBlockDiff(BasicBlock::const_iterator LI,
  292. BasicBlock::const_iterator RI);
  293. bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
  294. // FIXME: call attributes
  295. AssumptionContext AC = {L.getParent(), R.getParent()};
  296. if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(),
  297. &AC)) {
  298. if (Complain) Engine.log("called functions differ");
  299. return true;
  300. }
  301. if (L.arg_size() != R.arg_size()) {
  302. if (Complain) Engine.log("argument counts differ");
  303. return true;
  304. }
  305. for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
  306. if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) {
  307. if (Complain)
  308. Engine.logf("arguments %l and %r differ")
  309. << L.getArgOperand(I) << R.getArgOperand(I);
  310. return true;
  311. }
  312. return false;
  313. }
  314. // If AllowAssumptions is enabled, whenever we encounter a pair of values
  315. // that we cannot prove to be equivalent, we assume equivalence and store that
  316. // assumption to be checked later in BlockDiffCandidates.
  317. bool diff(const Instruction *L, const Instruction *R, bool Complain,
  318. bool TryUnify, bool AllowAssumptions) {
  319. // FIXME: metadata (if Complain is set)
  320. AssumptionContext ACValue = {L->getParent(), R->getParent()};
  321. // nullptr AssumptionContext disables assumption generation.
  322. const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr;
  323. // Different opcodes always imply different operations.
  324. if (L->getOpcode() != R->getOpcode()) {
  325. if (Complain) Engine.log("different instruction types");
  326. return true;
  327. }
  328. if (isa<CmpInst>(L)) {
  329. if (cast<CmpInst>(L)->getPredicate()
  330. != cast<CmpInst>(R)->getPredicate()) {
  331. if (Complain) Engine.log("different predicates");
  332. return true;
  333. }
  334. } else if (isa<CallInst>(L)) {
  335. return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
  336. } else if (isa<PHINode>(L)) {
  337. const PHINode &LI = cast<PHINode>(*L);
  338. const PHINode &RI = cast<PHINode>(*R);
  339. // This is really weird; type uniquing is broken?
  340. if (LI.getType() != RI.getType()) {
  341. if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
  342. if (Complain) Engine.log("different phi types");
  343. return true;
  344. }
  345. }
  346. if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
  347. if (Complain)
  348. Engine.log("PHI node # of incoming values differ");
  349. return true;
  350. }
  351. for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
  352. if (TryUnify)
  353. tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
  354. if (!equivalentAsOperands(LI.getIncomingValue(I),
  355. RI.getIncomingValue(I), AC)) {
  356. if (Complain)
  357. Engine.log("PHI node incoming values differ");
  358. return true;
  359. }
  360. }
  361. return false;
  362. // Terminators.
  363. } else if (isa<InvokeInst>(L)) {
  364. const InvokeInst &LI = cast<InvokeInst>(*L);
  365. const InvokeInst &RI = cast<InvokeInst>(*R);
  366. if (diffCallSites(LI, RI, Complain))
  367. return true;
  368. if (TryUnify) {
  369. tryUnify(LI.getNormalDest(), RI.getNormalDest());
  370. tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
  371. }
  372. return false;
  373. } else if (isa<CallBrInst>(L)) {
  374. const CallBrInst &LI = cast<CallBrInst>(*L);
  375. const CallBrInst &RI = cast<CallBrInst>(*R);
  376. if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
  377. if (Complain)
  378. Engine.log("callbr # of indirect destinations differ");
  379. return true;
  380. }
  381. // Perform the "try unify" step so that we can equate the indirect
  382. // destinations before checking the call site.
  383. for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
  384. tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
  385. if (diffCallSites(LI, RI, Complain))
  386. return true;
  387. if (TryUnify)
  388. tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
  389. return false;
  390. } else if (isa<BranchInst>(L)) {
  391. const BranchInst *LI = cast<BranchInst>(L);
  392. const BranchInst *RI = cast<BranchInst>(R);
  393. if (LI->isConditional() != RI->isConditional()) {
  394. if (Complain) Engine.log("branch conditionality differs");
  395. return true;
  396. }
  397. if (LI->isConditional()) {
  398. if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
  399. if (Complain) Engine.log("branch conditions differ");
  400. return true;
  401. }
  402. if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
  403. }
  404. if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
  405. return false;
  406. } else if (isa<IndirectBrInst>(L)) {
  407. const IndirectBrInst *LI = cast<IndirectBrInst>(L);
  408. const IndirectBrInst *RI = cast<IndirectBrInst>(R);
  409. if (LI->getNumDestinations() != RI->getNumDestinations()) {
  410. if (Complain) Engine.log("indirectbr # of destinations differ");
  411. return true;
  412. }
  413. if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) {
  414. if (Complain) Engine.log("indirectbr addresses differ");
  415. return true;
  416. }
  417. if (TryUnify) {
  418. for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
  419. tryUnify(LI->getDestination(i), RI->getDestination(i));
  420. }
  421. }
  422. return false;
  423. } else if (isa<SwitchInst>(L)) {
  424. const SwitchInst *LI = cast<SwitchInst>(L);
  425. const SwitchInst *RI = cast<SwitchInst>(R);
  426. if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
  427. if (Complain) Engine.log("switch conditions differ");
  428. return true;
  429. }
  430. if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
  431. bool Difference = false;
  432. DenseMap<const ConstantInt *, const BasicBlock *> LCases;
  433. for (auto Case : LI->cases())
  434. LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
  435. for (auto Case : RI->cases()) {
  436. const ConstantInt *CaseValue = Case.getCaseValue();
  437. const BasicBlock *LCase = LCases[CaseValue];
  438. if (LCase) {
  439. if (TryUnify)
  440. tryUnify(LCase, Case.getCaseSuccessor());
  441. LCases.erase(CaseValue);
  442. } else if (Complain || !Difference) {
  443. if (Complain)
  444. Engine.logf("right switch has extra case %r") << CaseValue;
  445. Difference = true;
  446. }
  447. }
  448. if (!Difference)
  449. for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
  450. I = LCases.begin(),
  451. E = LCases.end();
  452. I != E; ++I) {
  453. if (Complain)
  454. Engine.logf("left switch has extra case %l") << I->first;
  455. Difference = true;
  456. }
  457. return Difference;
  458. } else if (isa<UnreachableInst>(L)) {
  459. return false;
  460. }
  461. if (L->getNumOperands() != R->getNumOperands()) {
  462. if (Complain) Engine.log("instructions have different operand counts");
  463. return true;
  464. }
  465. for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
  466. Value *LO = L->getOperand(I), *RO = R->getOperand(I);
  467. if (!equivalentAsOperands(LO, RO, AC)) {
  468. if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
  469. return true;
  470. }
  471. }
  472. return false;
  473. }
  474. public:
  475. bool equivalentAsOperands(const Constant *L, const Constant *R,
  476. const AssumptionContext *AC) {
  477. // Use equality as a preliminary filter.
  478. if (L == R)
  479. return true;
  480. if (L->getValueID() != R->getValueID())
  481. return false;
  482. // Ask the engine about global values.
  483. if (isa<GlobalValue>(L))
  484. return Engine.equivalentAsOperands(cast<GlobalValue>(L),
  485. cast<GlobalValue>(R));
  486. // Compare constant expressions structurally.
  487. if (isa<ConstantExpr>(L))
  488. return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R),
  489. AC);
  490. // Constants of the "same type" don't always actually have the same
  491. // type; I don't know why. Just white-list them.
  492. if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
  493. return true;
  494. // Block addresses only match if we've already encountered the
  495. // block. FIXME: tentative matches?
  496. if (isa<BlockAddress>(L))
  497. return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
  498. == cast<BlockAddress>(R)->getBasicBlock();
  499. // If L and R are ConstantVectors, compare each element
  500. if (isa<ConstantVector>(L)) {
  501. const ConstantVector *CVL = cast<ConstantVector>(L);
  502. const ConstantVector *CVR = cast<ConstantVector>(R);
  503. if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
  504. return false;
  505. for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
  506. if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC))
  507. return false;
  508. }
  509. return true;
  510. }
  511. // If L and R are ConstantArrays, compare the element count and types.
  512. if (isa<ConstantArray>(L)) {
  513. const ConstantArray *CAL = cast<ConstantArray>(L);
  514. const ConstantArray *CAR = cast<ConstantArray>(R);
  515. // Sometimes a type may be equivalent, but not uniquified---e.g. it may
  516. // contain a GEP instruction. Do a deeper comparison of the types.
  517. if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
  518. return false;
  519. for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
  520. if (!equivalentAsOperands(CAL->getAggregateElement(I),
  521. CAR->getAggregateElement(I), AC))
  522. return false;
  523. }
  524. return true;
  525. }
  526. // If L and R are ConstantStructs, compare each field and type.
  527. if (isa<ConstantStruct>(L)) {
  528. const ConstantStruct *CSL = cast<ConstantStruct>(L);
  529. const ConstantStruct *CSR = cast<ConstantStruct>(R);
  530. const StructType *LTy = cast<StructType>(CSL->getType());
  531. const StructType *RTy = cast<StructType>(CSR->getType());
  532. // The StructTypes should have the same attributes. Don't use
  533. // isLayoutIdentical(), because that just checks the element pointers,
  534. // which may not work here.
  535. if (LTy->getNumElements() != RTy->getNumElements() ||
  536. LTy->isPacked() != RTy->isPacked())
  537. return false;
  538. for (unsigned I = 0; I < LTy->getNumElements(); I++) {
  539. const Value *LAgg = CSL->getAggregateElement(I);
  540. const Value *RAgg = CSR->getAggregateElement(I);
  541. if (LAgg == SavedLHS || RAgg == SavedRHS) {
  542. if (LAgg != SavedLHS || RAgg != SavedRHS)
  543. // If the left and right operands aren't both re-analyzing the
  544. // variable, then the initialiers don't match, so report "false".
  545. // Otherwise, we skip these operands..
  546. return false;
  547. continue;
  548. }
  549. if (!equivalentAsOperands(LAgg, RAgg, AC)) {
  550. return false;
  551. }
  552. }
  553. return true;
  554. }
  555. return false;
  556. }
  557. bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R,
  558. const AssumptionContext *AC) {
  559. if (L == R)
  560. return true;
  561. if (L->getOpcode() != R->getOpcode())
  562. return false;
  563. switch (L->getOpcode()) {
  564. case Instruction::ICmp:
  565. case Instruction::FCmp:
  566. if (L->getPredicate() != R->getPredicate())
  567. return false;
  568. break;
  569. case Instruction::GetElementPtr:
  570. // FIXME: inbounds?
  571. break;
  572. default:
  573. break;
  574. }
  575. if (L->getNumOperands() != R->getNumOperands())
  576. return false;
  577. for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
  578. const auto *LOp = L->getOperand(I);
  579. const auto *ROp = R->getOperand(I);
  580. if (LOp == SavedLHS || ROp == SavedRHS) {
  581. if (LOp != SavedLHS || ROp != SavedRHS)
  582. // If the left and right operands aren't both re-analyzing the
  583. // variable, then the initialiers don't match, so report "false".
  584. // Otherwise, we skip these operands..
  585. return false;
  586. continue;
  587. }
  588. if (!equivalentAsOperands(LOp, ROp, AC))
  589. return false;
  590. }
  591. return true;
  592. }
  593. // There are cases where we cannot determine whether two values are
  594. // equivalent, because it depends on not yet processed basic blocks -- see the
  595. // documentation on assumptions.
  596. //
  597. // AC is the context in which we are currently performing a diff.
  598. // When we encounter a pair of values for which we can neither prove
  599. // equivalence nor the opposite, we do the following:
  600. // * If AC is nullptr, we treat the pair as non-equivalent.
  601. // * If AC is set, we add an assumption for the basic blocks given by AC,
  602. // and treat the pair as equivalent. The assumption is checked later.
  603. bool equivalentAsOperands(const Value *L, const Value *R,
  604. const AssumptionContext *AC) {
  605. // Fall out if the values have different kind.
  606. // This possibly shouldn't take priority over oracles.
  607. if (L->getValueID() != R->getValueID())
  608. return false;
  609. // Value subtypes: Argument, Constant, Instruction, BasicBlock,
  610. // InlineAsm, MDNode, MDString, PseudoSourceValue
  611. if (isa<Constant>(L))
  612. return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC);
  613. if (isa<Instruction>(L)) {
  614. auto It = Values.find(L);
  615. if (It != Values.end())
  616. return It->second == R;
  617. if (TentativeValues.count(std::make_pair(L, R)))
  618. return true;
  619. // L and R might be equivalent, this could depend on not yet processed
  620. // basic blocks, so we cannot decide here.
  621. if (AC) {
  622. // Add an assumption, unless there is a conflict with an existing one
  623. BlockDiffCandidate &BDC =
  624. getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB);
  625. auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R});
  626. if (!InsertionResult.second && InsertionResult.first->second != R) {
  627. // We already have a conflicting equivalence assumption for L, so at
  628. // least one must be wrong, and we know that there is a diff.
  629. BDC.KnownToDiffer = true;
  630. BDC.EquivalenceAssumptions.clear();
  631. return false;
  632. }
  633. // Optimistically assume equivalence, and check later once all BBs
  634. // have been processed.
  635. return true;
  636. }
  637. // Assumptions disabled, so pessimistically assume non-equivalence.
  638. return false;
  639. }
  640. if (isa<Argument>(L))
  641. return Values[L] == R;
  642. if (isa<BasicBlock>(L))
  643. return Blocks[cast<BasicBlock>(L)] != R;
  644. // Pretend everything else is identical.
  645. return true;
  646. }
  647. // Avoid a gcc warning about accessing 'this' in an initializer.
  648. FunctionDifferenceEngine *this_() { return this; }
  649. public:
  650. FunctionDifferenceEngine(DifferenceEngine &Engine,
  651. const Value *SavedLHS = nullptr,
  652. const Value *SavedRHS = nullptr)
  653. : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
  654. Queue(QueueSorter(*this_())) {}
  655. void diff(const Function *L, const Function *R) {
  656. assert(Values.empty() && "Multiple diffs per engine are not supported!");
  657. if (L->arg_size() != R->arg_size())
  658. Engine.log("different argument counts");
  659. // Map the arguments.
  660. for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
  661. RI = R->arg_begin(), RE = R->arg_end();
  662. LI != LE && RI != RE; ++LI, ++RI)
  663. Values[&*LI] = &*RI;
  664. tryUnify(&*L->begin(), &*R->begin());
  665. processQueue();
  666. checkAndReportDiffCandidates();
  667. }
  668. };
  669. struct DiffEntry {
  670. DiffEntry() : Cost(0) {}
  671. unsigned Cost;
  672. llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
  673. };
  674. bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
  675. const Instruction *R) {
  676. return !diff(L, R, false, false, false);
  677. }
  678. void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
  679. BasicBlock::const_iterator RStart) {
  680. BasicBlock::const_iterator LE = LStart->getParent()->end();
  681. BasicBlock::const_iterator RE = RStart->getParent()->end();
  682. unsigned NL = std::distance(LStart, LE);
  683. SmallVector<DiffEntry, 20> Paths1(NL+1);
  684. SmallVector<DiffEntry, 20> Paths2(NL+1);
  685. DiffEntry *Cur = Paths1.data();
  686. DiffEntry *Next = Paths2.data();
  687. const unsigned LeftCost = 2;
  688. const unsigned RightCost = 2;
  689. const unsigned MatchCost = 0;
  690. assert(TentativeValues.empty());
  691. // Initialize the first column.
  692. for (unsigned I = 0; I != NL+1; ++I) {
  693. Cur[I].Cost = I * LeftCost;
  694. for (unsigned J = 0; J != I; ++J)
  695. Cur[I].Path.push_back(DC_left);
  696. }
  697. for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
  698. // Initialize the first row.
  699. Next[0] = Cur[0];
  700. Next[0].Cost += RightCost;
  701. Next[0].Path.push_back(DC_right);
  702. unsigned Index = 1;
  703. for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
  704. if (matchForBlockDiff(&*LI, &*RI)) {
  705. Next[Index] = Cur[Index-1];
  706. Next[Index].Cost += MatchCost;
  707. Next[Index].Path.push_back(DC_match);
  708. TentativeValues.insert(std::make_pair(&*LI, &*RI));
  709. } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
  710. Next[Index] = Next[Index-1];
  711. Next[Index].Cost += LeftCost;
  712. Next[Index].Path.push_back(DC_left);
  713. } else {
  714. Next[Index] = Cur[Index];
  715. Next[Index].Cost += RightCost;
  716. Next[Index].Path.push_back(DC_right);
  717. }
  718. }
  719. std::swap(Cur, Next);
  720. }
  721. // We don't need the tentative values anymore; everything from here
  722. // on out should be non-tentative.
  723. TentativeValues.clear();
  724. SmallVectorImpl<char> &Path = Cur[NL].Path;
  725. BasicBlock::const_iterator LI = LStart, RI = RStart;
  726. DiffLogBuilder Diff(Engine.getConsumer());
  727. // Drop trailing matches.
  728. while (Path.size() && Path.back() == DC_match)
  729. Path.pop_back();
  730. // Skip leading matches.
  731. SmallVectorImpl<char>::iterator
  732. PI = Path.begin(), PE = Path.end();
  733. while (PI != PE && *PI == DC_match) {
  734. unify(&*LI, &*RI);
  735. ++PI;
  736. ++LI;
  737. ++RI;
  738. }
  739. for (; PI != PE; ++PI) {
  740. switch (static_cast<DiffChange>(*PI)) {
  741. case DC_match:
  742. assert(LI != LE && RI != RE);
  743. {
  744. const Instruction *L = &*LI, *R = &*RI;
  745. unify(L, R);
  746. Diff.addMatch(L, R);
  747. }
  748. ++LI; ++RI;
  749. break;
  750. case DC_left:
  751. assert(LI != LE);
  752. Diff.addLeft(&*LI);
  753. ++LI;
  754. break;
  755. case DC_right:
  756. assert(RI != RE);
  757. Diff.addRight(&*RI);
  758. ++RI;
  759. break;
  760. }
  761. }
  762. // Finishing unifying and complaining about the tails of the block,
  763. // which should be matches all the way through.
  764. while (LI != LE) {
  765. assert(RI != RE);
  766. unify(&*LI, &*RI);
  767. ++LI;
  768. ++RI;
  769. }
  770. // If the terminators have different kinds, but one is an invoke and the
  771. // other is an unconditional branch immediately following a call, unify
  772. // the results and the destinations.
  773. const Instruction *LTerm = LStart->getParent()->getTerminator();
  774. const Instruction *RTerm = RStart->getParent()->getTerminator();
  775. if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
  776. if (cast<BranchInst>(LTerm)->isConditional()) return;
  777. BasicBlock::const_iterator I = LTerm->getIterator();
  778. if (I == LStart->getParent()->begin()) return;
  779. --I;
  780. if (!isa<CallInst>(*I)) return;
  781. const CallInst *LCall = cast<CallInst>(&*I);
  782. const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
  783. if (!equivalentAsOperands(LCall->getCalledOperand(),
  784. RInvoke->getCalledOperand(), nullptr))
  785. return;
  786. if (!LCall->use_empty())
  787. Values[LCall] = RInvoke;
  788. tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
  789. } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
  790. if (cast<BranchInst>(RTerm)->isConditional()) return;
  791. BasicBlock::const_iterator I = RTerm->getIterator();
  792. if (I == RStart->getParent()->begin()) return;
  793. --I;
  794. if (!isa<CallInst>(*I)) return;
  795. const CallInst *RCall = cast<CallInst>(I);
  796. const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
  797. if (!equivalentAsOperands(LInvoke->getCalledOperand(),
  798. RCall->getCalledOperand(), nullptr))
  799. return;
  800. if (!LInvoke->use_empty())
  801. Values[LInvoke] = RCall;
  802. tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
  803. }
  804. }
  805. }
  806. void DifferenceEngine::Oracle::anchor() { }
  807. void DifferenceEngine::diff(const Function *L, const Function *R) {
  808. Context C(*this, L, R);
  809. // FIXME: types
  810. // FIXME: attributes and CC
  811. // FIXME: parameter attributes
  812. // If both are declarations, we're done.
  813. if (L->empty() && R->empty())
  814. return;
  815. else if (L->empty())
  816. log("left function is declaration, right function is definition");
  817. else if (R->empty())
  818. log("right function is declaration, left function is definition");
  819. else
  820. FunctionDifferenceEngine(*this).diff(L, R);
  821. }
  822. void DifferenceEngine::diff(const Module *L, const Module *R) {
  823. StringSet<> LNames;
  824. SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
  825. unsigned LeftAnonCount = 0;
  826. unsigned RightAnonCount = 0;
  827. for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
  828. const Function *LFn = &*I;
  829. StringRef Name = LFn->getName();
  830. if (Name.empty()) {
  831. ++LeftAnonCount;
  832. continue;
  833. }
  834. LNames.insert(Name);
  835. if (Function *RFn = R->getFunction(LFn->getName()))
  836. Queue.push_back(std::make_pair(LFn, RFn));
  837. else
  838. logf("function %l exists only in left module") << LFn;
  839. }
  840. for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
  841. const Function *RFn = &*I;
  842. StringRef Name = RFn->getName();
  843. if (Name.empty()) {
  844. ++RightAnonCount;
  845. continue;
  846. }
  847. if (!LNames.count(Name))
  848. logf("function %r exists only in right module") << RFn;
  849. }
  850. if (LeftAnonCount != 0 || RightAnonCount != 0) {
  851. SmallString<32> Tmp;
  852. logf(("not comparing " + Twine(LeftAnonCount) +
  853. " anonymous functions in the left module and " +
  854. Twine(RightAnonCount) + " in the right module")
  855. .toStringRef(Tmp));
  856. }
  857. for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
  858. I = Queue.begin(),
  859. E = Queue.end();
  860. I != E; ++I)
  861. diff(I->first, I->second);
  862. }
  863. bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
  864. const GlobalValue *R) {
  865. if (globalValueOracle) return (*globalValueOracle)(L, R);
  866. if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
  867. const GlobalVariable *GVL = cast<GlobalVariable>(L);
  868. const GlobalVariable *GVR = cast<GlobalVariable>(R);
  869. if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
  870. GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
  871. return FunctionDifferenceEngine(*this, GVL, GVR)
  872. .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(),
  873. nullptr);
  874. }
  875. return L->getName() == R->getName();
  876. }