DifferenceEngine.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874
  1. //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This header defines the implementation of the LLVM difference
  10. // engine, which structurally compares global values within a module.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "DifferenceEngine.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/SmallString.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/StringSet.h"
  19. #include "llvm/IR/CFG.h"
  20. #include "llvm/IR/Constants.h"
  21. #include "llvm/IR/Function.h"
  22. #include "llvm/IR/Instructions.h"
  23. #include "llvm/IR/Module.h"
  24. #include "llvm/Support/ErrorHandling.h"
  25. #include "llvm/Support/raw_ostream.h"
  26. #include "llvm/Support/type_traits.h"
  27. #include <utility>
  28. using namespace llvm;
  29. namespace {
  30. /// A priority queue, implemented as a heap.
  31. template <class T, class Sorter, unsigned InlineCapacity>
  32. class PriorityQueue {
  33. Sorter Precedes;
  34. llvm::SmallVector<T, InlineCapacity> Storage;
  35. public:
  36. PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
  37. /// Checks whether the heap is empty.
  38. bool empty() const { return Storage.empty(); }
  39. /// Insert a new value on the heap.
  40. void insert(const T &V) {
  41. unsigned Index = Storage.size();
  42. Storage.push_back(V);
  43. if (Index == 0) return;
  44. T *data = Storage.data();
  45. while (true) {
  46. unsigned Target = (Index + 1) / 2 - 1;
  47. if (!Precedes(data[Index], data[Target])) return;
  48. std::swap(data[Index], data[Target]);
  49. if (Target == 0) return;
  50. Index = Target;
  51. }
  52. }
  53. /// Remove the minimum value in the heap. Only valid on a non-empty heap.
  54. T remove_min() {
  55. assert(!empty());
  56. T tmp = Storage[0];
  57. unsigned NewSize = Storage.size() - 1;
  58. if (NewSize) {
  59. // Move the slot at the end to the beginning.
  60. if (std::is_trivially_copyable<T>::value)
  61. Storage[0] = Storage[NewSize];
  62. else
  63. std::swap(Storage[0], Storage[NewSize]);
  64. // Bubble the root up as necessary.
  65. unsigned Index = 0;
  66. while (true) {
  67. // With a 1-based index, the children would be Index*2 and Index*2+1.
  68. unsigned R = (Index + 1) * 2;
  69. unsigned L = R - 1;
  70. // If R is out of bounds, we're done after this in any case.
  71. if (R >= NewSize) {
  72. // If L is also out of bounds, we're done immediately.
  73. if (L >= NewSize) break;
  74. // Otherwise, test whether we should swap L and Index.
  75. if (Precedes(Storage[L], Storage[Index]))
  76. std::swap(Storage[L], Storage[Index]);
  77. break;
  78. }
  79. // Otherwise, we need to compare with the smaller of L and R.
  80. // Prefer R because it's closer to the end of the array.
  81. unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
  82. // If Index is >= the min of L and R, then heap ordering is restored.
  83. if (!Precedes(Storage[IndexToTest], Storage[Index]))
  84. break;
  85. // Otherwise, keep bubbling up.
  86. std::swap(Storage[IndexToTest], Storage[Index]);
  87. Index = IndexToTest;
  88. }
  89. }
  90. Storage.pop_back();
  91. return tmp;
  92. }
  93. };
  94. /// A function-scope difference engine.
  95. class FunctionDifferenceEngine {
  96. DifferenceEngine &Engine;
  97. // Some initializers may reference the variable we're currently checking. This
  98. // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
  99. // recursing.
  100. const Value *SavedLHS;
  101. const Value *SavedRHS;
  102. /// The current mapping from old local values to new local values.
  103. DenseMap<const Value *, const Value *> Values;
  104. /// The current mapping from old blocks to new blocks.
  105. DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
  106. DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
  107. unsigned getUnprocPredCount(const BasicBlock *Block) const {
  108. unsigned Count = 0;
  109. for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
  110. ++I)
  111. if (!Blocks.count(*I)) Count++;
  112. return Count;
  113. }
  114. typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
  115. /// A type which sorts a priority queue by the number of unprocessed
  116. /// predecessor blocks it has remaining.
  117. ///
  118. /// This is actually really expensive to calculate.
  119. struct QueueSorter {
  120. const FunctionDifferenceEngine &fde;
  121. explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
  122. bool operator()(BlockPair &Old, BlockPair &New) {
  123. return fde.getUnprocPredCount(Old.first)
  124. < fde.getUnprocPredCount(New.first);
  125. }
  126. };
  127. /// A queue of unified blocks to process.
  128. PriorityQueue<BlockPair, QueueSorter, 20> Queue;
  129. /// Try to unify the given two blocks. Enqueues them for processing
  130. /// if they haven't already been processed.
  131. ///
  132. /// Returns true if there was a problem unifying them.
  133. bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
  134. const BasicBlock *&Ref = Blocks[L];
  135. if (Ref) {
  136. if (Ref == R) return false;
  137. Engine.logf("successor %l cannot be equivalent to %r; "
  138. "it's already equivalent to %r")
  139. << L << R << Ref;
  140. return true;
  141. }
  142. Ref = R;
  143. Queue.insert(BlockPair(L, R));
  144. return false;
  145. }
  146. /// Unifies two instructions, given that they're known not to have
  147. /// structural differences.
  148. void unify(const Instruction *L, const Instruction *R) {
  149. DifferenceEngine::Context C(Engine, L, R);
  150. bool Result = diff(L, R, true, true);
  151. assert(!Result && "structural differences second time around?");
  152. (void) Result;
  153. if (!L->use_empty())
  154. Values[L] = R;
  155. }
  156. void processQueue() {
  157. while (!Queue.empty()) {
  158. BlockPair Pair = Queue.remove_min();
  159. diff(Pair.first, Pair.second);
  160. }
  161. }
  162. void diff(const BasicBlock *L, const BasicBlock *R) {
  163. DifferenceEngine::Context C(Engine, L, R);
  164. BasicBlock::const_iterator LI = L->begin(), LE = L->end();
  165. BasicBlock::const_iterator RI = R->begin();
  166. do {
  167. assert(LI != LE && RI != R->end());
  168. const Instruction *LeftI = &*LI, *RightI = &*RI;
  169. // If the instructions differ, start the more sophisticated diff
  170. // algorithm at the start of the block.
  171. if (diff(LeftI, RightI, false, false)) {
  172. TentativeValues.clear();
  173. return runBlockDiff(L->begin(), R->begin());
  174. }
  175. // Otherwise, tentatively unify them.
  176. if (!LeftI->use_empty())
  177. TentativeValues.insert(std::make_pair(LeftI, RightI));
  178. ++LI;
  179. ++RI;
  180. } while (LI != LE); // This is sufficient: we can't get equality of
  181. // terminators if there are residual instructions.
  182. // Unify everything in the block, non-tentatively this time.
  183. TentativeValues.clear();
  184. for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
  185. unify(&*LI, &*RI);
  186. }
  187. bool matchForBlockDiff(const Instruction *L, const Instruction *R);
  188. void runBlockDiff(BasicBlock::const_iterator LI,
  189. BasicBlock::const_iterator RI);
  190. bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
  191. // FIXME: call attributes
  192. if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
  193. if (Complain) Engine.log("called functions differ");
  194. return true;
  195. }
  196. if (L.arg_size() != R.arg_size()) {
  197. if (Complain) Engine.log("argument counts differ");
  198. return true;
  199. }
  200. for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
  201. if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
  202. if (Complain)
  203. Engine.logf("arguments %l and %r differ")
  204. << L.getArgOperand(I) << R.getArgOperand(I);
  205. return true;
  206. }
  207. return false;
  208. }
  209. bool diff(const Instruction *L, const Instruction *R, bool Complain,
  210. bool TryUnify) {
  211. // FIXME: metadata (if Complain is set)
  212. // Different opcodes always imply different operations.
  213. if (L->getOpcode() != R->getOpcode()) {
  214. if (Complain) Engine.log("different instruction types");
  215. return true;
  216. }
  217. if (isa<CmpInst>(L)) {
  218. if (cast<CmpInst>(L)->getPredicate()
  219. != cast<CmpInst>(R)->getPredicate()) {
  220. if (Complain) Engine.log("different predicates");
  221. return true;
  222. }
  223. } else if (isa<CallInst>(L)) {
  224. return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
  225. } else if (isa<PHINode>(L)) {
  226. const PHINode &LI = cast<PHINode>(*L);
  227. const PHINode &RI = cast<PHINode>(*R);
  228. // This is really weird; type uniquing is broken?
  229. if (LI.getType() != RI.getType()) {
  230. if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
  231. if (Complain) Engine.log("different phi types");
  232. return true;
  233. }
  234. }
  235. if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
  236. if (Complain)
  237. Engine.log("PHI node # of incoming values differ");
  238. return true;
  239. }
  240. for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
  241. if (TryUnify)
  242. tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
  243. if (!equivalentAsOperands(LI.getIncomingValue(I),
  244. RI.getIncomingValue(I))) {
  245. if (Complain)
  246. Engine.log("PHI node incoming values differ");
  247. return true;
  248. }
  249. }
  250. return false;
  251. // Terminators.
  252. } else if (isa<InvokeInst>(L)) {
  253. const InvokeInst &LI = cast<InvokeInst>(*L);
  254. const InvokeInst &RI = cast<InvokeInst>(*R);
  255. if (diffCallSites(LI, RI, Complain))
  256. return true;
  257. if (TryUnify) {
  258. tryUnify(LI.getNormalDest(), RI.getNormalDest());
  259. tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
  260. }
  261. return false;
  262. } else if (isa<CallBrInst>(L)) {
  263. const CallBrInst &LI = cast<CallBrInst>(*L);
  264. const CallBrInst &RI = cast<CallBrInst>(*R);
  265. if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
  266. if (Complain)
  267. Engine.log("callbr # of indirect destinations differ");
  268. return true;
  269. }
  270. // Perform the "try unify" step so that we can equate the indirect
  271. // destinations before checking the call site.
  272. for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
  273. tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
  274. if (diffCallSites(LI, RI, Complain))
  275. return true;
  276. if (TryUnify)
  277. tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
  278. return false;
  279. } else if (isa<BranchInst>(L)) {
  280. const BranchInst *LI = cast<BranchInst>(L);
  281. const BranchInst *RI = cast<BranchInst>(R);
  282. if (LI->isConditional() != RI->isConditional()) {
  283. if (Complain) Engine.log("branch conditionality differs");
  284. return true;
  285. }
  286. if (LI->isConditional()) {
  287. if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
  288. if (Complain) Engine.log("branch conditions differ");
  289. return true;
  290. }
  291. if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
  292. }
  293. if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
  294. return false;
  295. } else if (isa<IndirectBrInst>(L)) {
  296. const IndirectBrInst *LI = cast<IndirectBrInst>(L);
  297. const IndirectBrInst *RI = cast<IndirectBrInst>(R);
  298. if (LI->getNumDestinations() != RI->getNumDestinations()) {
  299. if (Complain) Engine.log("indirectbr # of destinations differ");
  300. return true;
  301. }
  302. if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
  303. if (Complain) Engine.log("indirectbr addresses differ");
  304. return true;
  305. }
  306. if (TryUnify) {
  307. for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
  308. tryUnify(LI->getDestination(i), RI->getDestination(i));
  309. }
  310. }
  311. return false;
  312. } else if (isa<SwitchInst>(L)) {
  313. const SwitchInst *LI = cast<SwitchInst>(L);
  314. const SwitchInst *RI = cast<SwitchInst>(R);
  315. if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
  316. if (Complain) Engine.log("switch conditions differ");
  317. return true;
  318. }
  319. if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
  320. bool Difference = false;
  321. DenseMap<const ConstantInt *, const BasicBlock *> LCases;
  322. for (auto Case : LI->cases())
  323. LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
  324. for (auto Case : RI->cases()) {
  325. const ConstantInt *CaseValue = Case.getCaseValue();
  326. const BasicBlock *LCase = LCases[CaseValue];
  327. if (LCase) {
  328. if (TryUnify)
  329. tryUnify(LCase, Case.getCaseSuccessor());
  330. LCases.erase(CaseValue);
  331. } else if (Complain || !Difference) {
  332. if (Complain)
  333. Engine.logf("right switch has extra case %r") << CaseValue;
  334. Difference = true;
  335. }
  336. }
  337. if (!Difference)
  338. for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
  339. I = LCases.begin(),
  340. E = LCases.end();
  341. I != E; ++I) {
  342. if (Complain)
  343. Engine.logf("left switch has extra case %l") << I->first;
  344. Difference = true;
  345. }
  346. return Difference;
  347. } else if (isa<UnreachableInst>(L)) {
  348. return false;
  349. }
  350. if (L->getNumOperands() != R->getNumOperands()) {
  351. if (Complain) Engine.log("instructions have different operand counts");
  352. return true;
  353. }
  354. for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
  355. Value *LO = L->getOperand(I), *RO = R->getOperand(I);
  356. if (!equivalentAsOperands(LO, RO)) {
  357. if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
  358. return true;
  359. }
  360. }
  361. return false;
  362. }
  363. public:
  364. bool equivalentAsOperands(const Constant *L, const Constant *R) {
  365. // Use equality as a preliminary filter.
  366. if (L == R)
  367. return true;
  368. if (L->getValueID() != R->getValueID())
  369. return false;
  370. // Ask the engine about global values.
  371. if (isa<GlobalValue>(L))
  372. return Engine.equivalentAsOperands(cast<GlobalValue>(L),
  373. cast<GlobalValue>(R));
  374. // Compare constant expressions structurally.
  375. if (isa<ConstantExpr>(L))
  376. return equivalentAsOperands(cast<ConstantExpr>(L),
  377. cast<ConstantExpr>(R));
  378. // Constants of the "same type" don't always actually have the same
  379. // type; I don't know why. Just white-list them.
  380. if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
  381. return true;
  382. // Block addresses only match if we've already encountered the
  383. // block. FIXME: tentative matches?
  384. if (isa<BlockAddress>(L))
  385. return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
  386. == cast<BlockAddress>(R)->getBasicBlock();
  387. // If L and R are ConstantVectors, compare each element
  388. if (isa<ConstantVector>(L)) {
  389. const ConstantVector *CVL = cast<ConstantVector>(L);
  390. const ConstantVector *CVR = cast<ConstantVector>(R);
  391. if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
  392. return false;
  393. for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
  394. if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
  395. return false;
  396. }
  397. return true;
  398. }
  399. // If L and R are ConstantArrays, compare the element count and types.
  400. if (isa<ConstantArray>(L)) {
  401. const ConstantArray *CAL = cast<ConstantArray>(L);
  402. const ConstantArray *CAR = cast<ConstantArray>(R);
  403. // Sometimes a type may be equivalent, but not uniquified---e.g. it may
  404. // contain a GEP instruction. Do a deeper comparison of the types.
  405. if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
  406. return false;
  407. for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
  408. if (!equivalentAsOperands(CAL->getAggregateElement(I),
  409. CAR->getAggregateElement(I)))
  410. return false;
  411. }
  412. return true;
  413. }
  414. // If L and R are ConstantStructs, compare each field and type.
  415. if (isa<ConstantStruct>(L)) {
  416. const ConstantStruct *CSL = cast<ConstantStruct>(L);
  417. const ConstantStruct *CSR = cast<ConstantStruct>(R);
  418. const StructType *LTy = cast<StructType>(CSL->getType());
  419. const StructType *RTy = cast<StructType>(CSR->getType());
  420. // The StructTypes should have the same attributes. Don't use
  421. // isLayoutIdentical(), because that just checks the element pointers,
  422. // which may not work here.
  423. if (LTy->getNumElements() != RTy->getNumElements() ||
  424. LTy->isPacked() != RTy->isPacked())
  425. return false;
  426. for (unsigned I = 0; I < LTy->getNumElements(); I++) {
  427. const Value *LAgg = CSL->getAggregateElement(I);
  428. const Value *RAgg = CSR->getAggregateElement(I);
  429. if (LAgg == SavedLHS || RAgg == SavedRHS) {
  430. if (LAgg != SavedLHS || RAgg != SavedRHS)
  431. // If the left and right operands aren't both re-analyzing the
  432. // variable, then the initialiers don't match, so report "false".
  433. // Otherwise, we skip these operands..
  434. return false;
  435. continue;
  436. }
  437. if (!equivalentAsOperands(LAgg, RAgg)) {
  438. return false;
  439. }
  440. }
  441. return true;
  442. }
  443. return false;
  444. }
  445. bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
  446. if (L == R)
  447. return true;
  448. if (L->getOpcode() != R->getOpcode())
  449. return false;
  450. switch (L->getOpcode()) {
  451. case Instruction::ICmp:
  452. case Instruction::FCmp:
  453. if (L->getPredicate() != R->getPredicate())
  454. return false;
  455. break;
  456. case Instruction::GetElementPtr:
  457. // FIXME: inbounds?
  458. break;
  459. default:
  460. break;
  461. }
  462. if (L->getNumOperands() != R->getNumOperands())
  463. return false;
  464. for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
  465. const auto *LOp = L->getOperand(I);
  466. const auto *ROp = R->getOperand(I);
  467. if (LOp == SavedLHS || ROp == SavedRHS) {
  468. if (LOp != SavedLHS || ROp != SavedRHS)
  469. // If the left and right operands aren't both re-analyzing the
  470. // variable, then the initialiers don't match, so report "false".
  471. // Otherwise, we skip these operands..
  472. return false;
  473. continue;
  474. }
  475. if (!equivalentAsOperands(LOp, ROp))
  476. return false;
  477. }
  478. return true;
  479. }
  480. bool equivalentAsOperands(const Value *L, const Value *R) {
  481. // Fall out if the values have different kind.
  482. // This possibly shouldn't take priority over oracles.
  483. if (L->getValueID() != R->getValueID())
  484. return false;
  485. // Value subtypes: Argument, Constant, Instruction, BasicBlock,
  486. // InlineAsm, MDNode, MDString, PseudoSourceValue
  487. if (isa<Constant>(L))
  488. return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
  489. if (isa<Instruction>(L))
  490. return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
  491. if (isa<Argument>(L))
  492. return Values[L] == R;
  493. if (isa<BasicBlock>(L))
  494. return Blocks[cast<BasicBlock>(L)] != R;
  495. // Pretend everything else is identical.
  496. return true;
  497. }
  498. // Avoid a gcc warning about accessing 'this' in an initializer.
  499. FunctionDifferenceEngine *this_() { return this; }
  500. public:
  501. FunctionDifferenceEngine(DifferenceEngine &Engine,
  502. const Value *SavedLHS = nullptr,
  503. const Value *SavedRHS = nullptr)
  504. : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
  505. Queue(QueueSorter(*this_())) {}
  506. void diff(const Function *L, const Function *R) {
  507. if (L->arg_size() != R->arg_size())
  508. Engine.log("different argument counts");
  509. // Map the arguments.
  510. for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
  511. RI = R->arg_begin(), RE = R->arg_end();
  512. LI != LE && RI != RE; ++LI, ++RI)
  513. Values[&*LI] = &*RI;
  514. tryUnify(&*L->begin(), &*R->begin());
  515. processQueue();
  516. }
  517. };
  518. struct DiffEntry {
  519. DiffEntry() : Cost(0) {}
  520. unsigned Cost;
  521. llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
  522. };
  523. bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
  524. const Instruction *R) {
  525. return !diff(L, R, false, false);
  526. }
  527. void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
  528. BasicBlock::const_iterator RStart) {
  529. BasicBlock::const_iterator LE = LStart->getParent()->end();
  530. BasicBlock::const_iterator RE = RStart->getParent()->end();
  531. unsigned NL = std::distance(LStart, LE);
  532. SmallVector<DiffEntry, 20> Paths1(NL+1);
  533. SmallVector<DiffEntry, 20> Paths2(NL+1);
  534. DiffEntry *Cur = Paths1.data();
  535. DiffEntry *Next = Paths2.data();
  536. const unsigned LeftCost = 2;
  537. const unsigned RightCost = 2;
  538. const unsigned MatchCost = 0;
  539. assert(TentativeValues.empty());
  540. // Initialize the first column.
  541. for (unsigned I = 0; I != NL+1; ++I) {
  542. Cur[I].Cost = I * LeftCost;
  543. for (unsigned J = 0; J != I; ++J)
  544. Cur[I].Path.push_back(DC_left);
  545. }
  546. for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
  547. // Initialize the first row.
  548. Next[0] = Cur[0];
  549. Next[0].Cost += RightCost;
  550. Next[0].Path.push_back(DC_right);
  551. unsigned Index = 1;
  552. for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
  553. if (matchForBlockDiff(&*LI, &*RI)) {
  554. Next[Index] = Cur[Index-1];
  555. Next[Index].Cost += MatchCost;
  556. Next[Index].Path.push_back(DC_match);
  557. TentativeValues.insert(std::make_pair(&*LI, &*RI));
  558. } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
  559. Next[Index] = Next[Index-1];
  560. Next[Index].Cost += LeftCost;
  561. Next[Index].Path.push_back(DC_left);
  562. } else {
  563. Next[Index] = Cur[Index];
  564. Next[Index].Cost += RightCost;
  565. Next[Index].Path.push_back(DC_right);
  566. }
  567. }
  568. std::swap(Cur, Next);
  569. }
  570. // We don't need the tentative values anymore; everything from here
  571. // on out should be non-tentative.
  572. TentativeValues.clear();
  573. SmallVectorImpl<char> &Path = Cur[NL].Path;
  574. BasicBlock::const_iterator LI = LStart, RI = RStart;
  575. DiffLogBuilder Diff(Engine.getConsumer());
  576. // Drop trailing matches.
  577. while (Path.size() && Path.back() == DC_match)
  578. Path.pop_back();
  579. // Skip leading matches.
  580. SmallVectorImpl<char>::iterator
  581. PI = Path.begin(), PE = Path.end();
  582. while (PI != PE && *PI == DC_match) {
  583. unify(&*LI, &*RI);
  584. ++PI;
  585. ++LI;
  586. ++RI;
  587. }
  588. for (; PI != PE; ++PI) {
  589. switch (static_cast<DiffChange>(*PI)) {
  590. case DC_match:
  591. assert(LI != LE && RI != RE);
  592. {
  593. const Instruction *L = &*LI, *R = &*RI;
  594. unify(L, R);
  595. Diff.addMatch(L, R);
  596. }
  597. ++LI; ++RI;
  598. break;
  599. case DC_left:
  600. assert(LI != LE);
  601. Diff.addLeft(&*LI);
  602. ++LI;
  603. break;
  604. case DC_right:
  605. assert(RI != RE);
  606. Diff.addRight(&*RI);
  607. ++RI;
  608. break;
  609. }
  610. }
  611. // Finishing unifying and complaining about the tails of the block,
  612. // which should be matches all the way through.
  613. while (LI != LE) {
  614. assert(RI != RE);
  615. unify(&*LI, &*RI);
  616. ++LI;
  617. ++RI;
  618. }
  619. // If the terminators have different kinds, but one is an invoke and the
  620. // other is an unconditional branch immediately following a call, unify
  621. // the results and the destinations.
  622. const Instruction *LTerm = LStart->getParent()->getTerminator();
  623. const Instruction *RTerm = RStart->getParent()->getTerminator();
  624. if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
  625. if (cast<BranchInst>(LTerm)->isConditional()) return;
  626. BasicBlock::const_iterator I = LTerm->getIterator();
  627. if (I == LStart->getParent()->begin()) return;
  628. --I;
  629. if (!isa<CallInst>(*I)) return;
  630. const CallInst *LCall = cast<CallInst>(&*I);
  631. const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
  632. if (!equivalentAsOperands(LCall->getCalledOperand(),
  633. RInvoke->getCalledOperand()))
  634. return;
  635. if (!LCall->use_empty())
  636. Values[LCall] = RInvoke;
  637. tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
  638. } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
  639. if (cast<BranchInst>(RTerm)->isConditional()) return;
  640. BasicBlock::const_iterator I = RTerm->getIterator();
  641. if (I == RStart->getParent()->begin()) return;
  642. --I;
  643. if (!isa<CallInst>(*I)) return;
  644. const CallInst *RCall = cast<CallInst>(I);
  645. const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
  646. if (!equivalentAsOperands(LInvoke->getCalledOperand(),
  647. RCall->getCalledOperand()))
  648. return;
  649. if (!LInvoke->use_empty())
  650. Values[LInvoke] = RCall;
  651. tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
  652. }
  653. }
  654. }
  655. void DifferenceEngine::Oracle::anchor() { }
  656. void DifferenceEngine::diff(const Function *L, const Function *R) {
  657. Context C(*this, L, R);
  658. // FIXME: types
  659. // FIXME: attributes and CC
  660. // FIXME: parameter attributes
  661. // If both are declarations, we're done.
  662. if (L->empty() && R->empty())
  663. return;
  664. else if (L->empty())
  665. log("left function is declaration, right function is definition");
  666. else if (R->empty())
  667. log("right function is declaration, left function is definition");
  668. else
  669. FunctionDifferenceEngine(*this).diff(L, R);
  670. }
  671. void DifferenceEngine::diff(const Module *L, const Module *R) {
  672. StringSet<> LNames;
  673. SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
  674. unsigned LeftAnonCount = 0;
  675. unsigned RightAnonCount = 0;
  676. for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
  677. const Function *LFn = &*I;
  678. StringRef Name = LFn->getName();
  679. if (Name.empty()) {
  680. ++LeftAnonCount;
  681. continue;
  682. }
  683. LNames.insert(Name);
  684. if (Function *RFn = R->getFunction(LFn->getName()))
  685. Queue.push_back(std::make_pair(LFn, RFn));
  686. else
  687. logf("function %l exists only in left module") << LFn;
  688. }
  689. for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
  690. const Function *RFn = &*I;
  691. StringRef Name = RFn->getName();
  692. if (Name.empty()) {
  693. ++RightAnonCount;
  694. continue;
  695. }
  696. if (!LNames.count(Name))
  697. logf("function %r exists only in right module") << RFn;
  698. }
  699. if (LeftAnonCount != 0 || RightAnonCount != 0) {
  700. SmallString<32> Tmp;
  701. logf(("not comparing " + Twine(LeftAnonCount) +
  702. " anonymous functions in the left module and " +
  703. Twine(RightAnonCount) + " in the right module")
  704. .toStringRef(Tmp));
  705. }
  706. for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
  707. I = Queue.begin(),
  708. E = Queue.end();
  709. I != E; ++I)
  710. diff(I->first, I->second);
  711. }
  712. bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
  713. const GlobalValue *R) {
  714. if (globalValueOracle) return (*globalValueOracle)(L, R);
  715. if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
  716. const GlobalVariable *GVL = cast<GlobalVariable>(L);
  717. const GlobalVariable *GVR = cast<GlobalVariable>(R);
  718. if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
  719. GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
  720. return FunctionDifferenceEngine(*this, GVL, GVR)
  721. .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
  722. }
  723. return L->getName() == R->getName();
  724. }