IRSimilarityIdentifier.cpp 47 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248
  1. //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // \file
  10. // Implementation file for the IRSimilarityIdentifier for identifying
  11. // similarities in IR including the IRInstructionMapper.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Analysis/IRSimilarityIdentifier.h"
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/IR/Intrinsics.h"
  17. #include "llvm/IR/Operator.h"
  18. #include "llvm/IR/User.h"
  19. #include "llvm/InitializePasses.h"
  20. #include "llvm/Support/SuffixTree.h"
  21. using namespace llvm;
  22. using namespace IRSimilarity;
  23. namespace llvm {
  24. cl::opt<bool>
  25. DisableBranches("no-ir-sim-branch-matching", cl::init(false),
  26. cl::ReallyHidden,
  27. cl::desc("disable similarity matching, and outlining, "
  28. "across branches for debugging purposes."));
  29. cl::opt<bool>
  30. DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
  31. cl::ReallyHidden,
  32. cl::desc("disable outlining indirect calls."));
  33. cl::opt<bool>
  34. MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
  35. cl::desc("only allow matching call instructions if the "
  36. "name and type signature match."));
  37. cl::opt<bool>
  38. DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
  39. cl::desc("Don't match or outline intrinsics"));
  40. } // namespace llvm
  41. IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
  42. IRInstructionDataList &IDList)
  43. : Inst(&I), Legal(Legality), IDL(&IDList) {
  44. initializeInstruction();
  45. }
  46. void IRInstructionData::initializeInstruction() {
  47. // We check for whether we have a comparison instruction. If it is, we
  48. // find the "less than" version of the predicate for consistency for
  49. // comparison instructions throught the program.
  50. if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
  51. CmpInst::Predicate Predicate = predicateForConsistency(C);
  52. if (Predicate != C->getPredicate())
  53. RevisedPredicate = Predicate;
  54. }
  55. // Here we collect the operands and their types for determining whether
  56. // the structure of the operand use matches between two different candidates.
  57. for (Use &OI : Inst->operands()) {
  58. if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
  59. // If we have a CmpInst where the predicate is reversed, it means the
  60. // operands must be reversed as well.
  61. OperVals.insert(OperVals.begin(), OI.get());
  62. continue;
  63. }
  64. OperVals.push_back(OI.get());
  65. }
  66. // We capture the incoming BasicBlocks as values as well as the incoming
  67. // Values in order to check for structural similarity.
  68. if (PHINode *PN = dyn_cast<PHINode>(Inst))
  69. for (BasicBlock *BB : PN->blocks())
  70. OperVals.push_back(BB);
  71. }
  72. IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
  73. : IDL(&IDList) {}
  74. void IRInstructionData::setBranchSuccessors(
  75. DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
  76. assert(isa<BranchInst>(Inst) && "Instruction must be branch");
  77. BranchInst *BI = cast<BranchInst>(Inst);
  78. DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
  79. BBNumIt = BasicBlockToInteger.find(BI->getParent());
  80. assert(BBNumIt != BasicBlockToInteger.end() &&
  81. "Could not find location for BasicBlock!");
  82. int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
  83. for (BasicBlock *Successor : BI->successors()) {
  84. BBNumIt = BasicBlockToInteger.find(Successor);
  85. assert(BBNumIt != BasicBlockToInteger.end() &&
  86. "Could not find number for BasicBlock!");
  87. int OtherBlockNumber = static_cast<int>(BBNumIt->second);
  88. int Relative = OtherBlockNumber - CurrentBlockNumber;
  89. RelativeBlockLocations.push_back(Relative);
  90. }
  91. }
  92. void IRInstructionData::setCalleeName(bool MatchByName) {
  93. CallInst *CI = dyn_cast<CallInst>(Inst);
  94. assert(CI && "Instruction must be call");
  95. CalleeName = "";
  96. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  97. // To hash intrinsics, we use the opcode, and types like the other
  98. // instructions, but also, the Intrinsic ID, and the Name of the
  99. // intrinsic.
  100. Intrinsic::ID IntrinsicID = II->getIntrinsicID();
  101. FunctionType *FT = II->getFunctionType();
  102. // If there is an overloaded name, we have to use the complex version
  103. // of getName to get the entire string.
  104. if (Intrinsic::isOverloaded(IntrinsicID))
  105. CalleeName =
  106. Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
  107. // If there is not an overloaded name, we only need to use this version.
  108. else
  109. CalleeName = Intrinsic::getName(IntrinsicID).str();
  110. return;
  111. }
  112. if (!CI->isIndirectCall() && MatchByName)
  113. CalleeName = CI->getCalledFunction()->getName().str();
  114. }
  115. void IRInstructionData::setPHIPredecessors(
  116. DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
  117. assert(isa<PHINode>(Inst) && "Instruction must be phi node");
  118. PHINode *PN = cast<PHINode>(Inst);
  119. DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
  120. BBNumIt = BasicBlockToInteger.find(PN->getParent());
  121. assert(BBNumIt != BasicBlockToInteger.end() &&
  122. "Could not find location for BasicBlock!");
  123. int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
  124. // Convert the incoming blocks of the PHINode to an integer value, based on
  125. // the relative distances between the current block and the incoming block.
  126. for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
  127. BasicBlock *Incoming = PN->getIncomingBlock(Idx);
  128. BBNumIt = BasicBlockToInteger.find(Incoming);
  129. assert(BBNumIt != BasicBlockToInteger.end() &&
  130. "Could not find number for BasicBlock!");
  131. int OtherBlockNumber = static_cast<int>(BBNumIt->second);
  132. int Relative = OtherBlockNumber - CurrentBlockNumber;
  133. RelativeBlockLocations.push_back(Relative);
  134. RelativeBlockLocations.push_back(Relative);
  135. }
  136. }
  137. CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
  138. switch (CI->getPredicate()) {
  139. case CmpInst::FCMP_OGT:
  140. case CmpInst::FCMP_UGT:
  141. case CmpInst::FCMP_OGE:
  142. case CmpInst::FCMP_UGE:
  143. case CmpInst::ICMP_SGT:
  144. case CmpInst::ICMP_UGT:
  145. case CmpInst::ICMP_SGE:
  146. case CmpInst::ICMP_UGE:
  147. return CI->getSwappedPredicate();
  148. default:
  149. return CI->getPredicate();
  150. }
  151. }
  152. CmpInst::Predicate IRInstructionData::getPredicate() const {
  153. assert(isa<CmpInst>(Inst) &&
  154. "Can only get a predicate from a compare instruction");
  155. if (RevisedPredicate.hasValue())
  156. return RevisedPredicate.getValue();
  157. return cast<CmpInst>(Inst)->getPredicate();
  158. }
  159. StringRef IRInstructionData::getCalleeName() const {
  160. assert(isa<CallInst>(Inst) &&
  161. "Can only get a name from a call instruction");
  162. assert(CalleeName.hasValue() && "CalleeName has not been set");
  163. return *CalleeName;
  164. }
  165. bool IRSimilarity::isClose(const IRInstructionData &A,
  166. const IRInstructionData &B) {
  167. if (!A.Legal || !B.Legal)
  168. return false;
  169. // Check if we are performing the same sort of operation on the same types
  170. // but not on the same values.
  171. if (!A.Inst->isSameOperationAs(B.Inst)) {
  172. // If there is a predicate, this means that either there is a swapped
  173. // predicate, or that the types are different, we want to make sure that
  174. // the predicates are equivalent via swapping.
  175. if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
  176. if (A.getPredicate() != B.getPredicate())
  177. return false;
  178. // If the predicates are the same via swap, make sure that the types are
  179. // still the same.
  180. auto ZippedTypes = zip(A.OperVals, B.OperVals);
  181. return all_of(
  182. ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
  183. return std::get<0>(R)->getType() == std::get<1>(R)->getType();
  184. });
  185. }
  186. return false;
  187. }
  188. // Since any GEP Instruction operands after the first operand cannot be
  189. // defined by a register, we must make sure that the operands after the first
  190. // are the same in the two instructions
  191. if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
  192. auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
  193. // If the instructions do not have the same inbounds restrictions, we do
  194. // not consider them the same.
  195. if (GEP->isInBounds() != OtherGEP->isInBounds())
  196. return false;
  197. auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
  198. // We increment here since we do not care about the first instruction,
  199. // we only care about the following operands since they must be the
  200. // exact same to be considered similar.
  201. return all_of(drop_begin(ZippedOperands),
  202. [](std::tuple<llvm::Use &, llvm::Use &> R) {
  203. return std::get<0>(R) == std::get<1>(R);
  204. });
  205. }
  206. // If the instructions are functions calls, we make sure that the function
  207. // name is the same. We already know that the types are since is
  208. // isSameOperationAs is true.
  209. if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
  210. if (A.getCalleeName().str() != B.getCalleeName().str())
  211. return false;
  212. }
  213. if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
  214. A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
  215. return false;
  216. return true;
  217. }
  218. // TODO: This is the same as the MachineOutliner, and should be consolidated
  219. // into the same interface.
  220. void IRInstructionMapper::convertToUnsignedVec(
  221. BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
  222. std::vector<unsigned> &IntegerMapping) {
  223. BasicBlock::iterator It = BB.begin();
  224. std::vector<unsigned> IntegerMappingForBB;
  225. std::vector<IRInstructionData *> InstrListForBB;
  226. for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
  227. switch (InstClassifier.visit(*It)) {
  228. case InstrType::Legal:
  229. mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
  230. break;
  231. case InstrType::Illegal:
  232. mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
  233. break;
  234. case InstrType::Invisible:
  235. AddedIllegalLastTime = false;
  236. break;
  237. }
  238. }
  239. if (HaveLegalRange) {
  240. if (AddedIllegalLastTime)
  241. mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
  242. for (IRInstructionData *ID : InstrListForBB)
  243. this->IDL->push_back(*ID);
  244. llvm::append_range(InstrList, InstrListForBB);
  245. llvm::append_range(IntegerMapping, IntegerMappingForBB);
  246. }
  247. }
  248. // TODO: This is the same as the MachineOutliner, and should be consolidated
  249. // into the same interface.
  250. unsigned IRInstructionMapper::mapToLegalUnsigned(
  251. BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  252. std::vector<IRInstructionData *> &InstrListForBB) {
  253. // We added something legal, so we should unset the AddedLegalLastTime
  254. // flag.
  255. AddedIllegalLastTime = false;
  256. // If we have at least two adjacent legal instructions (which may have
  257. // invisible instructions in between), remember that.
  258. if (CanCombineWithPrevInstr)
  259. HaveLegalRange = true;
  260. CanCombineWithPrevInstr = true;
  261. // Get the integer for this instruction or give it the current
  262. // LegalInstrNumber.
  263. IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
  264. InstrListForBB.push_back(ID);
  265. if (isa<BranchInst>(*It))
  266. ID->setBranchSuccessors(BasicBlockToInteger);
  267. if (isa<CallInst>(*It))
  268. ID->setCalleeName(EnableMatchCallsByName);
  269. if (isa<PHINode>(*It))
  270. ID->setPHIPredecessors(BasicBlockToInteger);
  271. // Add to the instruction list
  272. bool WasInserted;
  273. DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
  274. ResultIt;
  275. std::tie(ResultIt, WasInserted) =
  276. InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
  277. unsigned INumber = ResultIt->second;
  278. // There was an insertion.
  279. if (WasInserted)
  280. LegalInstrNumber++;
  281. IntegerMappingForBB.push_back(INumber);
  282. // Make sure we don't overflow or use any integers reserved by the DenseMap.
  283. assert(LegalInstrNumber < IllegalInstrNumber &&
  284. "Instruction mapping overflow!");
  285. assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
  286. "Tried to assign DenseMap tombstone or empty key to instruction.");
  287. assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
  288. "Tried to assign DenseMap tombstone or empty key to instruction.");
  289. return INumber;
  290. }
  291. IRInstructionData *
  292. IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
  293. IRInstructionDataList &IDL) {
  294. return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
  295. }
  296. IRInstructionData *
  297. IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
  298. return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
  299. }
  300. IRInstructionDataList *
  301. IRInstructionMapper::allocateIRInstructionDataList() {
  302. return new (IDLAllocator->Allocate()) IRInstructionDataList();
  303. }
  304. // TODO: This is the same as the MachineOutliner, and should be consolidated
  305. // into the same interface.
  306. unsigned IRInstructionMapper::mapToIllegalUnsigned(
  307. BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  308. std::vector<IRInstructionData *> &InstrListForBB, bool End) {
  309. // Can't combine an illegal instruction. Set the flag.
  310. CanCombineWithPrevInstr = false;
  311. // Only add one illegal number per range of legal numbers.
  312. if (AddedIllegalLastTime)
  313. return IllegalInstrNumber;
  314. IRInstructionData *ID = nullptr;
  315. if (!End)
  316. ID = allocateIRInstructionData(*It, false, *IDL);
  317. else
  318. ID = allocateIRInstructionData(*IDL);
  319. InstrListForBB.push_back(ID);
  320. // Remember that we added an illegal number last time.
  321. AddedIllegalLastTime = true;
  322. unsigned INumber = IllegalInstrNumber;
  323. IntegerMappingForBB.push_back(IllegalInstrNumber--);
  324. assert(LegalInstrNumber < IllegalInstrNumber &&
  325. "Instruction mapping overflow!");
  326. assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
  327. "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
  328. assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
  329. "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
  330. return INumber;
  331. }
  332. IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
  333. IRInstructionData *FirstInstIt,
  334. IRInstructionData *LastInstIt)
  335. : StartIdx(StartIdx), Len(Len) {
  336. assert(FirstInstIt != nullptr && "Instruction is nullptr!");
  337. assert(LastInstIt != nullptr && "Instruction is nullptr!");
  338. assert(StartIdx + Len > StartIdx &&
  339. "Overflow for IRSimilarityCandidate range?");
  340. assert(Len - 1 == static_cast<unsigned>(std::distance(
  341. iterator(FirstInstIt), iterator(LastInstIt))) &&
  342. "Length of the first and last IRInstructionData do not match the "
  343. "given length");
  344. // We iterate over the given instructions, and map each unique value
  345. // to a unique number in the IRSimilarityCandidate ValueToNumber and
  346. // NumberToValue maps. A constant get its own value globally, the individual
  347. // uses of the constants are not considered to be unique.
  348. //
  349. // IR: Mapping Added:
  350. // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
  351. // %add2 = add i32 %a, %1 %add2 -> 4
  352. // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
  353. //
  354. // when replace with global values, starting from 1, would be
  355. //
  356. // 3 = add i32 1, 2
  357. // 4 = add i32 1, 3
  358. // 6 = add i32 5, 2
  359. unsigned LocalValNumber = 1;
  360. IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
  361. for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
  362. // Map the operand values to an unsigned integer if it does not already
  363. // have an unsigned integer assigned to it.
  364. for (Value *Arg : ID->OperVals)
  365. if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
  366. ValueToNumber.try_emplace(Arg, LocalValNumber);
  367. NumberToValue.try_emplace(LocalValNumber, Arg);
  368. LocalValNumber++;
  369. }
  370. // Mapping the instructions to an unsigned integer if it is not already
  371. // exist in the mapping.
  372. if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
  373. ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
  374. NumberToValue.try_emplace(LocalValNumber, ID->Inst);
  375. LocalValNumber++;
  376. }
  377. }
  378. // Setting the first and last instruction data pointers for the candidate. If
  379. // we got through the entire for loop without hitting an assert, we know
  380. // that both of these instructions are not nullptrs.
  381. FirstInst = FirstInstIt;
  382. LastInst = LastInstIt;
  383. }
  384. bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
  385. const IRSimilarityCandidate &B) {
  386. if (A.getLength() != B.getLength())
  387. return false;
  388. auto InstrDataForBoth =
  389. zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
  390. return all_of(InstrDataForBoth,
  391. [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
  392. IRInstructionData &A = std::get<0>(R);
  393. IRInstructionData &B = std::get<1>(R);
  394. if (!A.Legal || !B.Legal)
  395. return false;
  396. return isClose(A, B);
  397. });
  398. }
  399. /// Determine if one or more of the assigned global value numbers for the
  400. /// operands in \p TargetValueNumbers is in the current mapping set for operand
  401. /// numbers in \p SourceOperands. The set of possible corresponding global
  402. /// value numbers are replaced with the most recent version of compatible
  403. /// values.
  404. ///
  405. /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
  406. /// value number for the source IRInstructionCandidate.
  407. /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
  408. /// IRSimilarityCandidate global value numbers to a set of possible numbers in
  409. /// the target.
  410. /// \param [in] SourceOperands - The operands in the original
  411. /// IRSimilarityCandidate in the current instruction.
  412. /// \param [in] TargetValueNumbers - The global value numbers of the operands in
  413. /// the corresponding Instruction in the other IRSimilarityCandidate.
  414. /// \returns true if there exists a possible mapping between the source
  415. /// Instruction operands and the target Instruction operands, and false if not.
  416. static bool checkNumberingAndReplaceCommutative(
  417. const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
  418. DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
  419. ArrayRef<Value *> &SourceOperands,
  420. DenseSet<unsigned> &TargetValueNumbers){
  421. DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
  422. unsigned ArgVal;
  423. bool WasInserted;
  424. // Iterate over the operands in the source IRSimilarityCandidate to determine
  425. // whether there exists an operand in the other IRSimilarityCandidate that
  426. // creates a valid mapping of Value to Value between the
  427. // IRSimilarityCaniddates.
  428. for (Value *V : SourceOperands) {
  429. ArgVal = SourceValueToNumberMapping.find(V)->second;
  430. std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
  431. std::make_pair(ArgVal, TargetValueNumbers));
  432. // Instead of finding a current mapping, we inserted a set. This means a
  433. // mapping did not exist for the source Instruction operand, it has no
  434. // current constraints we need to check.
  435. if (WasInserted)
  436. continue;
  437. // If a mapping already exists for the source operand to the values in the
  438. // other IRSimilarityCandidate we need to iterate over the items in other
  439. // IRSimilarityCandidate's Instruction to determine whether there is a valid
  440. // mapping of Value to Value.
  441. DenseSet<unsigned> NewSet;
  442. for (unsigned &Curr : ValueMappingIt->second)
  443. // If we can find the value in the mapping, we add it to the new set.
  444. if (TargetValueNumbers.contains(Curr))
  445. NewSet.insert(Curr);
  446. // If we could not find a Value, return 0.
  447. if (NewSet.empty())
  448. return false;
  449. // Otherwise replace the old mapping with the newly constructed one.
  450. if (NewSet.size() != ValueMappingIt->second.size())
  451. ValueMappingIt->second.swap(NewSet);
  452. // We have reached no conclusions about the mapping, and cannot remove
  453. // any items from the other operands, so we move to check the next operand.
  454. if (ValueMappingIt->second.size() != 1)
  455. continue;
  456. unsigned ValToRemove = *ValueMappingIt->second.begin();
  457. // When there is only one item left in the mapping for and operand, remove
  458. // the value from the other operands. If it results in there being no
  459. // mapping, return false, it means the mapping is wrong
  460. for (Value *InnerV : SourceOperands) {
  461. if (V == InnerV)
  462. continue;
  463. unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
  464. ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
  465. if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
  466. continue;
  467. ValueMappingIt->second.erase(ValToRemove);
  468. if (ValueMappingIt->second.empty())
  469. return false;
  470. }
  471. }
  472. return true;
  473. }
  474. /// Determine if operand number \p TargetArgVal is in the current mapping set
  475. /// for operand number \p SourceArgVal.
  476. ///
  477. /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
  478. /// value numbers from source IRSimilarityCandidate to target
  479. /// IRSimilarityCandidate.
  480. /// \param [in] SourceArgVal The global value number for an operand in the
  481. /// in the original candidate.
  482. /// \param [in] TargetArgVal The global value number for the corresponding
  483. /// operand in the other candidate.
  484. /// \returns True if there exists a mapping and false if not.
  485. bool checkNumberingAndReplace(
  486. DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
  487. unsigned SourceArgVal, unsigned TargetArgVal) {
  488. // We are given two unsigned integers representing the global values of
  489. // the operands in different IRSimilarityCandidates and a current mapping
  490. // between the two.
  491. //
  492. // Source Operand GVN: 1
  493. // Target Operand GVN: 2
  494. // CurrentMapping: {1: {1, 2}}
  495. //
  496. // Since we have mapping, and the target operand is contained in the set, we
  497. // update it to:
  498. // CurrentMapping: {1: {2}}
  499. // and can return true. But, if the mapping was
  500. // CurrentMapping: {1: {3}}
  501. // we would return false.
  502. bool WasInserted;
  503. DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
  504. std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
  505. std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
  506. // If we created a new mapping, then we are done.
  507. if (WasInserted)
  508. return true;
  509. // If there is more than one option in the mapping set, and the target value
  510. // is included in the mapping set replace that set with one that only includes
  511. // the target value, as it is the only valid mapping via the non commutative
  512. // instruction.
  513. DenseSet<unsigned> &TargetSet = Val->second;
  514. if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
  515. TargetSet.clear();
  516. TargetSet.insert(TargetArgVal);
  517. return true;
  518. }
  519. // Return true if we can find the value in the set.
  520. return TargetSet.contains(TargetArgVal);
  521. }
  522. bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
  523. OperandMapping A, OperandMapping B) {
  524. // Iterators to keep track of where we are in the operands for each
  525. // Instruction.
  526. ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
  527. ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
  528. unsigned OperandLength = A.OperVals.size();
  529. // For each operand, get the value numbering and ensure it is consistent.
  530. for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
  531. unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
  532. unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
  533. // Attempt to add a set with only the target value. If there is no mapping
  534. // we can create it here.
  535. //
  536. // For an instruction like a subtraction:
  537. // IRSimilarityCandidateA: IRSimilarityCandidateB:
  538. // %resultA = sub %a, %b %resultB = sub %d, %e
  539. //
  540. // We map %a -> %d and %b -> %e.
  541. //
  542. // And check to see whether their mapping is consistent in
  543. // checkNumberingAndReplace.
  544. if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
  545. return false;
  546. if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
  547. return false;
  548. }
  549. return true;
  550. }
  551. bool IRSimilarityCandidate::compareCommutativeOperandMapping(
  552. OperandMapping A, OperandMapping B) {
  553. DenseSet<unsigned> ValueNumbersA;
  554. DenseSet<unsigned> ValueNumbersB;
  555. ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
  556. ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
  557. unsigned OperandLength = A.OperVals.size();
  558. // Find the value number sets for the operands.
  559. for (unsigned Idx = 0; Idx < OperandLength;
  560. Idx++, VItA++, VItB++) {
  561. ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
  562. ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
  563. }
  564. // Iterate over the operands in the first IRSimilarityCandidate and make sure
  565. // there exists a possible mapping with the operands in the second
  566. // IRSimilarityCandidate.
  567. if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
  568. A.ValueNumberMapping, A.OperVals,
  569. ValueNumbersB))
  570. return false;
  571. // Iterate over the operands in the second IRSimilarityCandidate and make sure
  572. // there exists a possible mapping with the operands in the first
  573. // IRSimilarityCandidate.
  574. if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
  575. B.ValueNumberMapping, B.OperVals,
  576. ValueNumbersA))
  577. return false;
  578. return true;
  579. }
  580. bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
  581. RelativeLocMapping B) {
  582. // Get the basic blocks the label refers to.
  583. BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
  584. BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
  585. // Get the basic blocks contained in each region.
  586. DenseSet<BasicBlock *> BasicBlockA;
  587. DenseSet<BasicBlock *> BasicBlockB;
  588. A.IRSC.getBasicBlocks(BasicBlockA);
  589. B.IRSC.getBasicBlocks(BasicBlockB);
  590. // Determine if the block is contained in the region.
  591. bool AContained = BasicBlockA.contains(ABB);
  592. bool BContained = BasicBlockB.contains(BBB);
  593. // Both blocks need to be contained in the region, or both need to be outside
  594. // the reigon.
  595. if (AContained != BContained)
  596. return false;
  597. // If both are contained, then we need to make sure that the relative
  598. // distance to the target blocks are the same.
  599. if (AContained)
  600. return A.RelativeLocation == B.RelativeLocation;
  601. return true;
  602. }
  603. bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
  604. const IRSimilarityCandidate &B) {
  605. DenseMap<unsigned, DenseSet<unsigned>> MappingA;
  606. DenseMap<unsigned, DenseSet<unsigned>> MappingB;
  607. return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
  608. }
  609. typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
  610. SmallVector<int, 4> &, ArrayRef<Value *> &,
  611. ArrayRef<Value *> &>
  612. ZippedRelativeLocationsT;
  613. bool IRSimilarityCandidate::compareStructure(
  614. const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
  615. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
  616. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
  617. if (A.getLength() != B.getLength())
  618. return false;
  619. if (A.ValueToNumber.size() != B.ValueToNumber.size())
  620. return false;
  621. iterator ItA = A.begin();
  622. iterator ItB = B.begin();
  623. // These ValueNumber Mapping sets create a create a mapping between the values
  624. // in one candidate to values in the other candidate. If we create a set with
  625. // one element, and that same element maps to the original element in the
  626. // candidate we have a good mapping.
  627. DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
  628. // Iterate over the instructions contained in each candidate
  629. unsigned SectionLength = A.getStartIdx() + A.getLength();
  630. for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
  631. ItA++, ItB++, Loc++) {
  632. // Make sure the instructions are similar to one another.
  633. if (!isClose(*ItA, *ItB))
  634. return false;
  635. Instruction *IA = ItA->Inst;
  636. Instruction *IB = ItB->Inst;
  637. if (!ItA->Legal || !ItB->Legal)
  638. return false;
  639. // Get the operand sets for the instructions.
  640. ArrayRef<Value *> OperValsA = ItA->OperVals;
  641. ArrayRef<Value *> OperValsB = ItB->OperVals;
  642. unsigned InstValA = A.ValueToNumber.find(IA)->second;
  643. unsigned InstValB = B.ValueToNumber.find(IB)->second;
  644. bool WasInserted;
  645. // Ensure that the mappings for the instructions exists.
  646. std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
  647. std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
  648. if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
  649. return false;
  650. std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
  651. std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
  652. if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
  653. return false;
  654. // We have different paths for commutative instructions and non-commutative
  655. // instructions since commutative instructions could allow multiple mappings
  656. // to certain values.
  657. if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
  658. if (!compareCommutativeOperandMapping(
  659. {A, OperValsA, ValueNumberMappingA},
  660. {B, OperValsB, ValueNumberMappingB}))
  661. return false;
  662. continue;
  663. }
  664. // Handle the non-commutative cases.
  665. if (!compareNonCommutativeOperandMapping(
  666. {A, OperValsA, ValueNumberMappingA},
  667. {B, OperValsB, ValueNumberMappingB}))
  668. return false;
  669. // Here we check that between two corresponding instructions,
  670. // when referring to a basic block in the same region, the
  671. // relative locations are the same. And, that the instructions refer to
  672. // basic blocks outside the region in the same corresponding locations.
  673. // We are able to make the assumption about blocks outside of the region
  674. // since the target block labels are considered values and will follow the
  675. // same number matching that we defined for the other instructions in the
  676. // region. So, at this point, in each location we target a specific block
  677. // outside the region, we are targeting a corresponding block in each
  678. // analagous location in the region we are comparing to.
  679. if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
  680. !(isa<PHINode>(IA) && isa<PHINode>(IB)))
  681. continue;
  682. SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
  683. SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
  684. if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
  685. OperValsA.size() != OperValsB.size())
  686. return false;
  687. ZippedRelativeLocationsT ZippedRelativeLocations =
  688. zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
  689. if (any_of(ZippedRelativeLocations,
  690. [&A, &B](std::tuple<int, int, Value *, Value *> R) {
  691. return !checkRelativeLocations(
  692. {A, std::get<0>(R), std::get<2>(R)},
  693. {B, std::get<1>(R), std::get<3>(R)});
  694. }))
  695. return false;
  696. }
  697. return true;
  698. }
  699. bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
  700. const IRSimilarityCandidate &B) {
  701. auto DoesOverlap = [](const IRSimilarityCandidate &X,
  702. const IRSimilarityCandidate &Y) {
  703. // Check:
  704. // XXXXXX X starts before Y ends
  705. // YYYYYYY Y starts after X starts
  706. return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
  707. };
  708. return DoesOverlap(A, B) || DoesOverlap(B, A);
  709. }
  710. void IRSimilarityIdentifier::populateMapper(
  711. Module &M, std::vector<IRInstructionData *> &InstrList,
  712. std::vector<unsigned> &IntegerMapping) {
  713. std::vector<IRInstructionData *> InstrListForModule;
  714. std::vector<unsigned> IntegerMappingForModule;
  715. // Iterate over the functions in the module to map each Instruction in each
  716. // BasicBlock to an unsigned integer.
  717. Mapper.initializeForBBs(M);
  718. for (Function &F : M) {
  719. if (F.empty())
  720. continue;
  721. for (BasicBlock &BB : F) {
  722. // BB has potential to have similarity since it has a size greater than 2
  723. // and can therefore match other regions greater than 2. Map it to a list
  724. // of unsigned integers.
  725. Mapper.convertToUnsignedVec(BB, InstrListForModule,
  726. IntegerMappingForModule);
  727. }
  728. BasicBlock::iterator It = F.begin()->end();
  729. Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
  730. true);
  731. if (InstrListForModule.size() > 0)
  732. Mapper.IDL->push_back(*InstrListForModule.back());
  733. }
  734. // Insert the InstrListForModule at the end of the overall InstrList so that
  735. // we can have a long InstrList for the entire set of Modules being analyzed.
  736. llvm::append_range(InstrList, InstrListForModule);
  737. // Do the same as above, but for IntegerMapping.
  738. llvm::append_range(IntegerMapping, IntegerMappingForModule);
  739. }
  740. void IRSimilarityIdentifier::populateMapper(
  741. ArrayRef<std::unique_ptr<Module>> &Modules,
  742. std::vector<IRInstructionData *> &InstrList,
  743. std::vector<unsigned> &IntegerMapping) {
  744. // Iterate over, and map the instructions in each module.
  745. for (const std::unique_ptr<Module> &M : Modules)
  746. populateMapper(*M, InstrList, IntegerMapping);
  747. }
  748. /// From a repeated subsequence, find all the different instances of the
  749. /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
  750. /// the IRInstructionData in subsequence.
  751. ///
  752. /// \param [in] Mapper - The instruction mapper for basic correctness checks.
  753. /// \param [in] InstrList - The vector that holds the instruction data.
  754. /// \param [in] IntegerMapping - The vector that holds the mapped integers.
  755. /// \param [out] CandsForRepSubstring - The vector to store the generated
  756. /// IRSimilarityCandidates.
  757. static void createCandidatesFromSuffixTree(
  758. const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
  759. std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
  760. std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
  761. unsigned StringLen = RS.Length;
  762. if (StringLen < 2)
  763. return;
  764. // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
  765. for (const unsigned &StartIdx : RS.StartIndices) {
  766. unsigned EndIdx = StartIdx + StringLen - 1;
  767. // Check that this subsequence does not contain an illegal instruction.
  768. bool ContainsIllegal = false;
  769. for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
  770. unsigned Key = IntegerMapping[CurrIdx];
  771. if (Key > Mapper.IllegalInstrNumber) {
  772. ContainsIllegal = true;
  773. break;
  774. }
  775. }
  776. // If we have an illegal instruction, we should not create an
  777. // IRSimilarityCandidate for this region.
  778. if (ContainsIllegal)
  779. continue;
  780. // We are getting iterators to the instructions in this region of code
  781. // by advancing the start and end indices from the start of the
  782. // InstrList.
  783. std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
  784. std::advance(StartIt, StartIdx);
  785. std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
  786. std::advance(EndIt, EndIdx);
  787. CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
  788. }
  789. }
  790. void IRSimilarityCandidate::createCanonicalRelationFrom(
  791. IRSimilarityCandidate &SourceCand,
  792. DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
  793. DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
  794. assert(SourceCand.CanonNumToNumber.size() != 0 &&
  795. "Base canonical relationship is empty!");
  796. assert(SourceCand.NumberToCanonNum.size() != 0 &&
  797. "Base canonical relationship is empty!");
  798. assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
  799. assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
  800. DenseSet<unsigned> UsedGVNs;
  801. // Iterate over the mappings provided from this candidate to SourceCand. We
  802. // are then able to map the GVN in this candidate to the same canonical number
  803. // given to the corresponding GVN in SourceCand.
  804. for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
  805. unsigned SourceGVN = GVNMapping.first;
  806. assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
  807. unsigned ResultGVN;
  808. // We need special handling if we have more than one potential value. This
  809. // means that there are at least two GVNs that could correspond to this GVN.
  810. // This could lead to potential swapping later on, so we make a decision
  811. // here to ensure a one-to-one mapping.
  812. if (GVNMapping.second.size() > 1) {
  813. bool Found = false;
  814. for (unsigned Val : GVNMapping.second) {
  815. // We make sure the target value number hasn't already been reserved.
  816. if (UsedGVNs.contains(Val))
  817. continue;
  818. // We make sure that the opposite mapping is still consistent.
  819. DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
  820. FromSourceMapping.find(Val);
  821. if (!It->second.contains(SourceGVN))
  822. continue;
  823. // We pick the first item that satisfies these conditions.
  824. Found = true;
  825. ResultGVN = Val;
  826. break;
  827. }
  828. assert(Found && "Could not find matching value for source GVN");
  829. (void)Found;
  830. } else
  831. ResultGVN = *GVNMapping.second.begin();
  832. // Whatever GVN is found, we mark it as used.
  833. UsedGVNs.insert(ResultGVN);
  834. unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
  835. CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
  836. NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
  837. }
  838. }
  839. void IRSimilarityCandidate::createCanonicalMappingFor(
  840. IRSimilarityCandidate &CurrCand) {
  841. assert(CurrCand.CanonNumToNumber.size() == 0 &&
  842. "Canonical Relationship is non-empty");
  843. assert(CurrCand.NumberToCanonNum.size() == 0 &&
  844. "Canonical Relationship is non-empty");
  845. unsigned CanonNum = 0;
  846. // Iterate over the value numbers found, the order does not matter in this
  847. // case.
  848. for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
  849. CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
  850. CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
  851. CanonNum++;
  852. }
  853. }
  854. /// From the list of IRSimilarityCandidates, perform a comparison between each
  855. /// IRSimilarityCandidate to determine if there are overlapping
  856. /// IRInstructionData, or if they do not have the same structure.
  857. ///
  858. /// \param [in] CandsForRepSubstring - The vector containing the
  859. /// IRSimilarityCandidates.
  860. /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
  861. /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
  862. /// vector are structurally similar to one another.
  863. static void findCandidateStructures(
  864. std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
  865. DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
  866. std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
  867. InnerCandEndIt;
  868. // IRSimilarityCandidates each have a structure for operand use. It is
  869. // possible that two instances of the same subsequences have different
  870. // structure. Each type of structure found is assigned a number. This
  871. // DenseMap maps an IRSimilarityCandidate to which type of similarity
  872. // discovered it fits within.
  873. DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
  874. // Find the compatibility from each candidate to the others to determine
  875. // which candidates overlap and which have the same structure by mapping
  876. // each structure to a different group.
  877. bool SameStructure;
  878. bool Inserted;
  879. unsigned CurrentGroupNum = 0;
  880. unsigned OuterGroupNum;
  881. DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
  882. DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
  883. DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
  884. // Iterate over the candidates to determine its structural and overlapping
  885. // compatibility with other instructions
  886. DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
  887. DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
  888. for (CandIt = CandsForRepSubstring.begin(),
  889. CandEndIt = CandsForRepSubstring.end();
  890. CandIt != CandEndIt; CandIt++) {
  891. // Determine if it has an assigned structural group already.
  892. CandToGroupIt = CandToGroup.find(&*CandIt);
  893. if (CandToGroupIt == CandToGroup.end()) {
  894. // If not, we assign it one, and add it to our mapping.
  895. std::tie(CandToGroupIt, Inserted) =
  896. CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
  897. }
  898. // Get the structural group number from the iterator.
  899. OuterGroupNum = CandToGroupIt->second;
  900. // Check if we already have a list of IRSimilarityCandidates for the current
  901. // structural group. Create one if one does not exist.
  902. CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
  903. if (CurrentGroupPair == StructuralGroups.end()) {
  904. IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
  905. std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
  906. std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
  907. }
  908. // Iterate over the IRSimilarityCandidates following the current
  909. // IRSimilarityCandidate in the list to determine whether the two
  910. // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
  911. // of IRSimilarityCandidates.
  912. for (InnerCandIt = std::next(CandIt),
  913. InnerCandEndIt = CandsForRepSubstring.end();
  914. InnerCandIt != InnerCandEndIt; InnerCandIt++) {
  915. // We check if the inner item has a group already, if it does, we skip it.
  916. CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
  917. if (CandToGroupItInner != CandToGroup.end())
  918. continue;
  919. // Otherwise we determine if they have the same structure and add it to
  920. // vector if they match.
  921. ValueNumberMappingA.clear();
  922. ValueNumberMappingB.clear();
  923. SameStructure = IRSimilarityCandidate::compareStructure(
  924. *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
  925. if (!SameStructure)
  926. continue;
  927. InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
  928. ValueNumberMappingB);
  929. CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
  930. CurrentGroupPair->second.push_back(*InnerCandIt);
  931. }
  932. }
  933. }
  934. void IRSimilarityIdentifier::findCandidates(
  935. std::vector<IRInstructionData *> &InstrList,
  936. std::vector<unsigned> &IntegerMapping) {
  937. SuffixTree ST(IntegerMapping);
  938. std::vector<IRSimilarityCandidate> CandsForRepSubstring;
  939. std::vector<SimilarityGroup> NewCandidateGroups;
  940. DenseMap<unsigned, SimilarityGroup> StructuralGroups;
  941. // Iterate over the subsequences found by the Suffix Tree to create
  942. // IRSimilarityCandidates for each repeated subsequence and determine which
  943. // instances are structurally similar to one another.
  944. for (SuffixTree::RepeatedSubstring &RS : ST) {
  945. createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
  946. CandsForRepSubstring);
  947. if (CandsForRepSubstring.size() < 2)
  948. continue;
  949. findCandidateStructures(CandsForRepSubstring, StructuralGroups);
  950. for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
  951. // We only add the group if it contains more than one
  952. // IRSimilarityCandidate. If there is only one, that means there is no
  953. // other repeated subsequence with the same structure.
  954. if (Group.second.size() > 1)
  955. SimilarityCandidates->push_back(Group.second);
  956. CandsForRepSubstring.clear();
  957. StructuralGroups.clear();
  958. NewCandidateGroups.clear();
  959. }
  960. }
  961. SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
  962. ArrayRef<std::unique_ptr<Module>> Modules) {
  963. resetSimilarityCandidates();
  964. std::vector<IRInstructionData *> InstrList;
  965. std::vector<unsigned> IntegerMapping;
  966. Mapper.InstClassifier.EnableBranches = this->EnableBranches;
  967. Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
  968. Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
  969. Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
  970. populateMapper(Modules, InstrList, IntegerMapping);
  971. findCandidates(InstrList, IntegerMapping);
  972. return SimilarityCandidates.getValue();
  973. }
  974. SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
  975. resetSimilarityCandidates();
  976. Mapper.InstClassifier.EnableBranches = this->EnableBranches;
  977. Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
  978. Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
  979. Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
  980. std::vector<IRInstructionData *> InstrList;
  981. std::vector<unsigned> IntegerMapping;
  982. populateMapper(M, InstrList, IntegerMapping);
  983. findCandidates(InstrList, IntegerMapping);
  984. return SimilarityCandidates.getValue();
  985. }
  986. INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
  987. "ir-similarity-identifier", false, true)
  988. IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
  989. : ModulePass(ID) {
  990. initializeIRSimilarityIdentifierWrapperPassPass(
  991. *PassRegistry::getPassRegistry());
  992. }
  993. bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
  994. IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
  995. MatchCallsByName, !DisableIntrinsics));
  996. return false;
  997. }
  998. bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
  999. IRSI.reset();
  1000. return false;
  1001. }
  1002. bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
  1003. IRSI->findSimilarity(M);
  1004. return false;
  1005. }
  1006. AnalysisKey IRSimilarityAnalysis::Key;
  1007. IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
  1008. ModuleAnalysisManager &) {
  1009. auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
  1010. MatchCallsByName, !DisableIntrinsics);
  1011. IRSI.findSimilarity(M);
  1012. return IRSI;
  1013. }
  1014. PreservedAnalyses
  1015. IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
  1016. IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
  1017. Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
  1018. for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
  1019. OS << CandVec.size() << " candidates of length "
  1020. << CandVec.begin()->getLength() << ". Found in: \n";
  1021. for (IRSimilarityCandidate &Cand : CandVec) {
  1022. OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
  1023. << ", Basic Block: ";
  1024. if (Cand.front()->Inst->getParent()->getName().str() == "")
  1025. OS << "(unnamed)";
  1026. else
  1027. OS << Cand.front()->Inst->getParent()->getName().str();
  1028. OS << "\n Start Instruction: ";
  1029. Cand.frontInstruction()->print(OS);
  1030. OS << "\n End Instruction: ";
  1031. Cand.backInstruction()->print(OS);
  1032. OS << "\n";
  1033. }
  1034. }
  1035. return PreservedAnalyses::all();
  1036. }
  1037. char IRSimilarityIdentifierWrapperPass::ID = 0;