IRSimilarityIdentifier.cpp 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291
  1. //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // \file
  10. // Implementation file for the IRSimilarityIdentifier for identifying
  11. // similarities in IR including the IRInstructionMapper.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Analysis/IRSimilarityIdentifier.h"
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/IR/Intrinsics.h"
  17. #include "llvm/IR/Operator.h"
  18. #include "llvm/IR/User.h"
  19. #include "llvm/InitializePasses.h"
  20. #include "llvm/Support/SuffixTree.h"
  21. using namespace llvm;
  22. using namespace IRSimilarity;
  23. namespace llvm {
  24. cl::opt<bool>
  25. DisableBranches("no-ir-sim-branch-matching", cl::init(false),
  26. cl::ReallyHidden,
  27. cl::desc("disable similarity matching, and outlining, "
  28. "across branches for debugging purposes."));
  29. cl::opt<bool>
  30. DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
  31. cl::ReallyHidden,
  32. cl::desc("disable outlining indirect calls."));
  33. cl::opt<bool>
  34. MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
  35. cl::desc("only allow matching call instructions if the "
  36. "name and type signature match."));
  37. cl::opt<bool>
  38. DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
  39. cl::desc("Don't match or outline intrinsics"));
  40. } // namespace llvm
  41. IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
  42. IRInstructionDataList &IDList)
  43. : Inst(&I), Legal(Legality), IDL(&IDList) {
  44. initializeInstruction();
  45. }
  46. void IRInstructionData::initializeInstruction() {
  47. // We check for whether we have a comparison instruction. If it is, we
  48. // find the "less than" version of the predicate for consistency for
  49. // comparison instructions throught the program.
  50. if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
  51. CmpInst::Predicate Predicate = predicateForConsistency(C);
  52. if (Predicate != C->getPredicate())
  53. RevisedPredicate = Predicate;
  54. }
  55. // Here we collect the operands and their types for determining whether
  56. // the structure of the operand use matches between two different candidates.
  57. for (Use &OI : Inst->operands()) {
  58. if (isa<CmpInst>(Inst) && RevisedPredicate) {
  59. // If we have a CmpInst where the predicate is reversed, it means the
  60. // operands must be reversed as well.
  61. OperVals.insert(OperVals.begin(), OI.get());
  62. continue;
  63. }
  64. OperVals.push_back(OI.get());
  65. }
  66. // We capture the incoming BasicBlocks as values as well as the incoming
  67. // Values in order to check for structural similarity.
  68. if (PHINode *PN = dyn_cast<PHINode>(Inst))
  69. for (BasicBlock *BB : PN->blocks())
  70. OperVals.push_back(BB);
  71. }
  72. IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
  73. : IDL(&IDList) {}
  74. void IRInstructionData::setBranchSuccessors(
  75. DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
  76. assert(isa<BranchInst>(Inst) && "Instruction must be branch");
  77. BranchInst *BI = cast<BranchInst>(Inst);
  78. DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
  79. BBNumIt = BasicBlockToInteger.find(BI->getParent());
  80. assert(BBNumIt != BasicBlockToInteger.end() &&
  81. "Could not find location for BasicBlock!");
  82. int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
  83. for (BasicBlock *Successor : BI->successors()) {
  84. BBNumIt = BasicBlockToInteger.find(Successor);
  85. assert(BBNumIt != BasicBlockToInteger.end() &&
  86. "Could not find number for BasicBlock!");
  87. int OtherBlockNumber = static_cast<int>(BBNumIt->second);
  88. int Relative = OtherBlockNumber - CurrentBlockNumber;
  89. RelativeBlockLocations.push_back(Relative);
  90. }
  91. }
  92. void IRInstructionData::setCalleeName(bool MatchByName) {
  93. CallInst *CI = dyn_cast<CallInst>(Inst);
  94. assert(CI && "Instruction must be call");
  95. CalleeName = "";
  96. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  97. // To hash intrinsics, we use the opcode, and types like the other
  98. // instructions, but also, the Intrinsic ID, and the Name of the
  99. // intrinsic.
  100. Intrinsic::ID IntrinsicID = II->getIntrinsicID();
  101. FunctionType *FT = II->getFunctionType();
  102. // If there is an overloaded name, we have to use the complex version
  103. // of getName to get the entire string.
  104. if (Intrinsic::isOverloaded(IntrinsicID))
  105. CalleeName =
  106. Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
  107. // If there is not an overloaded name, we only need to use this version.
  108. else
  109. CalleeName = Intrinsic::getName(IntrinsicID).str();
  110. return;
  111. }
  112. if (!CI->isIndirectCall() && MatchByName)
  113. CalleeName = CI->getCalledFunction()->getName().str();
  114. }
  115. void IRInstructionData::setPHIPredecessors(
  116. DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
  117. assert(isa<PHINode>(Inst) && "Instruction must be phi node");
  118. PHINode *PN = cast<PHINode>(Inst);
  119. DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
  120. BBNumIt = BasicBlockToInteger.find(PN->getParent());
  121. assert(BBNumIt != BasicBlockToInteger.end() &&
  122. "Could not find location for BasicBlock!");
  123. int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
  124. // Convert the incoming blocks of the PHINode to an integer value, based on
  125. // the relative distances between the current block and the incoming block.
  126. for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
  127. BasicBlock *Incoming = PN->getIncomingBlock(Idx);
  128. BBNumIt = BasicBlockToInteger.find(Incoming);
  129. assert(BBNumIt != BasicBlockToInteger.end() &&
  130. "Could not find number for BasicBlock!");
  131. int OtherBlockNumber = static_cast<int>(BBNumIt->second);
  132. int Relative = OtherBlockNumber - CurrentBlockNumber;
  133. RelativeBlockLocations.push_back(Relative);
  134. RelativeBlockLocations.push_back(Relative);
  135. }
  136. }
  137. CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
  138. switch (CI->getPredicate()) {
  139. case CmpInst::FCMP_OGT:
  140. case CmpInst::FCMP_UGT:
  141. case CmpInst::FCMP_OGE:
  142. case CmpInst::FCMP_UGE:
  143. case CmpInst::ICMP_SGT:
  144. case CmpInst::ICMP_UGT:
  145. case CmpInst::ICMP_SGE:
  146. case CmpInst::ICMP_UGE:
  147. return CI->getSwappedPredicate();
  148. default:
  149. return CI->getPredicate();
  150. }
  151. }
  152. CmpInst::Predicate IRInstructionData::getPredicate() const {
  153. assert(isa<CmpInst>(Inst) &&
  154. "Can only get a predicate from a compare instruction");
  155. if (RevisedPredicate)
  156. return *RevisedPredicate;
  157. return cast<CmpInst>(Inst)->getPredicate();
  158. }
  159. StringRef IRInstructionData::getCalleeName() const {
  160. assert(isa<CallInst>(Inst) &&
  161. "Can only get a name from a call instruction");
  162. assert(CalleeName && "CalleeName has not been set");
  163. return *CalleeName;
  164. }
  165. bool IRSimilarity::isClose(const IRInstructionData &A,
  166. const IRInstructionData &B) {
  167. if (!A.Legal || !B.Legal)
  168. return false;
  169. // Check if we are performing the same sort of operation on the same types
  170. // but not on the same values.
  171. if (!A.Inst->isSameOperationAs(B.Inst)) {
  172. // If there is a predicate, this means that either there is a swapped
  173. // predicate, or that the types are different, we want to make sure that
  174. // the predicates are equivalent via swapping.
  175. if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
  176. if (A.getPredicate() != B.getPredicate())
  177. return false;
  178. // If the predicates are the same via swap, make sure that the types are
  179. // still the same.
  180. auto ZippedTypes = zip(A.OperVals, B.OperVals);
  181. return all_of(
  182. ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
  183. return std::get<0>(R)->getType() == std::get<1>(R)->getType();
  184. });
  185. }
  186. return false;
  187. }
  188. // Since any GEP Instruction operands after the first operand cannot be
  189. // defined by a register, we must make sure that the operands after the first
  190. // are the same in the two instructions
  191. if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
  192. auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
  193. // If the instructions do not have the same inbounds restrictions, we do
  194. // not consider them the same.
  195. if (GEP->isInBounds() != OtherGEP->isInBounds())
  196. return false;
  197. auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
  198. // We increment here since we do not care about the first instruction,
  199. // we only care about the following operands since they must be the
  200. // exact same to be considered similar.
  201. return all_of(drop_begin(ZippedOperands),
  202. [](std::tuple<llvm::Use &, llvm::Use &> R) {
  203. return std::get<0>(R) == std::get<1>(R);
  204. });
  205. }
  206. // If the instructions are functions calls, we make sure that the function
  207. // name is the same. We already know that the types are since is
  208. // isSameOperationAs is true.
  209. if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
  210. if (A.getCalleeName().str() != B.getCalleeName().str())
  211. return false;
  212. }
  213. if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
  214. A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
  215. return false;
  216. return true;
  217. }
  218. // TODO: This is the same as the MachineOutliner, and should be consolidated
  219. // into the same interface.
  220. void IRInstructionMapper::convertToUnsignedVec(
  221. BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
  222. std::vector<unsigned> &IntegerMapping) {
  223. BasicBlock::iterator It = BB.begin();
  224. std::vector<unsigned> IntegerMappingForBB;
  225. std::vector<IRInstructionData *> InstrListForBB;
  226. for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
  227. switch (InstClassifier.visit(*It)) {
  228. case InstrType::Legal:
  229. mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
  230. break;
  231. case InstrType::Illegal:
  232. mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
  233. break;
  234. case InstrType::Invisible:
  235. AddedIllegalLastTime = false;
  236. break;
  237. }
  238. }
  239. if (AddedIllegalLastTime)
  240. mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
  241. for (IRInstructionData *ID : InstrListForBB)
  242. this->IDL->push_back(*ID);
  243. llvm::append_range(InstrList, InstrListForBB);
  244. llvm::append_range(IntegerMapping, IntegerMappingForBB);
  245. }
  246. // TODO: This is the same as the MachineOutliner, and should be consolidated
  247. // into the same interface.
  248. unsigned IRInstructionMapper::mapToLegalUnsigned(
  249. BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  250. std::vector<IRInstructionData *> &InstrListForBB) {
  251. // We added something legal, so we should unset the AddedLegalLastTime
  252. // flag.
  253. AddedIllegalLastTime = false;
  254. // If we have at least two adjacent legal instructions (which may have
  255. // invisible instructions in between), remember that.
  256. if (CanCombineWithPrevInstr)
  257. HaveLegalRange = true;
  258. CanCombineWithPrevInstr = true;
  259. // Get the integer for this instruction or give it the current
  260. // LegalInstrNumber.
  261. IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
  262. InstrListForBB.push_back(ID);
  263. if (isa<BranchInst>(*It))
  264. ID->setBranchSuccessors(BasicBlockToInteger);
  265. if (isa<CallInst>(*It))
  266. ID->setCalleeName(EnableMatchCallsByName);
  267. if (isa<PHINode>(*It))
  268. ID->setPHIPredecessors(BasicBlockToInteger);
  269. // Add to the instruction list
  270. bool WasInserted;
  271. DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
  272. ResultIt;
  273. std::tie(ResultIt, WasInserted) =
  274. InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
  275. unsigned INumber = ResultIt->second;
  276. // There was an insertion.
  277. if (WasInserted)
  278. LegalInstrNumber++;
  279. IntegerMappingForBB.push_back(INumber);
  280. // Make sure we don't overflow or use any integers reserved by the DenseMap.
  281. assert(LegalInstrNumber < IllegalInstrNumber &&
  282. "Instruction mapping overflow!");
  283. assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
  284. "Tried to assign DenseMap tombstone or empty key to instruction.");
  285. assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
  286. "Tried to assign DenseMap tombstone or empty key to instruction.");
  287. return INumber;
  288. }
  289. IRInstructionData *
  290. IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
  291. IRInstructionDataList &IDL) {
  292. return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
  293. }
  294. IRInstructionData *
  295. IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
  296. return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
  297. }
  298. IRInstructionDataList *
  299. IRInstructionMapper::allocateIRInstructionDataList() {
  300. return new (IDLAllocator->Allocate()) IRInstructionDataList();
  301. }
  302. // TODO: This is the same as the MachineOutliner, and should be consolidated
  303. // into the same interface.
  304. unsigned IRInstructionMapper::mapToIllegalUnsigned(
  305. BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  306. std::vector<IRInstructionData *> &InstrListForBB, bool End) {
  307. // Can't combine an illegal instruction. Set the flag.
  308. CanCombineWithPrevInstr = false;
  309. // Only add one illegal number per range of legal numbers.
  310. if (AddedIllegalLastTime)
  311. return IllegalInstrNumber;
  312. IRInstructionData *ID = nullptr;
  313. if (!End)
  314. ID = allocateIRInstructionData(*It, false, *IDL);
  315. else
  316. ID = allocateIRInstructionData(*IDL);
  317. InstrListForBB.push_back(ID);
  318. // Remember that we added an illegal number last time.
  319. AddedIllegalLastTime = true;
  320. unsigned INumber = IllegalInstrNumber;
  321. IntegerMappingForBB.push_back(IllegalInstrNumber--);
  322. assert(LegalInstrNumber < IllegalInstrNumber &&
  323. "Instruction mapping overflow!");
  324. assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
  325. "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
  326. assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
  327. "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
  328. return INumber;
  329. }
  330. IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
  331. IRInstructionData *FirstInstIt,
  332. IRInstructionData *LastInstIt)
  333. : StartIdx(StartIdx), Len(Len) {
  334. assert(FirstInstIt != nullptr && "Instruction is nullptr!");
  335. assert(LastInstIt != nullptr && "Instruction is nullptr!");
  336. assert(StartIdx + Len > StartIdx &&
  337. "Overflow for IRSimilarityCandidate range?");
  338. assert(Len - 1 == static_cast<unsigned>(std::distance(
  339. iterator(FirstInstIt), iterator(LastInstIt))) &&
  340. "Length of the first and last IRInstructionData do not match the "
  341. "given length");
  342. // We iterate over the given instructions, and map each unique value
  343. // to a unique number in the IRSimilarityCandidate ValueToNumber and
  344. // NumberToValue maps. A constant get its own value globally, the individual
  345. // uses of the constants are not considered to be unique.
  346. //
  347. // IR: Mapping Added:
  348. // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
  349. // %add2 = add i32 %a, %1 %add2 -> 4
  350. // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
  351. //
  352. // when replace with global values, starting from 1, would be
  353. //
  354. // 3 = add i32 1, 2
  355. // 4 = add i32 1, 3
  356. // 6 = add i32 5, 2
  357. unsigned LocalValNumber = 1;
  358. IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
  359. for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
  360. // Map the operand values to an unsigned integer if it does not already
  361. // have an unsigned integer assigned to it.
  362. for (Value *Arg : ID->OperVals)
  363. if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
  364. ValueToNumber.try_emplace(Arg, LocalValNumber);
  365. NumberToValue.try_emplace(LocalValNumber, Arg);
  366. LocalValNumber++;
  367. }
  368. // Mapping the instructions to an unsigned integer if it is not already
  369. // exist in the mapping.
  370. if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
  371. ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
  372. NumberToValue.try_emplace(LocalValNumber, ID->Inst);
  373. LocalValNumber++;
  374. }
  375. }
  376. // Setting the first and last instruction data pointers for the candidate. If
  377. // we got through the entire for loop without hitting an assert, we know
  378. // that both of these instructions are not nullptrs.
  379. FirstInst = FirstInstIt;
  380. LastInst = LastInstIt;
  381. // Add the basic blocks contained in the set into the global value numbering.
  382. DenseSet<BasicBlock *> BBSet;
  383. getBasicBlocks(BBSet);
  384. for (BasicBlock *BB : BBSet) {
  385. if (ValueToNumber.find(BB) != ValueToNumber.end())
  386. continue;
  387. ValueToNumber.try_emplace(BB, LocalValNumber);
  388. NumberToValue.try_emplace(LocalValNumber, BB);
  389. LocalValNumber++;
  390. }
  391. }
  392. bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
  393. const IRSimilarityCandidate &B) {
  394. if (A.getLength() != B.getLength())
  395. return false;
  396. auto InstrDataForBoth =
  397. zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
  398. return all_of(InstrDataForBoth,
  399. [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
  400. IRInstructionData &A = std::get<0>(R);
  401. IRInstructionData &B = std::get<1>(R);
  402. if (!A.Legal || !B.Legal)
  403. return false;
  404. return isClose(A, B);
  405. });
  406. }
  407. /// Determine if one or more of the assigned global value numbers for the
  408. /// operands in \p TargetValueNumbers is in the current mapping set for operand
  409. /// numbers in \p SourceOperands. The set of possible corresponding global
  410. /// value numbers are replaced with the most recent version of compatible
  411. /// values.
  412. ///
  413. /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
  414. /// value number for the source IRInstructionCandidate.
  415. /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
  416. /// IRSimilarityCandidate global value numbers to a set of possible numbers in
  417. /// the target.
  418. /// \param [in] SourceOperands - The operands in the original
  419. /// IRSimilarityCandidate in the current instruction.
  420. /// \param [in] TargetValueNumbers - The global value numbers of the operands in
  421. /// the corresponding Instruction in the other IRSimilarityCandidate.
  422. /// \returns true if there exists a possible mapping between the source
  423. /// Instruction operands and the target Instruction operands, and false if not.
  424. static bool checkNumberingAndReplaceCommutative(
  425. const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
  426. DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
  427. ArrayRef<Value *> &SourceOperands,
  428. DenseSet<unsigned> &TargetValueNumbers){
  429. DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
  430. unsigned ArgVal;
  431. bool WasInserted;
  432. // Iterate over the operands in the source IRSimilarityCandidate to determine
  433. // whether there exists an operand in the other IRSimilarityCandidate that
  434. // creates a valid mapping of Value to Value between the
  435. // IRSimilarityCaniddates.
  436. for (Value *V : SourceOperands) {
  437. ArgVal = SourceValueToNumberMapping.find(V)->second;
  438. // Instead of finding a current mapping, we attempt to insert a set.
  439. std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
  440. std::make_pair(ArgVal, TargetValueNumbers));
  441. // We need to iterate over the items in other IRSimilarityCandidate's
  442. // Instruction to determine whether there is a valid mapping of
  443. // Value to Value.
  444. DenseSet<unsigned> NewSet;
  445. for (unsigned &Curr : ValueMappingIt->second)
  446. // If we can find the value in the mapping, we add it to the new set.
  447. if (TargetValueNumbers.contains(Curr))
  448. NewSet.insert(Curr);
  449. // If we could not find a Value, return 0.
  450. if (NewSet.empty())
  451. return false;
  452. // Otherwise replace the old mapping with the newly constructed one.
  453. if (NewSet.size() != ValueMappingIt->second.size())
  454. ValueMappingIt->second.swap(NewSet);
  455. // We have reached no conclusions about the mapping, and cannot remove
  456. // any items from the other operands, so we move to check the next operand.
  457. if (ValueMappingIt->second.size() != 1)
  458. continue;
  459. unsigned ValToRemove = *ValueMappingIt->second.begin();
  460. // When there is only one item left in the mapping for and operand, remove
  461. // the value from the other operands. If it results in there being no
  462. // mapping, return false, it means the mapping is wrong
  463. for (Value *InnerV : SourceOperands) {
  464. if (V == InnerV)
  465. continue;
  466. unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
  467. ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
  468. if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
  469. continue;
  470. ValueMappingIt->second.erase(ValToRemove);
  471. if (ValueMappingIt->second.empty())
  472. return false;
  473. }
  474. }
  475. return true;
  476. }
  477. /// Determine if operand number \p TargetArgVal is in the current mapping set
  478. /// for operand number \p SourceArgVal.
  479. ///
  480. /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
  481. /// value numbers from source IRSimilarityCandidate to target
  482. /// IRSimilarityCandidate.
  483. /// \param [in] SourceArgVal The global value number for an operand in the
  484. /// in the original candidate.
  485. /// \param [in] TargetArgVal The global value number for the corresponding
  486. /// operand in the other candidate.
  487. /// \returns True if there exists a mapping and false if not.
  488. bool checkNumberingAndReplace(
  489. DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
  490. unsigned SourceArgVal, unsigned TargetArgVal) {
  491. // We are given two unsigned integers representing the global values of
  492. // the operands in different IRSimilarityCandidates and a current mapping
  493. // between the two.
  494. //
  495. // Source Operand GVN: 1
  496. // Target Operand GVN: 2
  497. // CurrentMapping: {1: {1, 2}}
  498. //
  499. // Since we have mapping, and the target operand is contained in the set, we
  500. // update it to:
  501. // CurrentMapping: {1: {2}}
  502. // and can return true. But, if the mapping was
  503. // CurrentMapping: {1: {3}}
  504. // we would return false.
  505. bool WasInserted;
  506. DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
  507. std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
  508. std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
  509. // If we created a new mapping, then we are done.
  510. if (WasInserted)
  511. return true;
  512. // If there is more than one option in the mapping set, and the target value
  513. // is included in the mapping set replace that set with one that only includes
  514. // the target value, as it is the only valid mapping via the non commutative
  515. // instruction.
  516. DenseSet<unsigned> &TargetSet = Val->second;
  517. if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
  518. TargetSet.clear();
  519. TargetSet.insert(TargetArgVal);
  520. return true;
  521. }
  522. // Return true if we can find the value in the set.
  523. return TargetSet.contains(TargetArgVal);
  524. }
  525. bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
  526. OperandMapping A, OperandMapping B) {
  527. // Iterators to keep track of where we are in the operands for each
  528. // Instruction.
  529. ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
  530. ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
  531. unsigned OperandLength = A.OperVals.size();
  532. // For each operand, get the value numbering and ensure it is consistent.
  533. for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
  534. unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
  535. unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
  536. // Attempt to add a set with only the target value. If there is no mapping
  537. // we can create it here.
  538. //
  539. // For an instruction like a subtraction:
  540. // IRSimilarityCandidateA: IRSimilarityCandidateB:
  541. // %resultA = sub %a, %b %resultB = sub %d, %e
  542. //
  543. // We map %a -> %d and %b -> %e.
  544. //
  545. // And check to see whether their mapping is consistent in
  546. // checkNumberingAndReplace.
  547. if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
  548. return false;
  549. if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
  550. return false;
  551. }
  552. return true;
  553. }
  554. bool IRSimilarityCandidate::compareCommutativeOperandMapping(
  555. OperandMapping A, OperandMapping B) {
  556. DenseSet<unsigned> ValueNumbersA;
  557. DenseSet<unsigned> ValueNumbersB;
  558. ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
  559. ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
  560. unsigned OperandLength = A.OperVals.size();
  561. // Find the value number sets for the operands.
  562. for (unsigned Idx = 0; Idx < OperandLength;
  563. Idx++, VItA++, VItB++) {
  564. ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
  565. ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
  566. }
  567. // Iterate over the operands in the first IRSimilarityCandidate and make sure
  568. // there exists a possible mapping with the operands in the second
  569. // IRSimilarityCandidate.
  570. if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
  571. A.ValueNumberMapping, A.OperVals,
  572. ValueNumbersB))
  573. return false;
  574. // Iterate over the operands in the second IRSimilarityCandidate and make sure
  575. // there exists a possible mapping with the operands in the first
  576. // IRSimilarityCandidate.
  577. if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
  578. B.ValueNumberMapping, B.OperVals,
  579. ValueNumbersA))
  580. return false;
  581. return true;
  582. }
  583. bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
  584. RelativeLocMapping B) {
  585. // Get the basic blocks the label refers to.
  586. BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
  587. BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
  588. // Get the basic blocks contained in each region.
  589. DenseSet<BasicBlock *> BasicBlockA;
  590. DenseSet<BasicBlock *> BasicBlockB;
  591. A.IRSC.getBasicBlocks(BasicBlockA);
  592. B.IRSC.getBasicBlocks(BasicBlockB);
  593. // Determine if the block is contained in the region.
  594. bool AContained = BasicBlockA.contains(ABB);
  595. bool BContained = BasicBlockB.contains(BBB);
  596. // Both blocks need to be contained in the region, or both need to be outside
  597. // the reigon.
  598. if (AContained != BContained)
  599. return false;
  600. // If both are contained, then we need to make sure that the relative
  601. // distance to the target blocks are the same.
  602. if (AContained)
  603. return A.RelativeLocation == B.RelativeLocation;
  604. return true;
  605. }
  606. bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
  607. const IRSimilarityCandidate &B) {
  608. DenseMap<unsigned, DenseSet<unsigned>> MappingA;
  609. DenseMap<unsigned, DenseSet<unsigned>> MappingB;
  610. return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
  611. }
  612. typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
  613. SmallVector<int, 4> &, ArrayRef<Value *> &,
  614. ArrayRef<Value *> &>
  615. ZippedRelativeLocationsT;
  616. bool IRSimilarityCandidate::compareStructure(
  617. const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
  618. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
  619. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
  620. if (A.getLength() != B.getLength())
  621. return false;
  622. if (A.ValueToNumber.size() != B.ValueToNumber.size())
  623. return false;
  624. iterator ItA = A.begin();
  625. iterator ItB = B.begin();
  626. // These ValueNumber Mapping sets create a create a mapping between the values
  627. // in one candidate to values in the other candidate. If we create a set with
  628. // one element, and that same element maps to the original element in the
  629. // candidate we have a good mapping.
  630. DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
  631. // Iterate over the instructions contained in each candidate
  632. unsigned SectionLength = A.getStartIdx() + A.getLength();
  633. for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
  634. ItA++, ItB++, Loc++) {
  635. // Make sure the instructions are similar to one another.
  636. if (!isClose(*ItA, *ItB))
  637. return false;
  638. Instruction *IA = ItA->Inst;
  639. Instruction *IB = ItB->Inst;
  640. if (!ItA->Legal || !ItB->Legal)
  641. return false;
  642. // Get the operand sets for the instructions.
  643. ArrayRef<Value *> OperValsA = ItA->OperVals;
  644. ArrayRef<Value *> OperValsB = ItB->OperVals;
  645. unsigned InstValA = A.ValueToNumber.find(IA)->second;
  646. unsigned InstValB = B.ValueToNumber.find(IB)->second;
  647. bool WasInserted;
  648. // Ensure that the mappings for the instructions exists.
  649. std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
  650. std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
  651. if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
  652. return false;
  653. std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
  654. std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
  655. if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
  656. return false;
  657. // We have different paths for commutative instructions and non-commutative
  658. // instructions since commutative instructions could allow multiple mappings
  659. // to certain values.
  660. if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
  661. !isa<IntrinsicInst>(IA)) {
  662. if (!compareCommutativeOperandMapping(
  663. {A, OperValsA, ValueNumberMappingA},
  664. {B, OperValsB, ValueNumberMappingB}))
  665. return false;
  666. continue;
  667. }
  668. // Handle the non-commutative cases.
  669. if (!compareNonCommutativeOperandMapping(
  670. {A, OperValsA, ValueNumberMappingA},
  671. {B, OperValsB, ValueNumberMappingB}))
  672. return false;
  673. // Here we check that between two corresponding instructions,
  674. // when referring to a basic block in the same region, the
  675. // relative locations are the same. And, that the instructions refer to
  676. // basic blocks outside the region in the same corresponding locations.
  677. // We are able to make the assumption about blocks outside of the region
  678. // since the target block labels are considered values and will follow the
  679. // same number matching that we defined for the other instructions in the
  680. // region. So, at this point, in each location we target a specific block
  681. // outside the region, we are targeting a corresponding block in each
  682. // analagous location in the region we are comparing to.
  683. if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
  684. !(isa<PHINode>(IA) && isa<PHINode>(IB)))
  685. continue;
  686. SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
  687. SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
  688. if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
  689. OperValsA.size() != OperValsB.size())
  690. return false;
  691. ZippedRelativeLocationsT ZippedRelativeLocations =
  692. zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
  693. if (any_of(ZippedRelativeLocations,
  694. [&A, &B](std::tuple<int, int, Value *, Value *> R) {
  695. return !checkRelativeLocations(
  696. {A, std::get<0>(R), std::get<2>(R)},
  697. {B, std::get<1>(R), std::get<3>(R)});
  698. }))
  699. return false;
  700. }
  701. return true;
  702. }
  703. bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
  704. const IRSimilarityCandidate &B) {
  705. auto DoesOverlap = [](const IRSimilarityCandidate &X,
  706. const IRSimilarityCandidate &Y) {
  707. // Check:
  708. // XXXXXX X starts before Y ends
  709. // YYYYYYY Y starts after X starts
  710. return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
  711. };
  712. return DoesOverlap(A, B) || DoesOverlap(B, A);
  713. }
  714. void IRSimilarityIdentifier::populateMapper(
  715. Module &M, std::vector<IRInstructionData *> &InstrList,
  716. std::vector<unsigned> &IntegerMapping) {
  717. std::vector<IRInstructionData *> InstrListForModule;
  718. std::vector<unsigned> IntegerMappingForModule;
  719. // Iterate over the functions in the module to map each Instruction in each
  720. // BasicBlock to an unsigned integer.
  721. Mapper.initializeForBBs(M);
  722. for (Function &F : M) {
  723. if (F.empty())
  724. continue;
  725. for (BasicBlock &BB : F) {
  726. // BB has potential to have similarity since it has a size greater than 2
  727. // and can therefore match other regions greater than 2. Map it to a list
  728. // of unsigned integers.
  729. Mapper.convertToUnsignedVec(BB, InstrListForModule,
  730. IntegerMappingForModule);
  731. }
  732. BasicBlock::iterator It = F.begin()->end();
  733. Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
  734. true);
  735. if (InstrListForModule.size() > 0)
  736. Mapper.IDL->push_back(*InstrListForModule.back());
  737. }
  738. // Insert the InstrListForModule at the end of the overall InstrList so that
  739. // we can have a long InstrList for the entire set of Modules being analyzed.
  740. llvm::append_range(InstrList, InstrListForModule);
  741. // Do the same as above, but for IntegerMapping.
  742. llvm::append_range(IntegerMapping, IntegerMappingForModule);
  743. }
  744. void IRSimilarityIdentifier::populateMapper(
  745. ArrayRef<std::unique_ptr<Module>> &Modules,
  746. std::vector<IRInstructionData *> &InstrList,
  747. std::vector<unsigned> &IntegerMapping) {
  748. // Iterate over, and map the instructions in each module.
  749. for (const std::unique_ptr<Module> &M : Modules)
  750. populateMapper(*M, InstrList, IntegerMapping);
  751. }
  752. /// From a repeated subsequence, find all the different instances of the
  753. /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
  754. /// the IRInstructionData in subsequence.
  755. ///
  756. /// \param [in] Mapper - The instruction mapper for basic correctness checks.
  757. /// \param [in] InstrList - The vector that holds the instruction data.
  758. /// \param [in] IntegerMapping - The vector that holds the mapped integers.
  759. /// \param [out] CandsForRepSubstring - The vector to store the generated
  760. /// IRSimilarityCandidates.
  761. static void createCandidatesFromSuffixTree(
  762. const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
  763. std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
  764. std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
  765. unsigned StringLen = RS.Length;
  766. if (StringLen < 2)
  767. return;
  768. // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
  769. for (const unsigned &StartIdx : RS.StartIndices) {
  770. unsigned EndIdx = StartIdx + StringLen - 1;
  771. // Check that this subsequence does not contain an illegal instruction.
  772. bool ContainsIllegal = false;
  773. for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
  774. unsigned Key = IntegerMapping[CurrIdx];
  775. if (Key > Mapper.IllegalInstrNumber) {
  776. ContainsIllegal = true;
  777. break;
  778. }
  779. }
  780. // If we have an illegal instruction, we should not create an
  781. // IRSimilarityCandidate for this region.
  782. if (ContainsIllegal)
  783. continue;
  784. // We are getting iterators to the instructions in this region of code
  785. // by advancing the start and end indices from the start of the
  786. // InstrList.
  787. std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
  788. std::advance(StartIt, StartIdx);
  789. std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
  790. std::advance(EndIt, EndIdx);
  791. CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
  792. }
  793. }
  794. void IRSimilarityCandidate::createCanonicalRelationFrom(
  795. IRSimilarityCandidate &SourceCand,
  796. DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
  797. DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
  798. assert(SourceCand.CanonNumToNumber.size() != 0 &&
  799. "Base canonical relationship is empty!");
  800. assert(SourceCand.NumberToCanonNum.size() != 0 &&
  801. "Base canonical relationship is empty!");
  802. assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
  803. assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
  804. DenseSet<unsigned> UsedGVNs;
  805. // Iterate over the mappings provided from this candidate to SourceCand. We
  806. // are then able to map the GVN in this candidate to the same canonical number
  807. // given to the corresponding GVN in SourceCand.
  808. for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
  809. unsigned SourceGVN = GVNMapping.first;
  810. assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
  811. unsigned ResultGVN;
  812. // We need special handling if we have more than one potential value. This
  813. // means that there are at least two GVNs that could correspond to this GVN.
  814. // This could lead to potential swapping later on, so we make a decision
  815. // here to ensure a one-to-one mapping.
  816. if (GVNMapping.second.size() > 1) {
  817. bool Found = false;
  818. for (unsigned Val : GVNMapping.second) {
  819. // We make sure the target value number hasn't already been reserved.
  820. if (UsedGVNs.contains(Val))
  821. continue;
  822. // We make sure that the opposite mapping is still consistent.
  823. DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
  824. FromSourceMapping.find(Val);
  825. if (!It->second.contains(SourceGVN))
  826. continue;
  827. // We pick the first item that satisfies these conditions.
  828. Found = true;
  829. ResultGVN = Val;
  830. break;
  831. }
  832. assert(Found && "Could not find matching value for source GVN");
  833. (void)Found;
  834. } else
  835. ResultGVN = *GVNMapping.second.begin();
  836. // Whatever GVN is found, we mark it as used.
  837. UsedGVNs.insert(ResultGVN);
  838. unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
  839. CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
  840. NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
  841. }
  842. DenseSet<BasicBlock *> BBSet;
  843. getBasicBlocks(BBSet);
  844. // Find canonical numbers for the BasicBlocks in the current candidate.
  845. // This is done by finding the corresponding value for the first instruction
  846. // in the block in the current candidate, finding the matching value in the
  847. // source candidate. Then by finding the parent of this value, use the
  848. // canonical number of the block in the source candidate for the canonical
  849. // number in the current candidate.
  850. for (BasicBlock *BB : BBSet) {
  851. unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
  852. // We can skip the BasicBlock if the canonical numbering has already been
  853. // found in a separate instruction.
  854. if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
  855. continue;
  856. // If the basic block is the starting block, then the shared instruction may
  857. // not be the first instruction in the block, it will be the first
  858. // instruction in the similarity region.
  859. Value *FirstOutlineInst = BB == getStartBB()
  860. ? frontInstruction()
  861. : &*BB->instructionsWithoutDebug().begin();
  862. unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
  863. unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
  864. unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
  865. Value *SourceV = *SourceCand.fromGVN(SourceGVN);
  866. BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
  867. unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
  868. unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
  869. CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
  870. NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
  871. }
  872. }
  873. void IRSimilarityCandidate::createCanonicalMappingFor(
  874. IRSimilarityCandidate &CurrCand) {
  875. assert(CurrCand.CanonNumToNumber.size() == 0 &&
  876. "Canonical Relationship is non-empty");
  877. assert(CurrCand.NumberToCanonNum.size() == 0 &&
  878. "Canonical Relationship is non-empty");
  879. unsigned CanonNum = 0;
  880. // Iterate over the value numbers found, the order does not matter in this
  881. // case.
  882. for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
  883. CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
  884. CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
  885. CanonNum++;
  886. }
  887. }
  888. /// From the list of IRSimilarityCandidates, perform a comparison between each
  889. /// IRSimilarityCandidate to determine if there are overlapping
  890. /// IRInstructionData, or if they do not have the same structure.
  891. ///
  892. /// \param [in] CandsForRepSubstring - The vector containing the
  893. /// IRSimilarityCandidates.
  894. /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
  895. /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
  896. /// vector are structurally similar to one another.
  897. static void findCandidateStructures(
  898. std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
  899. DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
  900. std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
  901. InnerCandEndIt;
  902. // IRSimilarityCandidates each have a structure for operand use. It is
  903. // possible that two instances of the same subsequences have different
  904. // structure. Each type of structure found is assigned a number. This
  905. // DenseMap maps an IRSimilarityCandidate to which type of similarity
  906. // discovered it fits within.
  907. DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
  908. // Find the compatibility from each candidate to the others to determine
  909. // which candidates overlap and which have the same structure by mapping
  910. // each structure to a different group.
  911. bool SameStructure;
  912. bool Inserted;
  913. unsigned CurrentGroupNum = 0;
  914. unsigned OuterGroupNum;
  915. DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
  916. DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
  917. DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
  918. // Iterate over the candidates to determine its structural and overlapping
  919. // compatibility with other instructions
  920. DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
  921. DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
  922. for (CandIt = CandsForRepSubstring.begin(),
  923. CandEndIt = CandsForRepSubstring.end();
  924. CandIt != CandEndIt; CandIt++) {
  925. // Determine if it has an assigned structural group already.
  926. CandToGroupIt = CandToGroup.find(&*CandIt);
  927. if (CandToGroupIt == CandToGroup.end()) {
  928. // If not, we assign it one, and add it to our mapping.
  929. std::tie(CandToGroupIt, Inserted) =
  930. CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
  931. }
  932. // Get the structural group number from the iterator.
  933. OuterGroupNum = CandToGroupIt->second;
  934. // Check if we already have a list of IRSimilarityCandidates for the current
  935. // structural group. Create one if one does not exist.
  936. CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
  937. if (CurrentGroupPair == StructuralGroups.end()) {
  938. IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
  939. std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
  940. std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
  941. }
  942. // Iterate over the IRSimilarityCandidates following the current
  943. // IRSimilarityCandidate in the list to determine whether the two
  944. // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
  945. // of IRSimilarityCandidates.
  946. for (InnerCandIt = std::next(CandIt),
  947. InnerCandEndIt = CandsForRepSubstring.end();
  948. InnerCandIt != InnerCandEndIt; InnerCandIt++) {
  949. // We check if the inner item has a group already, if it does, we skip it.
  950. CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
  951. if (CandToGroupItInner != CandToGroup.end())
  952. continue;
  953. // Otherwise we determine if they have the same structure and add it to
  954. // vector if they match.
  955. ValueNumberMappingA.clear();
  956. ValueNumberMappingB.clear();
  957. SameStructure = IRSimilarityCandidate::compareStructure(
  958. *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
  959. if (!SameStructure)
  960. continue;
  961. InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
  962. ValueNumberMappingB);
  963. CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
  964. CurrentGroupPair->second.push_back(*InnerCandIt);
  965. }
  966. }
  967. }
  968. void IRSimilarityIdentifier::findCandidates(
  969. std::vector<IRInstructionData *> &InstrList,
  970. std::vector<unsigned> &IntegerMapping) {
  971. SuffixTree ST(IntegerMapping);
  972. std::vector<IRSimilarityCandidate> CandsForRepSubstring;
  973. std::vector<SimilarityGroup> NewCandidateGroups;
  974. DenseMap<unsigned, SimilarityGroup> StructuralGroups;
  975. // Iterate over the subsequences found by the Suffix Tree to create
  976. // IRSimilarityCandidates for each repeated subsequence and determine which
  977. // instances are structurally similar to one another.
  978. for (SuffixTree::RepeatedSubstring &RS : ST) {
  979. createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
  980. CandsForRepSubstring);
  981. if (CandsForRepSubstring.size() < 2)
  982. continue;
  983. findCandidateStructures(CandsForRepSubstring, StructuralGroups);
  984. for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
  985. // We only add the group if it contains more than one
  986. // IRSimilarityCandidate. If there is only one, that means there is no
  987. // other repeated subsequence with the same structure.
  988. if (Group.second.size() > 1)
  989. SimilarityCandidates->push_back(Group.second);
  990. CandsForRepSubstring.clear();
  991. StructuralGroups.clear();
  992. NewCandidateGroups.clear();
  993. }
  994. }
  995. SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
  996. ArrayRef<std::unique_ptr<Module>> Modules) {
  997. resetSimilarityCandidates();
  998. std::vector<IRInstructionData *> InstrList;
  999. std::vector<unsigned> IntegerMapping;
  1000. Mapper.InstClassifier.EnableBranches = this->EnableBranches;
  1001. Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
  1002. Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
  1003. Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
  1004. Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
  1005. populateMapper(Modules, InstrList, IntegerMapping);
  1006. findCandidates(InstrList, IntegerMapping);
  1007. return *SimilarityCandidates;
  1008. }
  1009. SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
  1010. resetSimilarityCandidates();
  1011. Mapper.InstClassifier.EnableBranches = this->EnableBranches;
  1012. Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
  1013. Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
  1014. Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
  1015. Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
  1016. std::vector<IRInstructionData *> InstrList;
  1017. std::vector<unsigned> IntegerMapping;
  1018. populateMapper(M, InstrList, IntegerMapping);
  1019. findCandidates(InstrList, IntegerMapping);
  1020. return *SimilarityCandidates;
  1021. }
  1022. INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
  1023. "ir-similarity-identifier", false, true)
  1024. IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
  1025. : ModulePass(ID) {
  1026. initializeIRSimilarityIdentifierWrapperPassPass(
  1027. *PassRegistry::getPassRegistry());
  1028. }
  1029. bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
  1030. IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
  1031. MatchCallsByName, !DisableIntrinsics,
  1032. false));
  1033. return false;
  1034. }
  1035. bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
  1036. IRSI.reset();
  1037. return false;
  1038. }
  1039. bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
  1040. IRSI->findSimilarity(M);
  1041. return false;
  1042. }
  1043. AnalysisKey IRSimilarityAnalysis::Key;
  1044. IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
  1045. ModuleAnalysisManager &) {
  1046. auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
  1047. MatchCallsByName, !DisableIntrinsics,
  1048. false);
  1049. IRSI.findSimilarity(M);
  1050. return IRSI;
  1051. }
  1052. PreservedAnalyses
  1053. IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
  1054. IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
  1055. std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
  1056. IRSI.getSimilarity();
  1057. for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
  1058. OS << CandVec.size() << " candidates of length "
  1059. << CandVec.begin()->getLength() << ". Found in: \n";
  1060. for (IRSimilarityCandidate &Cand : CandVec) {
  1061. OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
  1062. << ", Basic Block: ";
  1063. if (Cand.front()->Inst->getParent()->getName().str() == "")
  1064. OS << "(unnamed)";
  1065. else
  1066. OS << Cand.front()->Inst->getParent()->getName().str();
  1067. OS << "\n Start Instruction: ";
  1068. Cand.frontInstruction()->print(OS);
  1069. OS << "\n End Instruction: ";
  1070. Cand.backInstruction()->print(OS);
  1071. OS << "\n";
  1072. }
  1073. }
  1074. return PreservedAnalyses::all();
  1075. }
  1076. char IRSimilarityIdentifierWrapperPass::ID = 0;