IROutliner.cpp 119 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925
  1. //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. // Implementation for the IROutliner which is used by the IROutliner Pass.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/IPO/IROutliner.h"
  14. #include "llvm/Analysis/IRSimilarityIdentifier.h"
  15. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  16. #include "llvm/Analysis/TargetTransformInfo.h"
  17. #include "llvm/IR/Attributes.h"
  18. #include "llvm/IR/DebugInfoMetadata.h"
  19. #include "llvm/IR/DIBuilder.h"
  20. #include "llvm/IR/Dominators.h"
  21. #include "llvm/IR/Mangler.h"
  22. #include "llvm/IR/PassManager.h"
  23. #include "llvm/InitializePasses.h"
  24. #include "llvm/Pass.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Transforms/IPO.h"
  27. #include <map>
  28. #include <set>
  29. #include <vector>
  30. #define DEBUG_TYPE "iroutliner"
  31. using namespace llvm;
  32. using namespace IRSimilarity;
  33. // A command flag to be used for debugging to exclude branches from similarity
  34. // matching and outlining.
  35. namespace llvm {
  36. extern cl::opt<bool> DisableBranches;
  37. // A command flag to be used for debugging to indirect calls from similarity
  38. // matching and outlining.
  39. extern cl::opt<bool> DisableIndirectCalls;
  40. // A command flag to be used for debugging to exclude intrinsics from similarity
  41. // matching and outlining.
  42. extern cl::opt<bool> DisableIntrinsics;
  43. } // namespace llvm
  44. // Set to true if the user wants the ir outliner to run on linkonceodr linkage
  45. // functions. This is false by default because the linker can dedupe linkonceodr
  46. // functions. Since the outliner is confined to a single module (modulo LTO),
  47. // this is off by default. It should, however, be the default behavior in
  48. // LTO.
  49. static cl::opt<bool> EnableLinkOnceODRIROutlining(
  50. "enable-linkonceodr-ir-outlining", cl::Hidden,
  51. cl::desc("Enable the IR outliner on linkonceodr functions"),
  52. cl::init(false));
  53. // This is a debug option to test small pieces of code to ensure that outlining
  54. // works correctly.
  55. static cl::opt<bool> NoCostModel(
  56. "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
  57. cl::desc("Debug option to outline greedily, without restriction that "
  58. "calculated benefit outweighs cost"));
  59. /// The OutlinableGroup holds all the overarching information for outlining
  60. /// a set of regions that are structurally similar to one another, such as the
  61. /// types of the overall function, the output blocks, the sets of stores needed
  62. /// and a list of the different regions. This information is used in the
  63. /// deduplication of extracted regions with the same structure.
  64. struct OutlinableGroup {
  65. /// The sections that could be outlined
  66. std::vector<OutlinableRegion *> Regions;
  67. /// The argument types for the function created as the overall function to
  68. /// replace the extracted function for each region.
  69. std::vector<Type *> ArgumentTypes;
  70. /// The FunctionType for the overall function.
  71. FunctionType *OutlinedFunctionType = nullptr;
  72. /// The Function for the collective overall function.
  73. Function *OutlinedFunction = nullptr;
  74. /// Flag for whether we should not consider this group of OutlinableRegions
  75. /// for extraction.
  76. bool IgnoreGroup = false;
  77. /// The return blocks for the overall function.
  78. DenseMap<Value *, BasicBlock *> EndBBs;
  79. /// The PHIBlocks with their corresponding return block based on the return
  80. /// value as the key.
  81. DenseMap<Value *, BasicBlock *> PHIBlocks;
  82. /// A set containing the different GVN store sets needed. Each array contains
  83. /// a sorted list of the different values that need to be stored into output
  84. /// registers.
  85. DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
  86. /// Flag for whether the \ref ArgumentTypes have been defined after the
  87. /// extraction of the first region.
  88. bool InputTypesSet = false;
  89. /// The number of input values in \ref ArgumentTypes. Anything after this
  90. /// index in ArgumentTypes is an output argument.
  91. unsigned NumAggregateInputs = 0;
  92. /// The mapping of the canonical numbering of the values in outlined sections
  93. /// to specific arguments.
  94. DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
  95. /// The number of branches in the region target a basic block that is outside
  96. /// of the region.
  97. unsigned BranchesToOutside = 0;
  98. /// Tracker counting backwards from the highest unsigned value possible to
  99. /// avoid conflicting with the GVNs of assigned values. We start at -3 since
  100. /// -2 and -1 are assigned by the DenseMap.
  101. unsigned PHINodeGVNTracker = -3;
  102. DenseMap<unsigned,
  103. std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
  104. PHINodeGVNToGVNs;
  105. DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
  106. /// The number of instructions that will be outlined by extracting \ref
  107. /// Regions.
  108. InstructionCost Benefit = 0;
  109. /// The number of added instructions needed for the outlining of the \ref
  110. /// Regions.
  111. InstructionCost Cost = 0;
  112. /// The argument that needs to be marked with the swifterr attribute. If not
  113. /// needed, there is no value.
  114. Optional<unsigned> SwiftErrorArgument;
  115. /// For the \ref Regions, we look at every Value. If it is a constant,
  116. /// we check whether it is the same in Region.
  117. ///
  118. /// \param [in,out] NotSame contains the global value numbers where the
  119. /// constant is not always the same, and must be passed in as an argument.
  120. void findSameConstants(DenseSet<unsigned> &NotSame);
  121. /// For the regions, look at each set of GVN stores needed and account for
  122. /// each combination. Add an argument to the argument types if there is
  123. /// more than one combination.
  124. ///
  125. /// \param [in] M - The module we are outlining from.
  126. void collectGVNStoreSets(Module &M);
  127. };
  128. /// Move the contents of \p SourceBB to before the last instruction of \p
  129. /// TargetBB.
  130. /// \param SourceBB - the BasicBlock to pull Instructions from.
  131. /// \param TargetBB - the BasicBlock to put Instruction into.
  132. static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
  133. for (Instruction &I : llvm::make_early_inc_range(SourceBB))
  134. I.moveBefore(TargetBB, TargetBB.end());
  135. }
  136. /// A function to sort the keys of \p Map, which must be a mapping of constant
  137. /// values to basic blocks and return it in \p SortedKeys
  138. ///
  139. /// \param SortedKeys - The vector the keys will be return in and sorted.
  140. /// \param Map - The DenseMap containing keys to sort.
  141. static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
  142. DenseMap<Value *, BasicBlock *> &Map) {
  143. for (auto &VtoBB : Map)
  144. SortedKeys.push_back(VtoBB.first);
  145. stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
  146. const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
  147. const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
  148. assert(RHSC && "Not a constant integer in return value?");
  149. assert(LHSC && "Not a constant integer in return value?");
  150. return LHSC->getLimitedValue() < RHSC->getLimitedValue();
  151. });
  152. }
  153. Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
  154. Value *V) {
  155. Optional<unsigned> GVN = Candidate->getGVN(V);
  156. assert(GVN.hasValue() && "No GVN for incoming value");
  157. Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
  158. Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
  159. Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
  160. return FoundValueOpt.getValueOr(nullptr);
  161. }
  162. /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
  163. /// in \p Included to branch to BasicBlock \p Replace if they currently branch
  164. /// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
  165. /// when PHINodes are included in outlined regions.
  166. ///
  167. /// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
  168. /// checked.
  169. /// \param Find - The successor block to be replaced.
  170. /// \param Replace - The new succesor block to branch to.
  171. /// \param Included - The set of blocks about to be outlined.
  172. static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
  173. BasicBlock *Replace,
  174. DenseSet<BasicBlock *> &Included) {
  175. for (PHINode &PN : PHIBlock->phis()) {
  176. for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
  177. ++Idx) {
  178. // Check if the incoming block is included in the set of blocks being
  179. // outlined.
  180. BasicBlock *Incoming = PN.getIncomingBlock(Idx);
  181. if (!Included.contains(Incoming))
  182. continue;
  183. BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
  184. assert(BI && "Not a branch instruction?");
  185. // Look over the branching instructions into this block to see if we
  186. // used to branch to Find in this outlined block.
  187. for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
  188. Succ++) {
  189. // If we have found the block to replace, we do so here.
  190. if (BI->getSuccessor(Succ) != Find)
  191. continue;
  192. BI->setSuccessor(Succ, Replace);
  193. }
  194. }
  195. }
  196. }
  197. void OutlinableRegion::splitCandidate() {
  198. assert(!CandidateSplit && "Candidate already split!");
  199. Instruction *BackInst = Candidate->backInstruction();
  200. Instruction *EndInst = nullptr;
  201. // Check whether the last instruction is a terminator, if it is, we do
  202. // not split on the following instruction. We leave the block as it is. We
  203. // also check that this is not the last instruction in the Module, otherwise
  204. // the check for whether the current following instruction matches the
  205. // previously recorded instruction will be incorrect.
  206. if (!BackInst->isTerminator() ||
  207. BackInst->getParent() != &BackInst->getFunction()->back()) {
  208. EndInst = Candidate->end()->Inst;
  209. assert(EndInst && "Expected an end instruction?");
  210. }
  211. // We check if the current instruction following the last instruction in the
  212. // region is the same as the recorded instruction following the last
  213. // instruction. If they do not match, there could be problems in rewriting
  214. // the program after outlining, so we ignore it.
  215. if (!BackInst->isTerminator() &&
  216. EndInst != BackInst->getNextNonDebugInstruction())
  217. return;
  218. Instruction *StartInst = (*Candidate->begin()).Inst;
  219. assert(StartInst && "Expected a start instruction?");
  220. StartBB = StartInst->getParent();
  221. PrevBB = StartBB;
  222. DenseSet<BasicBlock *> BBSet;
  223. Candidate->getBasicBlocks(BBSet);
  224. // We iterate over the instructions in the region, if we find a PHINode, we
  225. // check if there are predecessors outside of the region, if there are,
  226. // we ignore this region since we are unable to handle the severing of the
  227. // phi node right now.
  228. BasicBlock::iterator It = StartInst->getIterator();
  229. while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
  230. unsigned NumPredsOutsideRegion = 0;
  231. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
  232. if (!BBSet.contains(PN->getIncomingBlock(i)))
  233. ++NumPredsOutsideRegion;
  234. if (NumPredsOutsideRegion > 1)
  235. return;
  236. It++;
  237. }
  238. // If the region starts with a PHINode, but is not the initial instruction of
  239. // the BasicBlock, we ignore this region for now.
  240. if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
  241. return;
  242. // If the region ends with a PHINode, but does not contain all of the phi node
  243. // instructions of the region, we ignore it for now.
  244. if (isa<PHINode>(BackInst)) {
  245. EndBB = BackInst->getParent();
  246. if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
  247. return;
  248. }
  249. // The basic block gets split like so:
  250. // block: block:
  251. // inst1 inst1
  252. // inst2 inst2
  253. // region1 br block_to_outline
  254. // region2 block_to_outline:
  255. // region3 -> region1
  256. // region4 region2
  257. // inst3 region3
  258. // inst4 region4
  259. // br block_after_outline
  260. // block_after_outline:
  261. // inst3
  262. // inst4
  263. std::string OriginalName = PrevBB->getName().str();
  264. StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
  265. PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
  266. CandidateSplit = true;
  267. if (!BackInst->isTerminator()) {
  268. EndBB = EndInst->getParent();
  269. FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
  270. EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
  271. FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
  272. } else {
  273. EndBB = BackInst->getParent();
  274. EndsInBranch = true;
  275. FollowBB = nullptr;
  276. }
  277. // Refind the basic block set.
  278. BBSet.clear();
  279. Candidate->getBasicBlocks(BBSet);
  280. // For the phi nodes in the new starting basic block of the region, we
  281. // reassign the targets of the basic blocks branching instructions.
  282. replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
  283. if (FollowBB)
  284. replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
  285. }
  286. void OutlinableRegion::reattachCandidate() {
  287. assert(CandidateSplit && "Candidate is not split!");
  288. // The basic block gets reattached like so:
  289. // block: block:
  290. // inst1 inst1
  291. // inst2 inst2
  292. // br block_to_outline region1
  293. // block_to_outline: -> region2
  294. // region1 region3
  295. // region2 region4
  296. // region3 inst3
  297. // region4 inst4
  298. // br block_after_outline
  299. // block_after_outline:
  300. // inst3
  301. // inst4
  302. assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
  303. assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
  304. PrevBB->getTerminator()->eraseFromParent();
  305. // If we reattaching after outlining, we iterate over the phi nodes to
  306. // the initial block, and reassign the branch instructions of the incoming
  307. // blocks to the block we are remerging into.
  308. if (!ExtractedFunction) {
  309. DenseSet<BasicBlock *> BBSet;
  310. Candidate->getBasicBlocks(BBSet);
  311. replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
  312. if (!EndsInBranch)
  313. replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
  314. }
  315. moveBBContents(*StartBB, *PrevBB);
  316. BasicBlock *PlacementBB = PrevBB;
  317. if (StartBB != EndBB)
  318. PlacementBB = EndBB;
  319. if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
  320. assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
  321. assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
  322. PlacementBB->getTerminator()->eraseFromParent();
  323. moveBBContents(*FollowBB, *PlacementBB);
  324. PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
  325. FollowBB->eraseFromParent();
  326. }
  327. PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
  328. StartBB->eraseFromParent();
  329. // Make sure to save changes back to the StartBB.
  330. StartBB = PrevBB;
  331. EndBB = nullptr;
  332. PrevBB = nullptr;
  333. FollowBB = nullptr;
  334. CandidateSplit = false;
  335. }
  336. /// Find whether \p V matches the Constants previously found for the \p GVN.
  337. ///
  338. /// \param V - The value to check for consistency.
  339. /// \param GVN - The global value number assigned to \p V.
  340. /// \param GVNToConstant - The mapping of global value number to Constants.
  341. /// \returns true if the Value matches the Constant mapped to by V and false if
  342. /// it \p V is a Constant but does not match.
  343. /// \returns None if \p V is not a Constant.
  344. static Optional<bool>
  345. constantMatches(Value *V, unsigned GVN,
  346. DenseMap<unsigned, Constant *> &GVNToConstant) {
  347. // See if we have a constants
  348. Constant *CST = dyn_cast<Constant>(V);
  349. if (!CST)
  350. return None;
  351. // Holds a mapping from a global value number to a Constant.
  352. DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
  353. bool Inserted;
  354. // If we have a constant, try to make a new entry in the GVNToConstant.
  355. std::tie(GVNToConstantIt, Inserted) =
  356. GVNToConstant.insert(std::make_pair(GVN, CST));
  357. // If it was found and is not equal, it is not the same. We do not
  358. // handle this case yet, and exit early.
  359. if (Inserted || (GVNToConstantIt->second == CST))
  360. return true;
  361. return false;
  362. }
  363. InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
  364. InstructionCost Benefit = 0;
  365. // Estimate the benefit of outlining a specific sections of the program. We
  366. // delegate mostly this task to the TargetTransformInfo so that if the target
  367. // has specific changes, we can have a more accurate estimate.
  368. // However, getInstructionCost delegates the code size calculation for
  369. // arithmetic instructions to getArithmeticInstrCost in
  370. // include/Analysis/TargetTransformImpl.h, where it always estimates that the
  371. // code size for a division and remainder instruction to be equal to 4, and
  372. // everything else to 1. This is not an accurate representation of the
  373. // division instruction for targets that have a native division instruction.
  374. // To be overly conservative, we only add 1 to the number of instructions for
  375. // each division instruction.
  376. for (IRInstructionData &ID : *Candidate) {
  377. Instruction *I = ID.Inst;
  378. switch (I->getOpcode()) {
  379. case Instruction::FDiv:
  380. case Instruction::FRem:
  381. case Instruction::SDiv:
  382. case Instruction::SRem:
  383. case Instruction::UDiv:
  384. case Instruction::URem:
  385. Benefit += 1;
  386. break;
  387. default:
  388. Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
  389. break;
  390. }
  391. }
  392. return Benefit;
  393. }
  394. /// Check the \p OutputMappings structure for value \p Input, if it exists
  395. /// it has been used as an output for outlining, and has been renamed, and we
  396. /// return the new value, otherwise, we return the same value.
  397. ///
  398. /// \param OutputMappings [in] - The mapping of values to their renamed value
  399. /// after being used as an output for an outlined region.
  400. /// \param Input [in] - The value to find the remapped value of, if it exists.
  401. /// \return The remapped value if it has been renamed, and the same value if has
  402. /// not.
  403. static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
  404. Value *Input) {
  405. DenseMap<Value *, Value *>::const_iterator OutputMapping =
  406. OutputMappings.find(Input);
  407. if (OutputMapping != OutputMappings.end())
  408. return OutputMapping->second;
  409. return Input;
  410. }
  411. /// Find whether \p Region matches the global value numbering to Constant
  412. /// mapping found so far.
  413. ///
  414. /// \param Region - The OutlinableRegion we are checking for constants
  415. /// \param GVNToConstant - The mapping of global value number to Constants.
  416. /// \param NotSame - The set of global value numbers that do not have the same
  417. /// constant in each region.
  418. /// \returns true if all Constants are the same in every use of a Constant in \p
  419. /// Region and false if not
  420. static bool
  421. collectRegionsConstants(OutlinableRegion &Region,
  422. DenseMap<unsigned, Constant *> &GVNToConstant,
  423. DenseSet<unsigned> &NotSame) {
  424. bool ConstantsTheSame = true;
  425. IRSimilarityCandidate &C = *Region.Candidate;
  426. for (IRInstructionData &ID : C) {
  427. // Iterate over the operands in an instruction. If the global value number,
  428. // assigned by the IRSimilarityCandidate, has been seen before, we check if
  429. // the the number has been found to be not the same value in each instance.
  430. for (Value *V : ID.OperVals) {
  431. Optional<unsigned> GVNOpt = C.getGVN(V);
  432. assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
  433. unsigned GVN = GVNOpt.getValue();
  434. // Check if this global value has been found to not be the same already.
  435. if (NotSame.contains(GVN)) {
  436. if (isa<Constant>(V))
  437. ConstantsTheSame = false;
  438. continue;
  439. }
  440. // If it has been the same so far, we check the value for if the
  441. // associated Constant value match the previous instances of the same
  442. // global value number. If the global value does not map to a Constant,
  443. // it is considered to not be the same value.
  444. Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
  445. if (ConstantMatches.hasValue()) {
  446. if (ConstantMatches.getValue())
  447. continue;
  448. else
  449. ConstantsTheSame = false;
  450. }
  451. // While this value is a register, it might not have been previously,
  452. // make sure we don't already have a constant mapped to this global value
  453. // number.
  454. if (GVNToConstant.find(GVN) != GVNToConstant.end())
  455. ConstantsTheSame = false;
  456. NotSame.insert(GVN);
  457. }
  458. }
  459. return ConstantsTheSame;
  460. }
  461. void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
  462. DenseMap<unsigned, Constant *> GVNToConstant;
  463. for (OutlinableRegion *Region : Regions)
  464. collectRegionsConstants(*Region, GVNToConstant, NotSame);
  465. }
  466. void OutlinableGroup::collectGVNStoreSets(Module &M) {
  467. for (OutlinableRegion *OS : Regions)
  468. OutputGVNCombinations.insert(OS->GVNStores);
  469. // We are adding an extracted argument to decide between which output path
  470. // to use in the basic block. It is used in a switch statement and only
  471. // needs to be an integer.
  472. if (OutputGVNCombinations.size() > 1)
  473. ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
  474. }
  475. /// Get the subprogram if it exists for one of the outlined regions.
  476. ///
  477. /// \param [in] Group - The set of regions to find a subprogram for.
  478. /// \returns the subprogram if it exists, or nullptr.
  479. static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
  480. for (OutlinableRegion *OS : Group.Regions)
  481. if (Function *F = OS->Call->getFunction())
  482. if (DISubprogram *SP = F->getSubprogram())
  483. return SP;
  484. return nullptr;
  485. }
  486. Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
  487. unsigned FunctionNameSuffix) {
  488. assert(!Group.OutlinedFunction && "Function is already defined!");
  489. Type *RetTy = Type::getVoidTy(M.getContext());
  490. // All extracted functions _should_ have the same return type at this point
  491. // since the similarity identifier ensures that all branches outside of the
  492. // region occur in the same place.
  493. // NOTE: Should we ever move to the model that uses a switch at every point
  494. // needed, meaning that we could branch within the region or out, it is
  495. // possible that we will need to switch to using the most general case all of
  496. // the time.
  497. for (OutlinableRegion *R : Group.Regions) {
  498. Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
  499. if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
  500. (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
  501. RetTy = ExtractedFuncType;
  502. }
  503. Group.OutlinedFunctionType = FunctionType::get(
  504. RetTy, Group.ArgumentTypes, false);
  505. // These functions will only be called from within the same module, so
  506. // we can set an internal linkage.
  507. Group.OutlinedFunction = Function::Create(
  508. Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
  509. "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
  510. // Transfer the swifterr attribute to the correct function parameter.
  511. if (Group.SwiftErrorArgument.hasValue())
  512. Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
  513. Attribute::SwiftError);
  514. Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
  515. Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
  516. // If there's a DISubprogram associated with this outlined function, then
  517. // emit debug info for the outlined function.
  518. if (DISubprogram *SP = getSubprogramOrNull(Group)) {
  519. Function *F = Group.OutlinedFunction;
  520. // We have a DISubprogram. Get its DICompileUnit.
  521. DICompileUnit *CU = SP->getUnit();
  522. DIBuilder DB(M, true, CU);
  523. DIFile *Unit = SP->getFile();
  524. Mangler Mg;
  525. // Get the mangled name of the function for the linkage name.
  526. std::string Dummy;
  527. llvm::raw_string_ostream MangledNameStream(Dummy);
  528. Mg.getNameWithPrefix(MangledNameStream, F, false);
  529. DISubprogram *OutlinedSP = DB.createFunction(
  530. Unit /* Context */, F->getName(), MangledNameStream.str(),
  531. Unit /* File */,
  532. 0 /* Line 0 is reserved for compiler-generated code. */,
  533. DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
  534. 0, /* Line 0 is reserved for compiler-generated code. */
  535. DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
  536. /* Outlined code is optimized code by definition. */
  537. DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
  538. // Don't add any new variables to the subprogram.
  539. DB.finalizeSubprogram(OutlinedSP);
  540. // Attach subprogram to the function.
  541. F->setSubprogram(OutlinedSP);
  542. // We're done with the DIBuilder.
  543. DB.finalize();
  544. }
  545. return Group.OutlinedFunction;
  546. }
  547. /// Move each BasicBlock in \p Old to \p New.
  548. ///
  549. /// \param [in] Old - The function to move the basic blocks from.
  550. /// \param [in] New - The function to move the basic blocks to.
  551. /// \param [out] NewEnds - The return blocks of the new overall function.
  552. static void moveFunctionData(Function &Old, Function &New,
  553. DenseMap<Value *, BasicBlock *> &NewEnds) {
  554. for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
  555. CurrBB.removeFromParent();
  556. CurrBB.insertInto(&New);
  557. Instruction *I = CurrBB.getTerminator();
  558. // For each block we find a return instruction is, it is a potential exit
  559. // path for the function. We keep track of each block based on the return
  560. // value here.
  561. if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
  562. NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
  563. std::vector<Instruction *> DebugInsts;
  564. for (Instruction &Val : CurrBB) {
  565. // We must handle the scoping of called functions differently than
  566. // other outlined instructions.
  567. if (!isa<CallInst>(&Val)) {
  568. // Remove the debug information for outlined functions.
  569. Val.setDebugLoc(DebugLoc());
  570. continue;
  571. }
  572. // From this point we are only handling call instructions.
  573. CallInst *CI = cast<CallInst>(&Val);
  574. // We add any debug statements here, to be removed after. Since the
  575. // instructions originate from many different locations in the program,
  576. // it will cause incorrect reporting from a debugger if we keep the
  577. // same debug instructions.
  578. if (isa<DbgInfoIntrinsic>(CI)) {
  579. DebugInsts.push_back(&Val);
  580. continue;
  581. }
  582. // Edit the scope of called functions inside of outlined functions.
  583. if (DISubprogram *SP = New.getSubprogram()) {
  584. DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
  585. Val.setDebugLoc(DI);
  586. }
  587. }
  588. for (Instruction *I : DebugInsts)
  589. I->eraseFromParent();
  590. }
  591. assert(NewEnds.size() > 0 && "No return instruction for new function?");
  592. }
  593. /// Find the the constants that will need to be lifted into arguments
  594. /// as they are not the same in each instance of the region.
  595. ///
  596. /// \param [in] C - The IRSimilarityCandidate containing the region we are
  597. /// analyzing.
  598. /// \param [in] NotSame - The set of global value numbers that do not have a
  599. /// single Constant across all OutlinableRegions similar to \p C.
  600. /// \param [out] Inputs - The list containing the global value numbers of the
  601. /// arguments needed for the region of code.
  602. static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
  603. std::vector<unsigned> &Inputs) {
  604. DenseSet<unsigned> Seen;
  605. // Iterate over the instructions, and find what constants will need to be
  606. // extracted into arguments.
  607. for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
  608. IDIt != EndIDIt; IDIt++) {
  609. for (Value *V : (*IDIt).OperVals) {
  610. // Since these are stored before any outlining, they will be in the
  611. // global value numbering.
  612. unsigned GVN = C.getGVN(V).getValue();
  613. if (isa<Constant>(V))
  614. if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
  615. Inputs.push_back(GVN);
  616. Seen.insert(GVN);
  617. }
  618. }
  619. }
  620. }
  621. /// Find the GVN for the inputs that have been found by the CodeExtractor.
  622. ///
  623. /// \param [in] C - The IRSimilarityCandidate containing the region we are
  624. /// analyzing.
  625. /// \param [in] CurrentInputs - The set of inputs found by the
  626. /// CodeExtractor.
  627. /// \param [in] OutputMappings - The mapping of values that have been replaced
  628. /// by a new output value.
  629. /// \param [out] EndInputNumbers - The global value numbers for the extracted
  630. /// arguments.
  631. static void mapInputsToGVNs(IRSimilarityCandidate &C,
  632. SetVector<Value *> &CurrentInputs,
  633. const DenseMap<Value *, Value *> &OutputMappings,
  634. std::vector<unsigned> &EndInputNumbers) {
  635. // Get the Global Value Number for each input. We check if the Value has been
  636. // replaced by a different value at output, and use the original value before
  637. // replacement.
  638. for (Value *Input : CurrentInputs) {
  639. assert(Input && "Have a nullptr as an input");
  640. if (OutputMappings.find(Input) != OutputMappings.end())
  641. Input = OutputMappings.find(Input)->second;
  642. assert(C.getGVN(Input).hasValue() &&
  643. "Could not find a numbering for the given input");
  644. EndInputNumbers.push_back(C.getGVN(Input).getValue());
  645. }
  646. }
  647. /// Find the original value for the \p ArgInput values if any one of them was
  648. /// replaced during a previous extraction.
  649. ///
  650. /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
  651. /// \param [in] OutputMappings - The mapping of values that have been replaced
  652. /// by a new output value.
  653. /// \param [out] RemappedArgInputs - The remapped values according to
  654. /// \p OutputMappings that will be extracted.
  655. static void
  656. remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
  657. const DenseMap<Value *, Value *> &OutputMappings,
  658. SetVector<Value *> &RemappedArgInputs) {
  659. // Get the global value number for each input that will be extracted as an
  660. // argument by the code extractor, remapping if needed for reloaded values.
  661. for (Value *Input : ArgInputs) {
  662. if (OutputMappings.find(Input) != OutputMappings.end())
  663. Input = OutputMappings.find(Input)->second;
  664. RemappedArgInputs.insert(Input);
  665. }
  666. }
  667. /// Find the input GVNs and the output values for a region of Instructions.
  668. /// Using the code extractor, we collect the inputs to the extracted function.
  669. ///
  670. /// The \p Region can be identified as needing to be ignored in this function.
  671. /// It should be checked whether it should be ignored after a call to this
  672. /// function.
  673. ///
  674. /// \param [in,out] Region - The region of code to be analyzed.
  675. /// \param [out] InputGVNs - The global value numbers for the extracted
  676. /// arguments.
  677. /// \param [in] NotSame - The global value numbers in the region that do not
  678. /// have the same constant value in the regions structurally similar to
  679. /// \p Region.
  680. /// \param [in] OutputMappings - The mapping of values that have been replaced
  681. /// by a new output value after extraction.
  682. /// \param [out] ArgInputs - The values of the inputs to the extracted function.
  683. /// \param [out] Outputs - The set of values extracted by the CodeExtractor
  684. /// as outputs.
  685. static void getCodeExtractorArguments(
  686. OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
  687. DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
  688. SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
  689. IRSimilarityCandidate &C = *Region.Candidate;
  690. // OverallInputs are the inputs to the region found by the CodeExtractor,
  691. // SinkCands and HoistCands are used by the CodeExtractor to find sunken
  692. // allocas of values whose lifetimes are contained completely within the
  693. // outlined region. PremappedInputs are the arguments found by the
  694. // CodeExtractor, removing conditions such as sunken allocas, but that
  695. // may need to be remapped due to the extracted output values replacing
  696. // the original values. We use DummyOutputs for this first run of finding
  697. // inputs and outputs since the outputs could change during findAllocas,
  698. // the correct set of extracted outputs will be in the final Outputs ValueSet.
  699. SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
  700. DummyOutputs;
  701. // Use the code extractor to get the inputs and outputs, without sunken
  702. // allocas or removing llvm.assumes.
  703. CodeExtractor *CE = Region.CE;
  704. CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
  705. assert(Region.StartBB && "Region must have a start BasicBlock!");
  706. Function *OrigF = Region.StartBB->getParent();
  707. CodeExtractorAnalysisCache CEAC(*OrigF);
  708. BasicBlock *Dummy = nullptr;
  709. // The region may be ineligible due to VarArgs in the parent function. In this
  710. // case we ignore the region.
  711. if (!CE->isEligible()) {
  712. Region.IgnoreRegion = true;
  713. return;
  714. }
  715. // Find if any values are going to be sunk into the function when extracted
  716. CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
  717. CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
  718. // TODO: Support regions with sunken allocas: values whose lifetimes are
  719. // contained completely within the outlined region. These are not guaranteed
  720. // to be the same in every region, so we must elevate them all to arguments
  721. // when they appear. If these values are not equal, it means there is some
  722. // Input in OverallInputs that was removed for ArgInputs.
  723. if (OverallInputs.size() != PremappedInputs.size()) {
  724. Region.IgnoreRegion = true;
  725. return;
  726. }
  727. findConstants(C, NotSame, InputGVNs);
  728. mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
  729. remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
  730. ArgInputs);
  731. // Sort the GVNs, since we now have constants included in the \ref InputGVNs
  732. // we need to make sure they are in a deterministic order.
  733. stable_sort(InputGVNs);
  734. }
  735. /// Look over the inputs and map each input argument to an argument in the
  736. /// overall function for the OutlinableRegions. This creates a way to replace
  737. /// the arguments of the extracted function with the arguments of the new
  738. /// overall function.
  739. ///
  740. /// \param [in,out] Region - The region of code to be analyzed.
  741. /// \param [in] InputGVNs - The global value numbering of the input values
  742. /// collected.
  743. /// \param [in] ArgInputs - The values of the arguments to the extracted
  744. /// function.
  745. static void
  746. findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
  747. std::vector<unsigned> &InputGVNs,
  748. SetVector<Value *> &ArgInputs) {
  749. IRSimilarityCandidate &C = *Region.Candidate;
  750. OutlinableGroup &Group = *Region.Parent;
  751. // This counts the argument number in the overall function.
  752. unsigned TypeIndex = 0;
  753. // This counts the argument number in the extracted function.
  754. unsigned OriginalIndex = 0;
  755. // Find the mapping of the extracted arguments to the arguments for the
  756. // overall function. Since there may be extra arguments in the overall
  757. // function to account for the extracted constants, we have two different
  758. // counters as we find extracted arguments, and as we come across overall
  759. // arguments.
  760. // Additionally, in our first pass, for the first extracted function,
  761. // we find argument locations for the canonical value numbering. This
  762. // numbering overrides any discovered location for the extracted code.
  763. for (unsigned InputVal : InputGVNs) {
  764. Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
  765. assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
  766. unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
  767. Optional<Value *> InputOpt = C.fromGVN(InputVal);
  768. assert(InputOpt.hasValue() && "Global value number not found?");
  769. Value *Input = InputOpt.getValue();
  770. DenseMap<unsigned, unsigned>::iterator AggArgIt =
  771. Group.CanonicalNumberToAggArg.find(CanonicalNumber);
  772. if (!Group.InputTypesSet) {
  773. Group.ArgumentTypes.push_back(Input->getType());
  774. // If the input value has a swifterr attribute, make sure to mark the
  775. // argument in the overall function.
  776. if (Input->isSwiftError()) {
  777. assert(
  778. !Group.SwiftErrorArgument.hasValue() &&
  779. "Argument already marked with swifterr for this OutlinableGroup!");
  780. Group.SwiftErrorArgument = TypeIndex;
  781. }
  782. }
  783. // Check if we have a constant. If we do add it to the overall argument
  784. // number to Constant map for the region, and continue to the next input.
  785. if (Constant *CST = dyn_cast<Constant>(Input)) {
  786. if (AggArgIt != Group.CanonicalNumberToAggArg.end())
  787. Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
  788. else {
  789. Group.CanonicalNumberToAggArg.insert(
  790. std::make_pair(CanonicalNumber, TypeIndex));
  791. Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
  792. }
  793. TypeIndex++;
  794. continue;
  795. }
  796. // It is not a constant, we create the mapping from extracted argument list
  797. // to the overall argument list, using the canonical location, if it exists.
  798. assert(ArgInputs.count(Input) && "Input cannot be found!");
  799. if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
  800. if (OriginalIndex != AggArgIt->second)
  801. Region.ChangedArgOrder = true;
  802. Region.ExtractedArgToAgg.insert(
  803. std::make_pair(OriginalIndex, AggArgIt->second));
  804. Region.AggArgToExtracted.insert(
  805. std::make_pair(AggArgIt->second, OriginalIndex));
  806. } else {
  807. Group.CanonicalNumberToAggArg.insert(
  808. std::make_pair(CanonicalNumber, TypeIndex));
  809. Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
  810. Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
  811. }
  812. OriginalIndex++;
  813. TypeIndex++;
  814. }
  815. // If the function type definitions for the OutlinableGroup holding the region
  816. // have not been set, set the length of the inputs here. We should have the
  817. // same inputs for all of the different regions contained in the
  818. // OutlinableGroup since they are all structurally similar to one another.
  819. if (!Group.InputTypesSet) {
  820. Group.NumAggregateInputs = TypeIndex;
  821. Group.InputTypesSet = true;
  822. }
  823. Region.NumExtractedInputs = OriginalIndex;
  824. }
  825. /// Check if the \p V has any uses outside of the region other than \p PN.
  826. ///
  827. /// \param V [in] - The value to check.
  828. /// \param PHILoc [in] - The location in the PHINode of \p V.
  829. /// \param PN [in] - The PHINode using \p V.
  830. /// \param Exits [in] - The potential blocks we exit to from the outlined
  831. /// region.
  832. /// \param BlocksInRegion [in] - The basic blocks contained in the region.
  833. /// \returns true if \p V has any use soutside its region other than \p PN.
  834. static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
  835. SmallPtrSet<BasicBlock *, 1> &Exits,
  836. DenseSet<BasicBlock *> &BlocksInRegion) {
  837. // We check to see if the value is used by the PHINode from some other
  838. // predecessor not included in the region. If it is, we make sure
  839. // to keep it as an output.
  840. SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues());
  841. std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0);
  842. if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
  843. return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
  844. !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
  845. }))
  846. return true;
  847. // Check if the value is used by any other instructions outside the region.
  848. return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
  849. Instruction *I = dyn_cast<Instruction>(U);
  850. if (!I)
  851. return false;
  852. // If the use of the item is inside the region, we skip it. Uses
  853. // inside the region give us useful information about how the item could be
  854. // used as an output.
  855. BasicBlock *Parent = I->getParent();
  856. if (BlocksInRegion.contains(Parent))
  857. return false;
  858. // If it's not a PHINode then we definitely know the use matters. This
  859. // output value will not completely combined with another item in a PHINode
  860. // as it is directly reference by another non-phi instruction
  861. if (!isa<PHINode>(I))
  862. return true;
  863. // If we have a PHINode outside one of the exit locations, then it
  864. // can be considered an outside use as well. If there is a PHINode
  865. // contained in the Exit where this values use matters, it will be
  866. // caught when we analyze that PHINode.
  867. if (!Exits.contains(Parent))
  868. return true;
  869. return false;
  870. });
  871. }
  872. /// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
  873. /// considered outputs. A PHINodes is an output when more than one incoming
  874. /// value has been marked by the CodeExtractor as an output.
  875. ///
  876. /// \param CurrentExitFromRegion [in] - The block to analyze.
  877. /// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
  878. /// region.
  879. /// \param RegionBlocks [in] - The basic blocks in the region.
  880. /// \param Outputs [in, out] - The existing outputs for the region, we may add
  881. /// PHINodes to this as we find that they replace output values.
  882. /// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
  883. /// totally replaced by a PHINode.
  884. /// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
  885. /// in PHINodes, but have other uses, and should still be considered outputs.
  886. static void analyzeExitPHIsForOutputUses(
  887. BasicBlock *CurrentExitFromRegion,
  888. SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
  889. DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
  890. DenseSet<Value *> &OutputsReplacedByPHINode,
  891. DenseSet<Value *> &OutputsWithNonPhiUses) {
  892. for (PHINode &PN : CurrentExitFromRegion->phis()) {
  893. // Find all incoming values from the outlining region.
  894. SmallVector<unsigned, 2> IncomingVals;
  895. for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
  896. if (RegionBlocks.contains(PN.getIncomingBlock(I)))
  897. IncomingVals.push_back(I);
  898. // Do not process PHI if there are no predecessors from region.
  899. unsigned NumIncomingVals = IncomingVals.size();
  900. if (NumIncomingVals == 0)
  901. continue;
  902. // If there is one predecessor, we mark it as a value that needs to be kept
  903. // as an output.
  904. if (NumIncomingVals == 1) {
  905. Value *V = PN.getIncomingValue(*IncomingVals.begin());
  906. OutputsWithNonPhiUses.insert(V);
  907. OutputsReplacedByPHINode.erase(V);
  908. continue;
  909. }
  910. // This PHINode will be used as an output value, so we add it to our list.
  911. Outputs.insert(&PN);
  912. // Not all of the incoming values should be ignored as other inputs and
  913. // outputs may have uses in outlined region. If they have other uses
  914. // outside of the single PHINode we should not skip over it.
  915. for (unsigned Idx : IncomingVals) {
  916. Value *V = PN.getIncomingValue(Idx);
  917. if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
  918. OutputsWithNonPhiUses.insert(V);
  919. OutputsReplacedByPHINode.erase(V);
  920. continue;
  921. }
  922. if (!OutputsWithNonPhiUses.contains(V))
  923. OutputsReplacedByPHINode.insert(V);
  924. }
  925. }
  926. }
  927. // Represents the type for the unsigned number denoting the output number for
  928. // phi node, along with the canonical number for the exit block.
  929. using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
  930. // The list of canonical numbers for the incoming values to a PHINode.
  931. using CanonList = SmallVector<unsigned, 2>;
  932. // The pair type representing the set of canonical values being combined in the
  933. // PHINode, along with the location data for the PHINode.
  934. using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
  935. /// Encode \p PND as an integer for easy lookup based on the argument location,
  936. /// the parent BasicBlock canonical numbering, and the canonical numbering of
  937. /// the values stored in the PHINode.
  938. ///
  939. /// \param PND - The data to hash.
  940. /// \returns The hash code of \p PND.
  941. static hash_code encodePHINodeData(PHINodeData &PND) {
  942. return llvm::hash_combine(
  943. llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
  944. llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
  945. }
  946. /// Create a special GVN for PHINodes that will be used outside of
  947. /// the region. We create a hash code based on the Canonical number of the
  948. /// parent BasicBlock, the canonical numbering of the values stored in the
  949. /// PHINode and the aggregate argument location. This is used to find whether
  950. /// this PHINode type has been given a canonical numbering already. If not, we
  951. /// assign it a value and store it for later use. The value is returned to
  952. /// identify different output schemes for the set of regions.
  953. ///
  954. /// \param Region - The region that \p PN is an output for.
  955. /// \param PN - The PHINode we are analyzing.
  956. /// \param AggArgIdx - The argument \p PN will be stored into.
  957. /// \returns An optional holding the assigned canonical number, or None if
  958. /// there is some attribute of the PHINode blocking it from being used.
  959. static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
  960. PHINode *PN, unsigned AggArgIdx) {
  961. OutlinableGroup &Group = *Region.Parent;
  962. IRSimilarityCandidate &Cand = *Region.Candidate;
  963. BasicBlock *PHIBB = PN->getParent();
  964. CanonList PHIGVNs;
  965. for (Value *Incoming : PN->incoming_values()) {
  966. // If we cannot find a GVN, this means that the input to the PHINode is
  967. // not included in the region we are trying to analyze, meaning, that if
  968. // it was outlined, we would be adding an extra input. We ignore this
  969. // case for now, and so ignore the region.
  970. Optional<unsigned> OGVN = Cand.getGVN(Incoming);
  971. if (!OGVN.hasValue()) {
  972. Region.IgnoreRegion = true;
  973. return None;
  974. }
  975. // Collect the canonical numbers of the values in the PHINode.
  976. unsigned GVN = OGVN.getValue();
  977. OGVN = Cand.getCanonicalNum(GVN);
  978. assert(OGVN.hasValue() && "No GVN found for incoming value?");
  979. PHIGVNs.push_back(*OGVN);
  980. }
  981. // Now that we have the GVNs for the incoming values, we are going to combine
  982. // them with the GVN of the incoming bock, and the output location of the
  983. // PHINode to generate a hash value representing this instance of the PHINode.
  984. DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
  985. DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
  986. Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
  987. assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
  988. BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
  989. assert(BBGVN.hasValue() &&
  990. "Could not find canonical number for the incoming block!");
  991. // Create a pair of the exit block canonical value, and the aggregate
  992. // argument location, connected to the canonical numbers stored in the
  993. // PHINode.
  994. PHINodeData TemporaryPair =
  995. std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
  996. hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
  997. // Look for and create a new entry in our connection between canonical
  998. // numbers for PHINodes, and the set of objects we just created.
  999. GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
  1000. if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
  1001. bool Inserted = false;
  1002. std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
  1003. std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
  1004. std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
  1005. std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
  1006. }
  1007. return GVNToPHIIt->second;
  1008. }
  1009. /// Create a mapping of the output arguments for the \p Region to the output
  1010. /// arguments of the overall outlined function.
  1011. ///
  1012. /// \param [in,out] Region - The region of code to be analyzed.
  1013. /// \param [in] Outputs - The values found by the code extractor.
  1014. static void
  1015. findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
  1016. SetVector<Value *> &Outputs) {
  1017. OutlinableGroup &Group = *Region.Parent;
  1018. IRSimilarityCandidate &C = *Region.Candidate;
  1019. SmallVector<BasicBlock *> BE;
  1020. DenseSet<BasicBlock *> BlocksInRegion;
  1021. C.getBasicBlocks(BlocksInRegion, BE);
  1022. // Find the exits to the region.
  1023. SmallPtrSet<BasicBlock *, 1> Exits;
  1024. for (BasicBlock *Block : BE)
  1025. for (BasicBlock *Succ : successors(Block))
  1026. if (!BlocksInRegion.contains(Succ))
  1027. Exits.insert(Succ);
  1028. // After determining which blocks exit to PHINodes, we add these PHINodes to
  1029. // the set of outputs to be processed. We also check the incoming values of
  1030. // the PHINodes for whether they should no longer be considered outputs.
  1031. DenseSet<Value *> OutputsReplacedByPHINode;
  1032. DenseSet<Value *> OutputsWithNonPhiUses;
  1033. for (BasicBlock *ExitBB : Exits)
  1034. analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
  1035. OutputsReplacedByPHINode,
  1036. OutputsWithNonPhiUses);
  1037. // This counts the argument number in the extracted function.
  1038. unsigned OriginalIndex = Region.NumExtractedInputs;
  1039. // This counts the argument number in the overall function.
  1040. unsigned TypeIndex = Group.NumAggregateInputs;
  1041. bool TypeFound;
  1042. DenseSet<unsigned> AggArgsUsed;
  1043. // Iterate over the output types and identify if there is an aggregate pointer
  1044. // type whose base type matches the current output type. If there is, we mark
  1045. // that we will use this output register for this value. If not we add another
  1046. // type to the overall argument type list. We also store the GVNs used for
  1047. // stores to identify which values will need to be moved into an special
  1048. // block that holds the stores to the output registers.
  1049. for (Value *Output : Outputs) {
  1050. TypeFound = false;
  1051. // We can do this since it is a result value, and will have a number
  1052. // that is necessarily the same. BUT if in the future, the instructions
  1053. // do not have to be in same order, but are functionally the same, we will
  1054. // have to use a different scheme, as one-to-one correspondence is not
  1055. // guaranteed.
  1056. unsigned ArgumentSize = Group.ArgumentTypes.size();
  1057. // If the output is combined in a PHINode, we make sure to skip over it.
  1058. if (OutputsReplacedByPHINode.contains(Output))
  1059. continue;
  1060. unsigned AggArgIdx = 0;
  1061. for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
  1062. if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
  1063. continue;
  1064. if (AggArgsUsed.contains(Jdx))
  1065. continue;
  1066. TypeFound = true;
  1067. AggArgsUsed.insert(Jdx);
  1068. Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
  1069. Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
  1070. AggArgIdx = Jdx;
  1071. break;
  1072. }
  1073. // We were unable to find an unused type in the output type set that matches
  1074. // the output, so we add a pointer type to the argument types of the overall
  1075. // function to handle this output and create a mapping to it.
  1076. if (!TypeFound) {
  1077. Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
  1078. // Mark the new pointer type as the last value in the aggregate argument
  1079. // list.
  1080. unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
  1081. AggArgsUsed.insert(ArgTypeIdx);
  1082. Region.ExtractedArgToAgg.insert(
  1083. std::make_pair(OriginalIndex, ArgTypeIdx));
  1084. Region.AggArgToExtracted.insert(
  1085. std::make_pair(ArgTypeIdx, OriginalIndex));
  1086. AggArgIdx = ArgTypeIdx;
  1087. }
  1088. // TODO: Adapt to the extra input from the PHINode.
  1089. PHINode *PN = dyn_cast<PHINode>(Output);
  1090. Optional<unsigned> GVN;
  1091. if (PN && !BlocksInRegion.contains(PN->getParent())) {
  1092. // Values outside the region can be combined into PHINode when we
  1093. // have multiple exits. We collect both of these into a list to identify
  1094. // which values are being used in the PHINode. Each list identifies a
  1095. // different PHINode, and a different output. We store the PHINode as it's
  1096. // own canonical value. These canonical values are also dependent on the
  1097. // output argument it is saved to.
  1098. // If two PHINodes have the same canonical values, but different aggregate
  1099. // argument locations, then they will have distinct Canonical Values.
  1100. GVN = getGVNForPHINode(Region, PN, AggArgIdx);
  1101. if (!GVN.hasValue())
  1102. return;
  1103. } else {
  1104. // If we do not have a PHINode we use the global value numbering for the
  1105. // output value, to find the canonical number to add to the set of stored
  1106. // values.
  1107. GVN = C.getGVN(Output);
  1108. GVN = C.getCanonicalNum(*GVN);
  1109. }
  1110. // Each region has a potentially unique set of outputs. We save which
  1111. // values are output in a list of canonical values so we can differentiate
  1112. // among the different store schemes.
  1113. Region.GVNStores.push_back(*GVN);
  1114. OriginalIndex++;
  1115. TypeIndex++;
  1116. }
  1117. // We sort the stored values to make sure that we are not affected by analysis
  1118. // order when determining what combination of items were stored.
  1119. stable_sort(Region.GVNStores);
  1120. }
  1121. void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
  1122. DenseSet<unsigned> &NotSame) {
  1123. std::vector<unsigned> Inputs;
  1124. SetVector<Value *> ArgInputs, Outputs;
  1125. getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
  1126. Outputs);
  1127. if (Region.IgnoreRegion)
  1128. return;
  1129. // Map the inputs found by the CodeExtractor to the arguments found for
  1130. // the overall function.
  1131. findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
  1132. // Map the outputs found by the CodeExtractor to the arguments found for
  1133. // the overall function.
  1134. findExtractedOutputToOverallOutputMapping(Region, Outputs);
  1135. }
  1136. /// Replace the extracted function in the Region with a call to the overall
  1137. /// function constructed from the deduplicated similar regions, replacing and
  1138. /// remapping the values passed to the extracted function as arguments to the
  1139. /// new arguments of the overall function.
  1140. ///
  1141. /// \param [in] M - The module to outline from.
  1142. /// \param [in] Region - The regions of extracted code to be replaced with a new
  1143. /// function.
  1144. /// \returns a call instruction with the replaced function.
  1145. CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
  1146. std::vector<Value *> NewCallArgs;
  1147. DenseMap<unsigned, unsigned>::iterator ArgPair;
  1148. OutlinableGroup &Group = *Region.Parent;
  1149. CallInst *Call = Region.Call;
  1150. assert(Call && "Call to replace is nullptr?");
  1151. Function *AggFunc = Group.OutlinedFunction;
  1152. assert(AggFunc && "Function to replace with is nullptr?");
  1153. // If the arguments are the same size, there are not values that need to be
  1154. // made into an argument, the argument ordering has not been change, or
  1155. // different output registers to handle. We can simply replace the called
  1156. // function in this case.
  1157. if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
  1158. LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
  1159. << *AggFunc << " with same number of arguments\n");
  1160. Call->setCalledFunction(AggFunc);
  1161. return Call;
  1162. }
  1163. // We have a different number of arguments than the new function, so
  1164. // we need to use our previously mappings off extracted argument to overall
  1165. // function argument, and constants to overall function argument to create the
  1166. // new argument list.
  1167. for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
  1168. if (AggArgIdx == AggFunc->arg_size() - 1 &&
  1169. Group.OutputGVNCombinations.size() > 1) {
  1170. // If we are on the last argument, and we need to differentiate between
  1171. // output blocks, add an integer to the argument list to determine
  1172. // what block to take
  1173. LLVM_DEBUG(dbgs() << "Set switch block argument to "
  1174. << Region.OutputBlockNum << "\n");
  1175. NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
  1176. Region.OutputBlockNum));
  1177. continue;
  1178. }
  1179. ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
  1180. if (ArgPair != Region.AggArgToExtracted.end()) {
  1181. Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
  1182. // If we found the mapping from the extracted function to the overall
  1183. // function, we simply add it to the argument list. We use the same
  1184. // value, it just needs to honor the new order of arguments.
  1185. LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
  1186. << *ArgumentValue << "\n");
  1187. NewCallArgs.push_back(ArgumentValue);
  1188. continue;
  1189. }
  1190. // If it is a constant, we simply add it to the argument list as a value.
  1191. if (Region.AggArgToConstant.find(AggArgIdx) !=
  1192. Region.AggArgToConstant.end()) {
  1193. Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
  1194. LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
  1195. << *CST << "\n");
  1196. NewCallArgs.push_back(CST);
  1197. continue;
  1198. }
  1199. // Add a nullptr value if the argument is not found in the extracted
  1200. // function. If we cannot find a value, it means it is not in use
  1201. // for the region, so we should not pass anything to it.
  1202. LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
  1203. NewCallArgs.push_back(ConstantPointerNull::get(
  1204. static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
  1205. }
  1206. LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
  1207. << *AggFunc << " with new set of arguments\n");
  1208. // Create the new call instruction and erase the old one.
  1209. Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
  1210. Call);
  1211. // It is possible that the call to the outlined function is either the first
  1212. // instruction is in the new block, the last instruction, or both. If either
  1213. // of these is the case, we need to make sure that we replace the instruction
  1214. // in the IRInstructionData struct with the new call.
  1215. CallInst *OldCall = Region.Call;
  1216. if (Region.NewFront->Inst == OldCall)
  1217. Region.NewFront->Inst = Call;
  1218. if (Region.NewBack->Inst == OldCall)
  1219. Region.NewBack->Inst = Call;
  1220. // Transfer any debug information.
  1221. Call->setDebugLoc(Region.Call->getDebugLoc());
  1222. // Since our output may determine which branch we go to, we make sure to
  1223. // propogate this new call value through the module.
  1224. OldCall->replaceAllUsesWith(Call);
  1225. // Remove the old instruction.
  1226. OldCall->eraseFromParent();
  1227. Region.Call = Call;
  1228. // Make sure that the argument in the new function has the SwiftError
  1229. // argument.
  1230. if (Group.SwiftErrorArgument.hasValue())
  1231. Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
  1232. Attribute::SwiftError);
  1233. return Call;
  1234. }
  1235. /// Find or create a BasicBlock in the outlined function containing PhiBlocks
  1236. /// for \p RetVal.
  1237. ///
  1238. /// \param Group - The OutlinableGroup containing the information about the
  1239. /// overall outlined function.
  1240. /// \param RetVal - The return value or exit option that we are currently
  1241. /// evaluating.
  1242. /// \returns The found or newly created BasicBlock to contain the needed
  1243. /// PHINodes to be used as outputs.
  1244. static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
  1245. DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal,
  1246. ReturnBlockForRetVal;
  1247. PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
  1248. ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
  1249. assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
  1250. "Could not find output value!");
  1251. BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
  1252. // Find if a PHIBlock exists for this return value already. If it is
  1253. // the first time we are analyzing this, we will not, so we record it.
  1254. PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
  1255. if (PhiBlockForRetVal != Group.PHIBlocks.end())
  1256. return PhiBlockForRetVal->second;
  1257. // If we did not find a block, we create one, and insert it into the
  1258. // overall function and record it.
  1259. bool Inserted = false;
  1260. BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
  1261. ReturnBB->getParent());
  1262. std::tie(PhiBlockForRetVal, Inserted) =
  1263. Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
  1264. // We find the predecessors of the return block in the newly created outlined
  1265. // function in order to point them to the new PHIBlock rather than the already
  1266. // existing return block.
  1267. SmallVector<BranchInst *, 2> BranchesToChange;
  1268. for (BasicBlock *Pred : predecessors(ReturnBB))
  1269. BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
  1270. // Now we mark the branch instructions found, and change the references of the
  1271. // return block to the newly created PHIBlock.
  1272. for (BranchInst *BI : BranchesToChange)
  1273. for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
  1274. if (BI->getSuccessor(Succ) != ReturnBB)
  1275. continue;
  1276. BI->setSuccessor(Succ, PHIBlock);
  1277. }
  1278. BranchInst::Create(ReturnBB, PHIBlock);
  1279. return PhiBlockForRetVal->second;
  1280. }
  1281. /// For the function call now representing the \p Region, find the passed value
  1282. /// to that call that represents Argument \p A at the call location if the
  1283. /// call has already been replaced with a call to the overall, aggregate
  1284. /// function.
  1285. ///
  1286. /// \param A - The Argument to get the passed value for.
  1287. /// \param Region - The extracted Region corresponding to the outlined function.
  1288. /// \returns The Value representing \p A at the call site.
  1289. static Value *
  1290. getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
  1291. const OutlinableRegion &Region) {
  1292. // If we don't need to adjust the argument number at all (since the call
  1293. // has already been replaced by a call to the overall outlined function)
  1294. // we can just get the specified argument.
  1295. return Region.Call->getArgOperand(A->getArgNo());
  1296. }
  1297. /// For the function call now representing the \p Region, find the passed value
  1298. /// to that call that represents Argument \p A at the call location if the
  1299. /// call has only been replaced by the call to the aggregate function.
  1300. ///
  1301. /// \param A - The Argument to get the passed value for.
  1302. /// \param Region - The extracted Region corresponding to the outlined function.
  1303. /// \returns The Value representing \p A at the call site.
  1304. static Value *
  1305. getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
  1306. const OutlinableRegion &Region) {
  1307. unsigned ArgNum = A->getArgNo();
  1308. // If it is a constant, we can look at our mapping from when we created
  1309. // the outputs to figure out what the constant value is.
  1310. if (Region.AggArgToConstant.count(ArgNum))
  1311. return Region.AggArgToConstant.find(ArgNum)->second;
  1312. // If it is not a constant, and we are not looking at the overall function, we
  1313. // need to adjust which argument we are looking at.
  1314. ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
  1315. return Region.Call->getArgOperand(ArgNum);
  1316. }
  1317. /// Find the canonical numbering for the incoming Values into the PHINode \p PN.
  1318. ///
  1319. /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
  1320. /// \param Region [in] - The OutlinableRegion containing \p PN.
  1321. /// \param OutputMappings [in] - The mapping of output values from outlined
  1322. /// region to their original values.
  1323. /// \param CanonNums [out] - The canonical numbering for the incoming values to
  1324. /// \p PN.
  1325. /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
  1326. /// of \p Region rather than the overall function's call.
  1327. static void
  1328. findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
  1329. const DenseMap<Value *, Value *> &OutputMappings,
  1330. DenseSet<unsigned> &CanonNums,
  1331. bool ReplacedWithOutlinedCall = true) {
  1332. // Iterate over the incoming values.
  1333. for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
  1334. Value *IVal = PN->getIncomingValue(Idx);
  1335. // If we have an argument as incoming value, we need to grab the passed
  1336. // value from the call itself.
  1337. if (Argument *A = dyn_cast<Argument>(IVal)) {
  1338. if (ReplacedWithOutlinedCall)
  1339. IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
  1340. else
  1341. IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
  1342. }
  1343. // Get the original value if it has been replaced by an output value.
  1344. IVal = findOutputMapping(OutputMappings, IVal);
  1345. // Find and add the canonical number for the incoming value.
  1346. Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
  1347. assert(GVN.hasValue() && "No GVN for incoming value");
  1348. Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
  1349. assert(CanonNum.hasValue() && "No Canonical Number for GVN");
  1350. CanonNums.insert(*CanonNum);
  1351. }
  1352. }
  1353. /// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
  1354. /// in order to condense the number of instructions added to the outlined
  1355. /// function.
  1356. ///
  1357. /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
  1358. /// \param Region [in] - The OutlinableRegion containing \p PN.
  1359. /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
  1360. /// \p PN in.
  1361. /// \param OutputMappings [in] - The mapping of output values from outlined
  1362. /// region to their original values.
  1363. /// \return the newly found or created PHINode in \p OverallPhiBlock.
  1364. static PHINode*
  1365. findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
  1366. BasicBlock *OverallPhiBlock,
  1367. const DenseMap<Value *, Value *> &OutputMappings) {
  1368. OutlinableGroup &Group = *Region.Parent;
  1369. DenseSet<unsigned> PNCanonNums;
  1370. // We have to use the extracted function since we have merged this region into
  1371. // the overall function yet. We make sure to reassign the argument numbering
  1372. // since it is possible that the argument ordering is different between the
  1373. // functions.
  1374. findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
  1375. /* ReplacedWithOutlinedCall = */ false);
  1376. OutlinableRegion *FirstRegion = Group.Regions[0];
  1377. DenseSet<unsigned> CurrentCanonNums;
  1378. // Find the Canonical Numbering for each PHINode, if it matches, we replace
  1379. // the uses of the PHINode we are searching for, with the found PHINode.
  1380. for (PHINode &CurrPN : OverallPhiBlock->phis()) {
  1381. CurrentCanonNums.clear();
  1382. findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
  1383. /* ReplacedWithOutlinedCall = */ true);
  1384. if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) {
  1385. return CurrentCanonNums.contains(CanonNum);
  1386. }))
  1387. return &CurrPN;
  1388. }
  1389. // If we've made it here, it means we weren't able to replace the PHINode, so
  1390. // we must insert it ourselves.
  1391. PHINode *NewPN = cast<PHINode>(PN.clone());
  1392. NewPN->insertBefore(&*OverallPhiBlock->begin());
  1393. for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
  1394. Idx++) {
  1395. Value *IncomingVal = NewPN->getIncomingValue(Idx);
  1396. BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
  1397. // Find corresponding basic block in the overall function for the incoming
  1398. // block.
  1399. Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI();
  1400. assert(FirstNonPHI && "Incoming block is empty?");
  1401. Value *CorrespondingVal =
  1402. Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI);
  1403. assert(CorrespondingVal && "Value is nullptr?");
  1404. BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent();
  1405. NewPN->setIncomingBlock(Idx, BlockToUse);
  1406. // If we have an argument we make sure we replace using the argument from
  1407. // the correct function.
  1408. if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
  1409. Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
  1410. NewPN->setIncomingValue(Idx, Val);
  1411. continue;
  1412. }
  1413. // Find the corresponding value in the overall function.
  1414. IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
  1415. Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
  1416. assert(Val && "Value is nullptr?");
  1417. NewPN->setIncomingValue(Idx, Val);
  1418. }
  1419. return NewPN;
  1420. }
  1421. // Within an extracted function, replace the argument uses of the extracted
  1422. // region with the arguments of the function for an OutlinableGroup.
  1423. //
  1424. /// \param [in] Region - The region of extracted code to be changed.
  1425. /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
  1426. /// region.
  1427. /// \param [in] FirstFunction - A flag to indicate whether we are using this
  1428. /// function to define the overall outlined function for all the regions, or
  1429. /// if we are operating on one of the following regions.
  1430. static void
  1431. replaceArgumentUses(OutlinableRegion &Region,
  1432. DenseMap<Value *, BasicBlock *> &OutputBBs,
  1433. const DenseMap<Value *, Value *> &OutputMappings,
  1434. bool FirstFunction = false) {
  1435. OutlinableGroup &Group = *Region.Parent;
  1436. assert(Region.ExtractedFunction && "Region has no extracted function?");
  1437. Function *DominatingFunction = Region.ExtractedFunction;
  1438. if (FirstFunction)
  1439. DominatingFunction = Group.OutlinedFunction;
  1440. DominatorTree DT(*DominatingFunction);
  1441. for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
  1442. ArgIdx++) {
  1443. assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
  1444. Region.ExtractedArgToAgg.end() &&
  1445. "No mapping from extracted to outlined?");
  1446. unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
  1447. Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
  1448. Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
  1449. // The argument is an input, so we can simply replace it with the overall
  1450. // argument value
  1451. if (ArgIdx < Region.NumExtractedInputs) {
  1452. LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
  1453. << *Region.ExtractedFunction << " with " << *AggArg
  1454. << " in function " << *Group.OutlinedFunction << "\n");
  1455. Arg->replaceAllUsesWith(AggArg);
  1456. continue;
  1457. }
  1458. // If we are replacing an output, we place the store value in its own
  1459. // block inside the overall function before replacing the use of the output
  1460. // in the function.
  1461. assert(Arg->hasOneUse() && "Output argument can only have one use");
  1462. User *InstAsUser = Arg->user_back();
  1463. assert(InstAsUser && "User is nullptr!");
  1464. Instruction *I = cast<Instruction>(InstAsUser);
  1465. BasicBlock *BB = I->getParent();
  1466. SmallVector<BasicBlock *, 4> Descendants;
  1467. DT.getDescendants(BB, Descendants);
  1468. bool EdgeAdded = false;
  1469. if (Descendants.size() == 0) {
  1470. EdgeAdded = true;
  1471. DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
  1472. DT.getDescendants(BB, Descendants);
  1473. }
  1474. // Iterate over the following blocks, looking for return instructions,
  1475. // if we find one, find the corresponding output block for the return value
  1476. // and move our store instruction there.
  1477. for (BasicBlock *DescendBB : Descendants) {
  1478. ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
  1479. if (!RI)
  1480. continue;
  1481. Value *RetVal = RI->getReturnValue();
  1482. auto VBBIt = OutputBBs.find(RetVal);
  1483. assert(VBBIt != OutputBBs.end() && "Could not find output value!");
  1484. // If this is storing a PHINode, we must make sure it is included in the
  1485. // overall function.
  1486. StoreInst *SI = cast<StoreInst>(I);
  1487. Value *ValueOperand = SI->getValueOperand();
  1488. StoreInst *NewI = cast<StoreInst>(I->clone());
  1489. NewI->setDebugLoc(DebugLoc());
  1490. BasicBlock *OutputBB = VBBIt->second;
  1491. OutputBB->getInstList().push_back(NewI);
  1492. LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
  1493. << *OutputBB << "\n");
  1494. // If this is storing a PHINode, we must make sure it is included in the
  1495. // overall function.
  1496. if (!isa<PHINode>(ValueOperand) ||
  1497. Region.Candidate->getGVN(ValueOperand).hasValue()) {
  1498. if (FirstFunction)
  1499. continue;
  1500. Value *CorrVal =
  1501. Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
  1502. assert(CorrVal && "Value is nullptr?");
  1503. NewI->setOperand(0, CorrVal);
  1504. continue;
  1505. }
  1506. PHINode *PN = cast<PHINode>(SI->getValueOperand());
  1507. // If it has a value, it was not split by the code extractor, which
  1508. // is what we are looking for.
  1509. if (Region.Candidate->getGVN(PN).hasValue())
  1510. continue;
  1511. // We record the parent block for the PHINode in the Region so that
  1512. // we can exclude it from checks later on.
  1513. Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
  1514. // If this is the first function, we do not need to worry about mergiing
  1515. // this with any other block in the overall outlined function, so we can
  1516. // just continue.
  1517. if (FirstFunction) {
  1518. BasicBlock *PHIBlock = PN->getParent();
  1519. Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
  1520. continue;
  1521. }
  1522. // We look for the aggregate block that contains the PHINodes leading into
  1523. // this exit path. If we can't find one, we create one.
  1524. BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
  1525. // For our PHINode, we find the combined canonical numbering, and
  1526. // attempt to find a matching PHINode in the overall PHIBlock. If we
  1527. // cannot, we copy the PHINode and move it into this new block.
  1528. PHINode *NewPN =
  1529. findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings);
  1530. NewI->setOperand(0, NewPN);
  1531. }
  1532. // If we added an edge for basic blocks without a predecessor, we remove it
  1533. // here.
  1534. if (EdgeAdded)
  1535. DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
  1536. I->eraseFromParent();
  1537. LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
  1538. << *Region.ExtractedFunction << " with " << *AggArg
  1539. << " in function " << *Group.OutlinedFunction << "\n");
  1540. Arg->replaceAllUsesWith(AggArg);
  1541. }
  1542. }
  1543. /// Within an extracted function, replace the constants that need to be lifted
  1544. /// into arguments with the actual argument.
  1545. ///
  1546. /// \param Region [in] - The region of extracted code to be changed.
  1547. void replaceConstants(OutlinableRegion &Region) {
  1548. OutlinableGroup &Group = *Region.Parent;
  1549. // Iterate over the constants that need to be elevated into arguments
  1550. for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
  1551. unsigned AggArgIdx = Const.first;
  1552. Function *OutlinedFunction = Group.OutlinedFunction;
  1553. assert(OutlinedFunction && "Overall Function is not defined?");
  1554. Constant *CST = Const.second;
  1555. Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
  1556. // Identify the argument it will be elevated to, and replace instances of
  1557. // that constant in the function.
  1558. // TODO: If in the future constants do not have one global value number,
  1559. // i.e. a constant 1 could be mapped to several values, this check will
  1560. // have to be more strict. It cannot be using only replaceUsesWithIf.
  1561. LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
  1562. << " in function " << *OutlinedFunction << " with "
  1563. << *Arg << "\n");
  1564. CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
  1565. if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
  1566. return I->getFunction() == OutlinedFunction;
  1567. return false;
  1568. });
  1569. }
  1570. }
  1571. /// It is possible that there is a basic block that already performs the same
  1572. /// stores. This returns a duplicate block, if it exists
  1573. ///
  1574. /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
  1575. /// \param OutputStoreBBs [in] The existing output blocks.
  1576. /// \returns an optional value with the number output block if there is a match.
  1577. Optional<unsigned> findDuplicateOutputBlock(
  1578. DenseMap<Value *, BasicBlock *> &OutputBBs,
  1579. std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
  1580. bool Mismatch = false;
  1581. unsigned MatchingNum = 0;
  1582. // We compare the new set output blocks to the other sets of output blocks.
  1583. // If they are the same number, and have identical instructions, they are
  1584. // considered to be the same.
  1585. for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
  1586. Mismatch = false;
  1587. for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
  1588. DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
  1589. OutputBBs.find(VToB.first);
  1590. if (OutputBBIt == OutputBBs.end()) {
  1591. Mismatch = true;
  1592. break;
  1593. }
  1594. BasicBlock *CompBB = VToB.second;
  1595. BasicBlock *OutputBB = OutputBBIt->second;
  1596. if (CompBB->size() - 1 != OutputBB->size()) {
  1597. Mismatch = true;
  1598. break;
  1599. }
  1600. BasicBlock::iterator NIt = OutputBB->begin();
  1601. for (Instruction &I : *CompBB) {
  1602. if (isa<BranchInst>(&I))
  1603. continue;
  1604. if (!I.isIdenticalTo(&(*NIt))) {
  1605. Mismatch = true;
  1606. break;
  1607. }
  1608. NIt++;
  1609. }
  1610. }
  1611. if (!Mismatch)
  1612. return MatchingNum;
  1613. MatchingNum++;
  1614. }
  1615. return None;
  1616. }
  1617. /// Remove empty output blocks from the outlined region.
  1618. ///
  1619. /// \param BlocksToPrune - Mapping of return values output blocks for the \p
  1620. /// Region.
  1621. /// \param Region - The OutlinableRegion we are analyzing.
  1622. static bool
  1623. analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
  1624. OutlinableRegion &Region) {
  1625. bool AllRemoved = true;
  1626. Value *RetValueForBB;
  1627. BasicBlock *NewBB;
  1628. SmallVector<Value *, 4> ToRemove;
  1629. // Iterate over the output blocks created in the outlined section.
  1630. for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
  1631. RetValueForBB = VtoBB.first;
  1632. NewBB = VtoBB.second;
  1633. // If there are no instructions, we remove it from the module, and also
  1634. // mark the value for removal from the return value to output block mapping.
  1635. if (NewBB->size() == 0) {
  1636. NewBB->eraseFromParent();
  1637. ToRemove.push_back(RetValueForBB);
  1638. continue;
  1639. }
  1640. // Mark that we could not remove all the blocks since they were not all
  1641. // empty.
  1642. AllRemoved = false;
  1643. }
  1644. // Remove the return value from the mapping.
  1645. for (Value *V : ToRemove)
  1646. BlocksToPrune.erase(V);
  1647. // Mark the region as having the no output scheme.
  1648. if (AllRemoved)
  1649. Region.OutputBlockNum = -1;
  1650. return AllRemoved;
  1651. }
  1652. /// For the outlined section, move needed the StoreInsts for the output
  1653. /// registers into their own block. Then, determine if there is a duplicate
  1654. /// output block already created.
  1655. ///
  1656. /// \param [in] OG - The OutlinableGroup of regions to be outlined.
  1657. /// \param [in] Region - The OutlinableRegion that is being analyzed.
  1658. /// \param [in,out] OutputBBs - the blocks that stores for this region will be
  1659. /// placed in.
  1660. /// \param [in] EndBBs - the final blocks of the extracted function.
  1661. /// \param [in] OutputMappings - OutputMappings the mapping of values that have
  1662. /// been replaced by a new output value.
  1663. /// \param [in,out] OutputStoreBBs - The existing output blocks.
  1664. static void alignOutputBlockWithAggFunc(
  1665. OutlinableGroup &OG, OutlinableRegion &Region,
  1666. DenseMap<Value *, BasicBlock *> &OutputBBs,
  1667. DenseMap<Value *, BasicBlock *> &EndBBs,
  1668. const DenseMap<Value *, Value *> &OutputMappings,
  1669. std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
  1670. // If none of the output blocks have any instructions, this means that we do
  1671. // not have to determine if it matches any of the other output schemes, and we
  1672. // don't have to do anything else.
  1673. if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
  1674. return;
  1675. // Determine is there is a duplicate set of blocks.
  1676. Optional<unsigned> MatchingBB =
  1677. findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
  1678. // If there is, we remove the new output blocks. If it does not,
  1679. // we add it to our list of sets of output blocks.
  1680. if (MatchingBB.hasValue()) {
  1681. LLVM_DEBUG(dbgs() << "Set output block for region in function"
  1682. << Region.ExtractedFunction << " to "
  1683. << MatchingBB.getValue());
  1684. Region.OutputBlockNum = MatchingBB.getValue();
  1685. for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
  1686. VtoBB.second->eraseFromParent();
  1687. return;
  1688. }
  1689. Region.OutputBlockNum = OutputStoreBBs.size();
  1690. Value *RetValueForBB;
  1691. BasicBlock *NewBB;
  1692. OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
  1693. for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
  1694. RetValueForBB = VtoBB.first;
  1695. NewBB = VtoBB.second;
  1696. DenseMap<Value *, BasicBlock *>::iterator VBBIt =
  1697. EndBBs.find(RetValueForBB);
  1698. LLVM_DEBUG(dbgs() << "Create output block for region in"
  1699. << Region.ExtractedFunction << " to "
  1700. << *NewBB);
  1701. BranchInst::Create(VBBIt->second, NewBB);
  1702. OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
  1703. }
  1704. }
  1705. /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
  1706. /// before creating a basic block for each \p NewMap, and inserting into the new
  1707. /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
  1708. ///
  1709. /// \param OldMap [in] - The mapping to base the new mapping off of.
  1710. /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
  1711. /// \param ParentFunc [in] - The function to put the new basic block in.
  1712. /// \param BaseName [in] - The start of the BasicBlock names to be appended to
  1713. /// by an index value.
  1714. static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
  1715. DenseMap<Value *, BasicBlock *> &NewMap,
  1716. Function *ParentFunc, Twine BaseName) {
  1717. unsigned Idx = 0;
  1718. std::vector<Value *> SortedKeys;
  1719. getSortedConstantKeys(SortedKeys, OldMap);
  1720. for (Value *RetVal : SortedKeys) {
  1721. BasicBlock *NewBB = BasicBlock::Create(
  1722. ParentFunc->getContext(),
  1723. Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
  1724. ParentFunc);
  1725. NewMap.insert(std::make_pair(RetVal, NewBB));
  1726. }
  1727. }
  1728. /// Create the switch statement for outlined function to differentiate between
  1729. /// all the output blocks.
  1730. ///
  1731. /// For the outlined section, determine if an outlined block already exists that
  1732. /// matches the needed stores for the extracted section.
  1733. /// \param [in] M - The module we are outlining from.
  1734. /// \param [in] OG - The group of regions to be outlined.
  1735. /// \param [in] EndBBs - The final blocks of the extracted function.
  1736. /// \param [in,out] OutputStoreBBs - The existing output blocks.
  1737. void createSwitchStatement(
  1738. Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
  1739. std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
  1740. // We only need the switch statement if there is more than one store
  1741. // combination, or there is more than one set of output blocks. The first
  1742. // will occur when we store different sets of values for two different
  1743. // regions. The second will occur when we have two outputs that are combined
  1744. // in a PHINode outside of the region in one outlined instance, and are used
  1745. // seaparately in another. This will create the same set of OutputGVNs, but
  1746. // will generate two different output schemes.
  1747. if (OG.OutputGVNCombinations.size() > 1) {
  1748. Function *AggFunc = OG.OutlinedFunction;
  1749. // Create a final block for each different return block.
  1750. DenseMap<Value *, BasicBlock *> ReturnBBs;
  1751. createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
  1752. for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
  1753. std::pair<Value *, BasicBlock *> &OutputBlock =
  1754. *OG.EndBBs.find(RetBlockPair.first);
  1755. BasicBlock *ReturnBlock = RetBlockPair.second;
  1756. BasicBlock *EndBB = OutputBlock.second;
  1757. Instruction *Term = EndBB->getTerminator();
  1758. // Move the return value to the final block instead of the original exit
  1759. // stub.
  1760. Term->moveBefore(*ReturnBlock, ReturnBlock->end());
  1761. // Put the switch statement in the old end basic block for the function
  1762. // with a fall through to the new return block.
  1763. LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
  1764. << OutputStoreBBs.size() << "\n");
  1765. SwitchInst *SwitchI =
  1766. SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
  1767. ReturnBlock, OutputStoreBBs.size(), EndBB);
  1768. unsigned Idx = 0;
  1769. for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
  1770. DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
  1771. OutputStoreBB.find(OutputBlock.first);
  1772. if (OSBBIt == OutputStoreBB.end())
  1773. continue;
  1774. BasicBlock *BB = OSBBIt->second;
  1775. SwitchI->addCase(
  1776. ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
  1777. Term = BB->getTerminator();
  1778. Term->setSuccessor(0, ReturnBlock);
  1779. Idx++;
  1780. }
  1781. }
  1782. return;
  1783. }
  1784. assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
  1785. // If there needs to be stores, move them from the output blocks to their
  1786. // corresponding ending block. We do not check that the OutputGVNCombinations
  1787. // is equal to 1 here since that could just been the case where there are 0
  1788. // outputs. Instead, we check whether there is more than one set of output
  1789. // blocks since this is the only case where we would have to move the
  1790. // stores, and erase the extraneous blocks.
  1791. if (OutputStoreBBs.size() == 1) {
  1792. LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
  1793. << *OG.OutlinedFunction << "\n");
  1794. DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
  1795. for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
  1796. DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
  1797. EndBBs.find(VBPair.first);
  1798. assert(EndBBIt != EndBBs.end() && "Could not find end block");
  1799. BasicBlock *EndBB = EndBBIt->second;
  1800. BasicBlock *OutputBB = VBPair.second;
  1801. Instruction *Term = OutputBB->getTerminator();
  1802. Term->eraseFromParent();
  1803. Term = EndBB->getTerminator();
  1804. moveBBContents(*OutputBB, *EndBB);
  1805. Term->moveBefore(*EndBB, EndBB->end());
  1806. OutputBB->eraseFromParent();
  1807. }
  1808. }
  1809. }
  1810. /// Fill the new function that will serve as the replacement function for all of
  1811. /// the extracted regions of a certain structure from the first region in the
  1812. /// list of regions. Replace this first region's extracted function with the
  1813. /// new overall function.
  1814. ///
  1815. /// \param [in] M - The module we are outlining from.
  1816. /// \param [in] CurrentGroup - The group of regions to be outlined.
  1817. /// \param [in,out] OutputStoreBBs - The output blocks for each different
  1818. /// set of stores needed for the different functions.
  1819. /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
  1820. /// once outlining is complete.
  1821. /// \param [in] OutputMappings - Extracted functions to erase from module
  1822. /// once outlining is complete.
  1823. static void fillOverallFunction(
  1824. Module &M, OutlinableGroup &CurrentGroup,
  1825. std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
  1826. std::vector<Function *> &FuncsToRemove,
  1827. const DenseMap<Value *, Value *> &OutputMappings) {
  1828. OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
  1829. // Move first extracted function's instructions into new function.
  1830. LLVM_DEBUG(dbgs() << "Move instructions from "
  1831. << *CurrentOS->ExtractedFunction << " to instruction "
  1832. << *CurrentGroup.OutlinedFunction << "\n");
  1833. moveFunctionData(*CurrentOS->ExtractedFunction,
  1834. *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
  1835. // Transfer the attributes from the function to the new function.
  1836. for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
  1837. CurrentGroup.OutlinedFunction->addFnAttr(A);
  1838. // Create a new set of output blocks for the first extracted function.
  1839. DenseMap<Value *, BasicBlock *> NewBBs;
  1840. createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
  1841. CurrentGroup.OutlinedFunction, "output_block_0");
  1842. CurrentOS->OutputBlockNum = 0;
  1843. replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
  1844. replaceConstants(*CurrentOS);
  1845. // We first identify if any output blocks are empty, if they are we remove
  1846. // them. We then create a branch instruction to the basic block to the return
  1847. // block for the function for each non empty output block.
  1848. if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
  1849. OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
  1850. for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
  1851. DenseMap<Value *, BasicBlock *>::iterator VBBIt =
  1852. CurrentGroup.EndBBs.find(VToBB.first);
  1853. BasicBlock *EndBB = VBBIt->second;
  1854. BranchInst::Create(EndBB, VToBB.second);
  1855. OutputStoreBBs.back().insert(VToBB);
  1856. }
  1857. }
  1858. // Replace the call to the extracted function with the outlined function.
  1859. CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
  1860. // We only delete the extracted functions at the end since we may need to
  1861. // reference instructions contained in them for mapping purposes.
  1862. FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
  1863. }
  1864. void IROutliner::deduplicateExtractedSections(
  1865. Module &M, OutlinableGroup &CurrentGroup,
  1866. std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
  1867. createFunction(M, CurrentGroup, OutlinedFunctionNum);
  1868. std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
  1869. OutlinableRegion *CurrentOS;
  1870. fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
  1871. OutputMappings);
  1872. std::vector<Value *> SortedKeys;
  1873. for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
  1874. CurrentOS = CurrentGroup.Regions[Idx];
  1875. AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
  1876. *CurrentOS->ExtractedFunction);
  1877. // Create a set of BasicBlocks, one for each return block, to hold the
  1878. // needed store instructions.
  1879. DenseMap<Value *, BasicBlock *> NewBBs;
  1880. createAndInsertBasicBlocks(
  1881. CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
  1882. "output_block_" + Twine(static_cast<unsigned>(Idx)));
  1883. replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
  1884. alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
  1885. CurrentGroup.EndBBs, OutputMappings,
  1886. OutputStoreBBs);
  1887. CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
  1888. FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
  1889. }
  1890. // Create a switch statement to handle the different output schemes.
  1891. createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
  1892. OutlinedFunctionNum++;
  1893. }
  1894. /// Checks that the next instruction in the InstructionDataList matches the
  1895. /// next instruction in the module. If they do not, there could be the
  1896. /// possibility that extra code has been inserted, and we must ignore it.
  1897. ///
  1898. /// \param ID - The IRInstructionData to check the next instruction of.
  1899. /// \returns true if the InstructionDataList and actual instruction match.
  1900. static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
  1901. // We check if there is a discrepancy between the InstructionDataList
  1902. // and the actual next instruction in the module. If there is, it means
  1903. // that an extra instruction was added, likely by the CodeExtractor.
  1904. // Since we do not have any similarity data about this particular
  1905. // instruction, we cannot confidently outline it, and must discard this
  1906. // candidate.
  1907. IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
  1908. Instruction *NextIDLInst = NextIDIt->Inst;
  1909. Instruction *NextModuleInst = nullptr;
  1910. if (!ID.Inst->isTerminator())
  1911. NextModuleInst = ID.Inst->getNextNonDebugInstruction();
  1912. else if (NextIDLInst != nullptr)
  1913. NextModuleInst =
  1914. &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
  1915. if (NextIDLInst && NextIDLInst != NextModuleInst)
  1916. return false;
  1917. return true;
  1918. }
  1919. bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
  1920. const OutlinableRegion &Region) {
  1921. IRSimilarityCandidate *IRSC = Region.Candidate;
  1922. unsigned StartIdx = IRSC->getStartIdx();
  1923. unsigned EndIdx = IRSC->getEndIdx();
  1924. // A check to make sure that we are not about to attempt to outline something
  1925. // that has already been outlined.
  1926. for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
  1927. if (Outlined.contains(Idx))
  1928. return false;
  1929. // We check if the recorded instruction matches the actual next instruction,
  1930. // if it does not, we fix it in the InstructionDataList.
  1931. if (!Region.Candidate->backInstruction()->isTerminator()) {
  1932. Instruction *NewEndInst =
  1933. Region.Candidate->backInstruction()->getNextNonDebugInstruction();
  1934. assert(NewEndInst && "Next instruction is a nullptr?");
  1935. if (Region.Candidate->end()->Inst != NewEndInst) {
  1936. IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
  1937. IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
  1938. IRInstructionData(*NewEndInst,
  1939. InstructionClassifier.visit(*NewEndInst), *IDL);
  1940. // Insert the first IRInstructionData of the new region after the
  1941. // last IRInstructionData of the IRSimilarityCandidate.
  1942. IDL->insert(Region.Candidate->end(), *NewEndIRID);
  1943. }
  1944. }
  1945. return none_of(*IRSC, [this](IRInstructionData &ID) {
  1946. if (!nextIRInstructionDataMatchesNextInst(ID))
  1947. return true;
  1948. return !this->InstructionClassifier.visit(ID.Inst);
  1949. });
  1950. }
  1951. void IROutliner::pruneIncompatibleRegions(
  1952. std::vector<IRSimilarityCandidate> &CandidateVec,
  1953. OutlinableGroup &CurrentGroup) {
  1954. bool PreviouslyOutlined;
  1955. // Sort from beginning to end, so the IRSimilarityCandidates are in order.
  1956. stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
  1957. const IRSimilarityCandidate &RHS) {
  1958. return LHS.getStartIdx() < RHS.getStartIdx();
  1959. });
  1960. IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
  1961. // Since outlining a call and a branch instruction will be the same as only
  1962. // outlinining a call instruction, we ignore it as a space saving.
  1963. if (FirstCandidate.getLength() == 2) {
  1964. if (isa<CallInst>(FirstCandidate.front()->Inst) &&
  1965. isa<BranchInst>(FirstCandidate.back()->Inst))
  1966. return;
  1967. }
  1968. unsigned CurrentEndIdx = 0;
  1969. for (IRSimilarityCandidate &IRSC : CandidateVec) {
  1970. PreviouslyOutlined = false;
  1971. unsigned StartIdx = IRSC.getStartIdx();
  1972. unsigned EndIdx = IRSC.getEndIdx();
  1973. for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
  1974. if (Outlined.contains(Idx)) {
  1975. PreviouslyOutlined = true;
  1976. break;
  1977. }
  1978. if (PreviouslyOutlined)
  1979. continue;
  1980. // Check over the instructions, and if the basic block has its address
  1981. // taken for use somewhere else, we do not outline that block.
  1982. bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
  1983. return ID.Inst->getParent()->hasAddressTaken();
  1984. });
  1985. if (BBHasAddressTaken)
  1986. continue;
  1987. if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
  1988. !OutlineFromLinkODRs)
  1989. continue;
  1990. // Greedily prune out any regions that will overlap with already chosen
  1991. // regions.
  1992. if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
  1993. continue;
  1994. bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
  1995. if (!nextIRInstructionDataMatchesNextInst(ID))
  1996. return true;
  1997. return !this->InstructionClassifier.visit(ID.Inst);
  1998. });
  1999. if (BadInst)
  2000. continue;
  2001. OutlinableRegion *OS = new (RegionAllocator.Allocate())
  2002. OutlinableRegion(IRSC, CurrentGroup);
  2003. CurrentGroup.Regions.push_back(OS);
  2004. CurrentEndIdx = EndIdx;
  2005. }
  2006. }
  2007. InstructionCost
  2008. IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
  2009. InstructionCost RegionBenefit = 0;
  2010. for (OutlinableRegion *Region : CurrentGroup.Regions) {
  2011. TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
  2012. // We add the number of instructions in the region to the benefit as an
  2013. // estimate as to how much will be removed.
  2014. RegionBenefit += Region->getBenefit(TTI);
  2015. LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
  2016. << " saved instructions to overfall benefit.\n");
  2017. }
  2018. return RegionBenefit;
  2019. }
  2020. /// For the \p OutputCanon number passed in find the value represented by this
  2021. /// canonical number. If it is from a PHINode, we pick the first incoming
  2022. /// value and return that Value instead.
  2023. ///
  2024. /// \param Region - The OutlinableRegion to get the Value from.
  2025. /// \param OutputCanon - The canonical number to find the Value from.
  2026. /// \returns The Value represented by a canonical number \p OutputCanon in \p
  2027. /// Region.
  2028. static Value *findOutputValueInRegion(OutlinableRegion &Region,
  2029. unsigned OutputCanon) {
  2030. OutlinableGroup &CurrentGroup = *Region.Parent;
  2031. // If the value is greater than the value in the tracker, we have a
  2032. // PHINode and will instead use one of the incoming values to find the
  2033. // type.
  2034. if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
  2035. auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
  2036. assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
  2037. "Could not find GVN set for PHINode number!");
  2038. assert(It->second.second.size() > 0 && "PHINode does not have any values!");
  2039. OutputCanon = *It->second.second.begin();
  2040. }
  2041. Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
  2042. assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
  2043. Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
  2044. assert(OV.hasValue() && "Could not find value for GVN?");
  2045. return *OV;
  2046. }
  2047. InstructionCost
  2048. IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
  2049. InstructionCost OverallCost = 0;
  2050. for (OutlinableRegion *Region : CurrentGroup.Regions) {
  2051. TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
  2052. // Each output incurs a load after the call, so we add that to the cost.
  2053. for (unsigned OutputCanon : Region->GVNStores) {
  2054. Value *V = findOutputValueInRegion(*Region, OutputCanon);
  2055. InstructionCost LoadCost =
  2056. TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
  2057. TargetTransformInfo::TCK_CodeSize);
  2058. LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
  2059. << " instructions to cost for output of type "
  2060. << *V->getType() << "\n");
  2061. OverallCost += LoadCost;
  2062. }
  2063. }
  2064. return OverallCost;
  2065. }
  2066. /// Find the extra instructions needed to handle any output values for the
  2067. /// region.
  2068. ///
  2069. /// \param [in] M - The Module to outline from.
  2070. /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
  2071. /// \param [in] TTI - The TargetTransformInfo used to collect information for
  2072. /// new instruction costs.
  2073. /// \returns the additional cost to handle the outputs.
  2074. static InstructionCost findCostForOutputBlocks(Module &M,
  2075. OutlinableGroup &CurrentGroup,
  2076. TargetTransformInfo &TTI) {
  2077. InstructionCost OutputCost = 0;
  2078. unsigned NumOutputBranches = 0;
  2079. OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
  2080. IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
  2081. DenseSet<BasicBlock *> CandidateBlocks;
  2082. Candidate.getBasicBlocks(CandidateBlocks);
  2083. // Count the number of different output branches that point to blocks outside
  2084. // of the region.
  2085. DenseSet<BasicBlock *> FoundBlocks;
  2086. for (IRInstructionData &ID : Candidate) {
  2087. if (!isa<BranchInst>(ID.Inst))
  2088. continue;
  2089. for (Value *V : ID.OperVals) {
  2090. BasicBlock *BB = static_cast<BasicBlock *>(V);
  2091. DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
  2092. if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
  2093. continue;
  2094. FoundBlocks.insert(BB);
  2095. NumOutputBranches++;
  2096. }
  2097. }
  2098. CurrentGroup.BranchesToOutside = NumOutputBranches;
  2099. for (const ArrayRef<unsigned> &OutputUse :
  2100. CurrentGroup.OutputGVNCombinations) {
  2101. for (unsigned OutputCanon : OutputUse) {
  2102. Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
  2103. InstructionCost StoreCost =
  2104. TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
  2105. TargetTransformInfo::TCK_CodeSize);
  2106. // An instruction cost is added for each store set that needs to occur for
  2107. // various output combinations inside the function, plus a branch to
  2108. // return to the exit block.
  2109. LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
  2110. << " instructions to cost for output of type "
  2111. << *V->getType() << "\n");
  2112. OutputCost += StoreCost * NumOutputBranches;
  2113. }
  2114. InstructionCost BranchCost =
  2115. TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
  2116. LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
  2117. << " a branch instruction\n");
  2118. OutputCost += BranchCost * NumOutputBranches;
  2119. }
  2120. // If there is more than one output scheme, we must have a comparison and
  2121. // branch for each different item in the switch statement.
  2122. if (CurrentGroup.OutputGVNCombinations.size() > 1) {
  2123. InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
  2124. Instruction::ICmp, Type::getInt32Ty(M.getContext()),
  2125. Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
  2126. TargetTransformInfo::TCK_CodeSize);
  2127. InstructionCost BranchCost =
  2128. TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
  2129. unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
  2130. InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
  2131. LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
  2132. << " instructions for each switch case for each different"
  2133. << " output path in a function\n");
  2134. OutputCost += TotalCost * NumOutputBranches;
  2135. }
  2136. return OutputCost;
  2137. }
  2138. void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
  2139. InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
  2140. CurrentGroup.Benefit += RegionBenefit;
  2141. LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
  2142. InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
  2143. CurrentGroup.Cost += OutputReloadCost;
  2144. LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
  2145. InstructionCost AverageRegionBenefit =
  2146. RegionBenefit / CurrentGroup.Regions.size();
  2147. unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
  2148. unsigned NumRegions = CurrentGroup.Regions.size();
  2149. TargetTransformInfo &TTI =
  2150. getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
  2151. // We add one region to the cost once, to account for the instructions added
  2152. // inside of the newly created function.
  2153. LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
  2154. << " instructions to cost for body of new function.\n");
  2155. CurrentGroup.Cost += AverageRegionBenefit;
  2156. LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
  2157. // For each argument, we must add an instruction for loading the argument
  2158. // out of the register and into a value inside of the newly outlined function.
  2159. LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
  2160. << " instructions to cost for each argument in the new"
  2161. << " function.\n");
  2162. CurrentGroup.Cost +=
  2163. OverallArgumentNum * TargetTransformInfo::TCC_Basic;
  2164. LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
  2165. // Each argument needs to either be loaded into a register or onto the stack.
  2166. // Some arguments will only be loaded into the stack once the argument
  2167. // registers are filled.
  2168. LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
  2169. << " instructions to cost for each argument in the new"
  2170. << " function " << NumRegions << " times for the "
  2171. << "needed argument handling at the call site.\n");
  2172. CurrentGroup.Cost +=
  2173. 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
  2174. LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
  2175. CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
  2176. LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
  2177. }
  2178. void IROutliner::updateOutputMapping(OutlinableRegion &Region,
  2179. ArrayRef<Value *> Outputs,
  2180. LoadInst *LI) {
  2181. // For and load instructions following the call
  2182. Value *Operand = LI->getPointerOperand();
  2183. Optional<unsigned> OutputIdx = None;
  2184. // Find if the operand it is an output register.
  2185. for (unsigned ArgIdx = Region.NumExtractedInputs;
  2186. ArgIdx < Region.Call->arg_size(); ArgIdx++) {
  2187. if (Operand == Region.Call->getArgOperand(ArgIdx)) {
  2188. OutputIdx = ArgIdx - Region.NumExtractedInputs;
  2189. break;
  2190. }
  2191. }
  2192. // If we found an output register, place a mapping of the new value
  2193. // to the original in the mapping.
  2194. if (!OutputIdx.hasValue())
  2195. return;
  2196. if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
  2197. OutputMappings.end()) {
  2198. LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
  2199. << *Outputs[OutputIdx.getValue()] << "\n");
  2200. OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
  2201. } else {
  2202. Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
  2203. LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
  2204. << *Outputs[OutputIdx.getValue()] << "\n");
  2205. OutputMappings.insert(std::make_pair(LI, Orig));
  2206. }
  2207. }
  2208. bool IROutliner::extractSection(OutlinableRegion &Region) {
  2209. SetVector<Value *> ArgInputs, Outputs, SinkCands;
  2210. assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
  2211. BasicBlock *InitialStart = Region.StartBB;
  2212. Function *OrigF = Region.StartBB->getParent();
  2213. CodeExtractorAnalysisCache CEAC(*OrigF);
  2214. Region.ExtractedFunction =
  2215. Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
  2216. // If the extraction was successful, find the BasicBlock, and reassign the
  2217. // OutlinableRegion blocks
  2218. if (!Region.ExtractedFunction) {
  2219. LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
  2220. << "\n");
  2221. Region.reattachCandidate();
  2222. return false;
  2223. }
  2224. // Get the block containing the called branch, and reassign the blocks as
  2225. // necessary. If the original block still exists, it is because we ended on
  2226. // a branch instruction, and so we move the contents into the block before
  2227. // and assign the previous block correctly.
  2228. User *InstAsUser = Region.ExtractedFunction->user_back();
  2229. BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
  2230. Region.PrevBB = RewrittenBB->getSinglePredecessor();
  2231. assert(Region.PrevBB && "PrevBB is nullptr?");
  2232. if (Region.PrevBB == InitialStart) {
  2233. BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
  2234. Instruction *BI = NewPrev->getTerminator();
  2235. BI->eraseFromParent();
  2236. moveBBContents(*InitialStart, *NewPrev);
  2237. Region.PrevBB = NewPrev;
  2238. InitialStart->eraseFromParent();
  2239. }
  2240. Region.StartBB = RewrittenBB;
  2241. Region.EndBB = RewrittenBB;
  2242. // The sequences of outlinable regions has now changed. We must fix the
  2243. // IRInstructionDataList for consistency. Although they may not be illegal
  2244. // instructions, they should not be compared with anything else as they
  2245. // should not be outlined in this round. So marking these as illegal is
  2246. // allowed.
  2247. IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
  2248. Instruction *BeginRewritten = &*RewrittenBB->begin();
  2249. Instruction *EndRewritten = &*RewrittenBB->begin();
  2250. Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
  2251. *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
  2252. Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
  2253. *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
  2254. // Insert the first IRInstructionData of the new region in front of the
  2255. // first IRInstructionData of the IRSimilarityCandidate.
  2256. IDL->insert(Region.Candidate->begin(), *Region.NewFront);
  2257. // Insert the first IRInstructionData of the new region after the
  2258. // last IRInstructionData of the IRSimilarityCandidate.
  2259. IDL->insert(Region.Candidate->end(), *Region.NewBack);
  2260. // Remove the IRInstructionData from the IRSimilarityCandidate.
  2261. IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
  2262. assert(RewrittenBB != nullptr &&
  2263. "Could not find a predecessor after extraction!");
  2264. // Iterate over the new set of instructions to find the new call
  2265. // instruction.
  2266. for (Instruction &I : *RewrittenBB)
  2267. if (CallInst *CI = dyn_cast<CallInst>(&I)) {
  2268. if (Region.ExtractedFunction == CI->getCalledFunction())
  2269. Region.Call = CI;
  2270. } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
  2271. updateOutputMapping(Region, Outputs.getArrayRef(), LI);
  2272. Region.reattachCandidate();
  2273. return true;
  2274. }
  2275. unsigned IROutliner::doOutline(Module &M) {
  2276. // Find the possible similarity sections.
  2277. InstructionClassifier.EnableBranches = !DisableBranches;
  2278. InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
  2279. InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
  2280. IRSimilarityIdentifier &Identifier = getIRSI(M);
  2281. SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
  2282. // Sort them by size of extracted sections
  2283. unsigned OutlinedFunctionNum = 0;
  2284. // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
  2285. // to sort them by the potential number of instructions to be outlined
  2286. if (SimilarityCandidates.size() > 1)
  2287. llvm::stable_sort(SimilarityCandidates,
  2288. [](const std::vector<IRSimilarityCandidate> &LHS,
  2289. const std::vector<IRSimilarityCandidate> &RHS) {
  2290. return LHS[0].getLength() * LHS.size() >
  2291. RHS[0].getLength() * RHS.size();
  2292. });
  2293. // Creating OutlinableGroups for each SimilarityCandidate to be used in
  2294. // each of the following for loops to avoid making an allocator.
  2295. std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
  2296. DenseSet<unsigned> NotSame;
  2297. std::vector<OutlinableGroup *> NegativeCostGroups;
  2298. std::vector<OutlinableRegion *> OutlinedRegions;
  2299. // Iterate over the possible sets of similarity.
  2300. unsigned PotentialGroupIdx = 0;
  2301. for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
  2302. OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
  2303. // Remove entries that were previously outlined
  2304. pruneIncompatibleRegions(CandidateVec, CurrentGroup);
  2305. // We pruned the number of regions to 0 to 1, meaning that it's not worth
  2306. // trying to outlined since there is no compatible similar instance of this
  2307. // code.
  2308. if (CurrentGroup.Regions.size() < 2)
  2309. continue;
  2310. // Determine if there are any values that are the same constant throughout
  2311. // each section in the set.
  2312. NotSame.clear();
  2313. CurrentGroup.findSameConstants(NotSame);
  2314. if (CurrentGroup.IgnoreGroup)
  2315. continue;
  2316. // Create a CodeExtractor for each outlinable region. Identify inputs and
  2317. // outputs for each section using the code extractor and create the argument
  2318. // types for the Aggregate Outlining Function.
  2319. OutlinedRegions.clear();
  2320. for (OutlinableRegion *OS : CurrentGroup.Regions) {
  2321. // Break the outlinable region out of its parent BasicBlock into its own
  2322. // BasicBlocks (see function implementation).
  2323. OS->splitCandidate();
  2324. // There's a chance that when the region is split, extra instructions are
  2325. // added to the region. This makes the region no longer viable
  2326. // to be split, so we ignore it for outlining.
  2327. if (!OS->CandidateSplit)
  2328. continue;
  2329. SmallVector<BasicBlock *> BE;
  2330. DenseSet<BasicBlock *> BlocksInRegion;
  2331. OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
  2332. OS->CE = new (ExtractorAllocator.Allocate())
  2333. CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
  2334. false, "outlined");
  2335. findAddInputsOutputs(M, *OS, NotSame);
  2336. if (!OS->IgnoreRegion)
  2337. OutlinedRegions.push_back(OS);
  2338. // We recombine the blocks together now that we have gathered all the
  2339. // needed information.
  2340. OS->reattachCandidate();
  2341. }
  2342. CurrentGroup.Regions = std::move(OutlinedRegions);
  2343. if (CurrentGroup.Regions.empty())
  2344. continue;
  2345. CurrentGroup.collectGVNStoreSets(M);
  2346. if (CostModel)
  2347. findCostBenefit(M, CurrentGroup);
  2348. // If we are adhering to the cost model, skip those groups where the cost
  2349. // outweighs the benefits.
  2350. if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
  2351. OptimizationRemarkEmitter &ORE =
  2352. getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
  2353. ORE.emit([&]() {
  2354. IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
  2355. OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
  2356. C->frontInstruction());
  2357. R << "did not outline "
  2358. << ore::NV(std::to_string(CurrentGroup.Regions.size()))
  2359. << " regions due to estimated increase of "
  2360. << ore::NV("InstructionIncrease",
  2361. CurrentGroup.Cost - CurrentGroup.Benefit)
  2362. << " instructions at locations ";
  2363. interleave(
  2364. CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
  2365. [&R](OutlinableRegion *Region) {
  2366. R << ore::NV(
  2367. "DebugLoc",
  2368. Region->Candidate->frontInstruction()->getDebugLoc());
  2369. },
  2370. [&R]() { R << " "; });
  2371. return R;
  2372. });
  2373. continue;
  2374. }
  2375. NegativeCostGroups.push_back(&CurrentGroup);
  2376. }
  2377. ExtractorAllocator.DestroyAll();
  2378. if (NegativeCostGroups.size() > 1)
  2379. stable_sort(NegativeCostGroups,
  2380. [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
  2381. return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
  2382. });
  2383. std::vector<Function *> FuncsToRemove;
  2384. for (OutlinableGroup *CG : NegativeCostGroups) {
  2385. OutlinableGroup &CurrentGroup = *CG;
  2386. OutlinedRegions.clear();
  2387. for (OutlinableRegion *Region : CurrentGroup.Regions) {
  2388. // We check whether our region is compatible with what has already been
  2389. // outlined, and whether we need to ignore this item.
  2390. if (!isCompatibleWithAlreadyOutlinedCode(*Region))
  2391. continue;
  2392. OutlinedRegions.push_back(Region);
  2393. }
  2394. if (OutlinedRegions.size() < 2)
  2395. continue;
  2396. // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
  2397. // we are still outlining enough regions to make up for the added cost.
  2398. CurrentGroup.Regions = std::move(OutlinedRegions);
  2399. if (CostModel) {
  2400. CurrentGroup.Benefit = 0;
  2401. CurrentGroup.Cost = 0;
  2402. findCostBenefit(M, CurrentGroup);
  2403. if (CurrentGroup.Cost >= CurrentGroup.Benefit)
  2404. continue;
  2405. }
  2406. OutlinedRegions.clear();
  2407. for (OutlinableRegion *Region : CurrentGroup.Regions) {
  2408. Region->splitCandidate();
  2409. if (!Region->CandidateSplit)
  2410. continue;
  2411. OutlinedRegions.push_back(Region);
  2412. }
  2413. CurrentGroup.Regions = std::move(OutlinedRegions);
  2414. if (CurrentGroup.Regions.size() < 2) {
  2415. for (OutlinableRegion *R : CurrentGroup.Regions)
  2416. R->reattachCandidate();
  2417. continue;
  2418. }
  2419. LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
  2420. << " and benefit " << CurrentGroup.Benefit << "\n");
  2421. // Create functions out of all the sections, and mark them as outlined.
  2422. OutlinedRegions.clear();
  2423. for (OutlinableRegion *OS : CurrentGroup.Regions) {
  2424. SmallVector<BasicBlock *> BE;
  2425. DenseSet<BasicBlock *> BlocksInRegion;
  2426. OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
  2427. OS->CE = new (ExtractorAllocator.Allocate())
  2428. CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
  2429. false, "outlined");
  2430. bool FunctionOutlined = extractSection(*OS);
  2431. if (FunctionOutlined) {
  2432. unsigned StartIdx = OS->Candidate->getStartIdx();
  2433. unsigned EndIdx = OS->Candidate->getEndIdx();
  2434. for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
  2435. Outlined.insert(Idx);
  2436. OutlinedRegions.push_back(OS);
  2437. }
  2438. }
  2439. LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
  2440. << " with benefit " << CurrentGroup.Benefit
  2441. << " and cost " << CurrentGroup.Cost << "\n");
  2442. CurrentGroup.Regions = std::move(OutlinedRegions);
  2443. if (CurrentGroup.Regions.empty())
  2444. continue;
  2445. OptimizationRemarkEmitter &ORE =
  2446. getORE(*CurrentGroup.Regions[0]->Call->getFunction());
  2447. ORE.emit([&]() {
  2448. IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
  2449. OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
  2450. R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
  2451. << " regions with decrease of "
  2452. << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
  2453. << " instructions at locations ";
  2454. interleave(
  2455. CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
  2456. [&R](OutlinableRegion *Region) {
  2457. R << ore::NV("DebugLoc",
  2458. Region->Candidate->frontInstruction()->getDebugLoc());
  2459. },
  2460. [&R]() { R << " "; });
  2461. return R;
  2462. });
  2463. deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
  2464. OutlinedFunctionNum);
  2465. }
  2466. for (Function *F : FuncsToRemove)
  2467. F->eraseFromParent();
  2468. return OutlinedFunctionNum;
  2469. }
  2470. bool IROutliner::run(Module &M) {
  2471. CostModel = !NoCostModel;
  2472. OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
  2473. return doOutline(M) > 0;
  2474. }
  2475. // Pass Manager Boilerplate
  2476. namespace {
  2477. class IROutlinerLegacyPass : public ModulePass {
  2478. public:
  2479. static char ID;
  2480. IROutlinerLegacyPass() : ModulePass(ID) {
  2481. initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
  2482. }
  2483. void getAnalysisUsage(AnalysisUsage &AU) const override {
  2484. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  2485. AU.addRequired<TargetTransformInfoWrapperPass>();
  2486. AU.addRequired<IRSimilarityIdentifierWrapperPass>();
  2487. }
  2488. bool runOnModule(Module &M) override;
  2489. };
  2490. } // namespace
  2491. bool IROutlinerLegacyPass::runOnModule(Module &M) {
  2492. if (skipModule(M))
  2493. return false;
  2494. std::unique_ptr<OptimizationRemarkEmitter> ORE;
  2495. auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
  2496. ORE.reset(new OptimizationRemarkEmitter(&F));
  2497. return *ORE.get();
  2498. };
  2499. auto GTTI = [this](Function &F) -> TargetTransformInfo & {
  2500. return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  2501. };
  2502. auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
  2503. return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
  2504. };
  2505. return IROutliner(GTTI, GIRSI, GORE).run(M);
  2506. }
  2507. PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
  2508. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  2509. std::function<TargetTransformInfo &(Function &)> GTTI =
  2510. [&FAM](Function &F) -> TargetTransformInfo & {
  2511. return FAM.getResult<TargetIRAnalysis>(F);
  2512. };
  2513. std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
  2514. [&AM](Module &M) -> IRSimilarityIdentifier & {
  2515. return AM.getResult<IRSimilarityAnalysis>(M);
  2516. };
  2517. std::unique_ptr<OptimizationRemarkEmitter> ORE;
  2518. std::function<OptimizationRemarkEmitter &(Function &)> GORE =
  2519. [&ORE](Function &F) -> OptimizationRemarkEmitter & {
  2520. ORE.reset(new OptimizationRemarkEmitter(&F));
  2521. return *ORE.get();
  2522. };
  2523. if (IROutliner(GTTI, GIRSI, GORE).run(M))
  2524. return PreservedAnalyses::none();
  2525. return PreservedAnalyses::all();
  2526. }
  2527. char IROutlinerLegacyPass::ID = 0;
  2528. INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
  2529. false)
  2530. INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
  2531. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  2532. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  2533. INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
  2534. false)
  2535. ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }