// CodeExtractor.cpp (page-number extraction residue removed)
  1. //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the interface to tear out a code region, such as an
  10. // individual loop or a parallel section, into a new function, replacing it with
  11. // a call to the new function.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/Utils/CodeExtractor.h"
  15. #include "llvm/ADT/ArrayRef.h"
  16. #include "llvm/ADT/DenseMap.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SetVector.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/Analysis/AssumptionCache.h"
  22. #include "llvm/Analysis/BlockFrequencyInfo.h"
  23. #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
  24. #include "llvm/Analysis/BranchProbabilityInfo.h"
  25. #include "llvm/Analysis/LoopInfo.h"
  26. #include "llvm/IR/Argument.h"
  27. #include "llvm/IR/Attributes.h"
  28. #include "llvm/IR/BasicBlock.h"
  29. #include "llvm/IR/CFG.h"
  30. #include "llvm/IR/Constant.h"
  31. #include "llvm/IR/Constants.h"
  32. #include "llvm/IR/DIBuilder.h"
  33. #include "llvm/IR/DataLayout.h"
  34. #include "llvm/IR/DebugInfo.h"
  35. #include "llvm/IR/DebugInfoMetadata.h"
  36. #include "llvm/IR/DerivedTypes.h"
  37. #include "llvm/IR/Dominators.h"
  38. #include "llvm/IR/Function.h"
  39. #include "llvm/IR/GlobalValue.h"
  40. #include "llvm/IR/InstIterator.h"
  41. #include "llvm/IR/InstrTypes.h"
  42. #include "llvm/IR/Instruction.h"
  43. #include "llvm/IR/Instructions.h"
  44. #include "llvm/IR/IntrinsicInst.h"
  45. #include "llvm/IR/Intrinsics.h"
  46. #include "llvm/IR/LLVMContext.h"
  47. #include "llvm/IR/MDBuilder.h"
  48. #include "llvm/IR/Module.h"
  49. #include "llvm/IR/PatternMatch.h"
  50. #include "llvm/IR/Type.h"
  51. #include "llvm/IR/User.h"
  52. #include "llvm/IR/Value.h"
  53. #include "llvm/IR/Verifier.h"
  54. #include "llvm/Support/BlockFrequency.h"
  55. #include "llvm/Support/BranchProbability.h"
  56. #include "llvm/Support/Casting.h"
  57. #include "llvm/Support/CommandLine.h"
  58. #include "llvm/Support/Debug.h"
  59. #include "llvm/Support/ErrorHandling.h"
  60. #include "llvm/Support/raw_ostream.h"
  61. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  62. #include <cassert>
  63. #include <cstdint>
  64. #include <iterator>
  65. #include <map>
  66. #include <utility>
  67. #include <vector>
  68. using namespace llvm;
  69. using namespace llvm::PatternMatch;
  70. using ProfileCount = Function::ProfileCount;
  71. #define DEBUG_TYPE "code-extractor"
// Provide a command-line option to aggregate function arguments into a struct
// for functions produced by the code extractor. This is useful when converting
// extracted functions to pthread-based code, as only one argument (void*) can
// be passed in to pthread_create().
// cl::Hidden keeps the flag out of the standard -help listing; it is intended
// for testing and specialized tooling.
static cl::opt<bool>
AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
                 cl::desc("Aggregate arguments to code-extracted functions"));
/// Test whether a block is valid for extraction.
///
/// \param BB           The block being checked.
/// \param Result       The full set of blocks in the candidate region.
/// \param AllowVarArgs Permit llvm.va_start inside the region.
/// \param AllowAlloca  Permit alloca instructions inside the region.
/// \returns true if \p BB may legally be moved into an extracted function.
static bool isBlockValidForExtraction(const BasicBlock &BB,
                                      const SetVector<BasicBlock *> &Result,
                                      bool AllowVarArgs, bool AllowAlloca) {
  // taking the address of a basic block moved to another function is illegal
  if (BB.hasAddressTaken())
    return false;

  // don't hoist code that uses another basicblock address, as it's likely to
  // lead to unexpected behavior, like cross-function jumps
  SmallPtrSet<User const *, 16> Visited;
  SmallVector<User const *, 16> ToVisit;

  // Seed the worklist with every instruction in the block, then walk the
  // operand graph looking for any BlockAddress constant.
  for (Instruction const &Inst : BB)
    ToVisit.push_back(&Inst);

  while (!ToVisit.empty()) {
    User const *Curr = ToVisit.pop_back_val();
    if (!Visited.insert(Curr).second)
      continue; // already visited
    if (isa<BlockAddress const>(Curr))
      return false; // even a reference to self is likely to be not compatible

    // Stop the walk at instructions that live outside this block; their
    // operands are examined when their own block is validated.
    if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
      continue;

    for (auto const &U : Curr->operands()) {
      if (auto *UU = dyn_cast<User>(U))
        ToVisit.push_back(UU);
    }
  }

  // If explicitly requested, allow vastart and alloca. For invoke instructions
  // verify that extraction is valid.
  for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
    if (isa<AllocaInst>(I)) {
      if (!AllowAlloca)
        return false;
      continue;
    }

    if (const auto *II = dyn_cast<InvokeInst>(I)) {
      // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
      // must be a part of the subgraph which is being extracted.
      if (auto *UBB = II->getUnwindDest())
        if (!Result.count(UBB))
          return false;
      continue;
    }

    // All catch handlers of a catchswitch instruction as well as the unwind
    // destination must be in the subgraph.
    if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
      if (auto *UBB = CSI->getUnwindDest())
        if (!Result.count(UBB))
          return false;
      for (const auto *HBB : CSI->handlers())
        if (!Result.count(const_cast<BasicBlock*>(HBB)))
          return false;
      continue;
    }

    // Make sure that entire catch handler is within subgraph. It is sufficient
    // to check that catch return's block is in the list.
    if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
      for (const auto *U : CPI->users())
        if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
          if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
            return false;
      continue;
    }

    // And do similar checks for cleanup handler - the entire handler must be
    // in subgraph which is going to be extracted. For cleanup return should
    // additionally check that the unwind destination is also in the subgraph.
    if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
      for (const auto *U : CPI->users())
        if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
          if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
            return false;
      continue;
    }

    if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
      if (auto *UBB = CRI->getUnwindDest())
        if (!Result.count(UBB))
          return false;
      continue;
    }

    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      if (const Function *F = CI->getCalledFunction()) {
        auto IID = F->getIntrinsicID();
        if (IID == Intrinsic::vastart) {
          if (AllowVarArgs)
            continue;
          else
            return false;
        }

        // Currently, we miscompile outlined copies of eh_typid_for. There are
        // proposals for fixing this in llvm.org/PR39545.
        if (IID == Intrinsic::eh_typeid_for)
          return false;
      }
    }
  }

  return true;
}
/// Build a set of blocks to extract if the input blocks are viable.
///
/// Dead (unreachable-from-entry) input blocks are silently dropped when a
/// DominatorTree is available. Returns an empty set if any block fails
/// isBlockValidForExtraction, if the first block is an EH pad, or if any
/// non-entry block has a predecessor outside the region.
static SetVector<BasicBlock *>
buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
                        bool AllowVarArgs, bool AllowAlloca) {
  assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
  SetVector<BasicBlock *> Result;

  // Loop over the blocks, adding them to our set-vector, and aborting with an
  // empty set if we encounter invalid blocks.
  for (BasicBlock *BB : BBs) {
    // If this block is dead, don't process it.
    if (DT && !DT->isReachableFromEntry(BB))
      continue;

    if (!Result.insert(BB))
      llvm_unreachable("Repeated basic blocks in extraction input");
  }

  LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
                    << '\n');

  for (auto *BB : Result) {
    if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
      return {};

    // Make sure that the first block is not a landing pad.
    if (BB == Result.front()) {
      if (BB->isEHPad()) {
        LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
        return {};
      }
      continue;
    }

    // All blocks other than the first must not have predecessors outside of
    // the subgraph which is being extracted.
    for (auto *PBB : predecessors(BB))
      if (!Result.count(PBB)) {
        LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
                             "outside the region except for the first block!\n"
                          << "Problematic source BB: " << BB->getName() << "\n"
                          << "Problematic destination BB: " << PBB->getName()
                          << "\n");
        return {};
      }
  }

  return Result;
}
  217. CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
  218. bool AggregateArgs, BlockFrequencyInfo *BFI,
  219. BranchProbabilityInfo *BPI, AssumptionCache *AC,
  220. bool AllowVarArgs, bool AllowAlloca,
  221. BasicBlock *AllocationBlock, std::string Suffix)
  222. : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
  223. BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
  224. AllowVarArgs(AllowVarArgs),
  225. Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
  226. Suffix(Suffix) {}
  227. CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
  228. BlockFrequencyInfo *BFI,
  229. BranchProbabilityInfo *BPI, AssumptionCache *AC,
  230. std::string Suffix)
  231. : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
  232. BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false),
  233. Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
  234. /* AllowVarArgs */ false,
  235. /* AllowAlloca */ false)),
  236. Suffix(Suffix) {}
  237. /// definedInRegion - Return true if the specified value is defined in the
  238. /// extracted region.
  239. static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
  240. if (Instruction *I = dyn_cast<Instruction>(V))
  241. if (Blocks.count(I->getParent()))
  242. return true;
  243. return false;
  244. }
  245. /// definedInCaller - Return true if the specified value is defined in the
  246. /// function being code extracted, but not in the region being extracted.
  247. /// These values must be passed in as live-ins to the function.
  248. static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
  249. if (isa<Argument>(V)) return true;
  250. if (Instruction *I = dyn_cast<Instruction>(V))
  251. if (!Blocks.count(I->getParent()))
  252. return true;
  253. return false;
  254. }
  255. static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
  256. BasicBlock *CommonExitBlock = nullptr;
  257. auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
  258. for (auto *Succ : successors(Block)) {
  259. // Internal edges, ok.
  260. if (Blocks.count(Succ))
  261. continue;
  262. if (!CommonExitBlock) {
  263. CommonExitBlock = Succ;
  264. continue;
  265. }
  266. if (CommonExitBlock != Succ)
  267. return true;
  268. }
  269. return false;
  270. };
  271. if (any_of(Blocks, hasNonCommonExitSucc))
  272. return nullptr;
  273. return CommonExitBlock;
  274. }
  275. CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
  276. for (BasicBlock &BB : F) {
  277. for (Instruction &II : BB.instructionsWithoutDebug())
  278. if (auto *AI = dyn_cast<AllocaInst>(&II))
  279. Allocas.push_back(AI);
  280. findSideEffectInfoForBlock(BB);
  281. }
  282. }
  283. void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
  284. for (Instruction &II : BB.instructionsWithoutDebug()) {
  285. unsigned Opcode = II.getOpcode();
  286. Value *MemAddr = nullptr;
  287. switch (Opcode) {
  288. case Instruction::Store:
  289. case Instruction::Load: {
  290. if (Opcode == Instruction::Store) {
  291. StoreInst *SI = cast<StoreInst>(&II);
  292. MemAddr = SI->getPointerOperand();
  293. } else {
  294. LoadInst *LI = cast<LoadInst>(&II);
  295. MemAddr = LI->getPointerOperand();
  296. }
  297. // Global variable can not be aliased with locals.
  298. if (isa<Constant>(MemAddr))
  299. break;
  300. Value *Base = MemAddr->stripInBoundsConstantOffsets();
  301. if (!isa<AllocaInst>(Base)) {
  302. SideEffectingBlocks.insert(&BB);
  303. return;
  304. }
  305. BaseMemAddrs[&BB].insert(Base);
  306. break;
  307. }
  308. default: {
  309. IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
  310. if (IntrInst) {
  311. if (IntrInst->isLifetimeStartOrEnd())
  312. break;
  313. SideEffectingBlocks.insert(&BB);
  314. return;
  315. }
  316. // Treat all the other cases conservatively if it has side effects.
  317. if (II.mayHaveSideEffects()) {
  318. SideEffectingBlocks.insert(&BB);
  319. return;
  320. }
  321. }
  322. }
  323. }
  324. }
  325. bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
  326. BasicBlock &BB, AllocaInst *Addr) const {
  327. if (SideEffectingBlocks.count(&BB))
  328. return true;
  329. auto It = BaseMemAddrs.find(&BB);
  330. if (It != BaseMemAddrs.end())
  331. return It->second.count(Addr);
  332. return false;
  333. }
  334. bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
  335. const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
  336. AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
  337. Function *Func = (*Blocks.begin())->getParent();
  338. for (BasicBlock &BB : *Func) {
  339. if (Blocks.count(&BB))
  340. continue;
  341. if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
  342. return false;
  343. }
  344. return true;
  345. }
/// Find (or create, by splitting the common exit block) a block that hoisted
/// instructions can be moved into.
///
/// If exactly one predecessor of \p CommonExitBlock lies inside the region,
/// return that predecessor. Otherwise split \p CommonExitBlock at its first
/// non-PHI instruction, redirect all out-of-region predecessors to the new
/// split-off block, absorb the original block into the region, and return it.
BasicBlock *
CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
  BasicBlock *SinglePredFromOutlineRegion = nullptr;
  assert(!Blocks.count(CommonExitBlock) &&
         "Expect a block outside the region!");
  // Look for a unique in-region predecessor of the exit block.
  for (auto *Pred : predecessors(CommonExitBlock)) {
    if (!Blocks.count(Pred))
      continue;
    if (!SinglePredFromOutlineRegion) {
      SinglePredFromOutlineRegion = Pred;
    } else if (SinglePredFromOutlineRegion != Pred) {
      // More than one distinct in-region predecessor: give up on reuse.
      SinglePredFromOutlineRegion = nullptr;
      break;
    }
  }

  if (SinglePredFromOutlineRegion)
    return SinglePredFromOutlineRegion;

#ifndef NDEBUG
  // Debug-only sanity check: the exit block must not start with a PHI node.
  auto getFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };
  // If there are any phi nodes, the single pred either exists or has already
  // be created before code extraction.
  assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
#endif

  BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
      CommonExitBlock->getFirstNonPHI()->getIterator());

  // Redirect every out-of-region predecessor to the freshly split block, so
  // the old exit block is only reachable from inside the region.
  for (BasicBlock *Pred :
       llvm::make_early_inc_range(predecessors(CommonExitBlock))) {
    if (Blocks.count(Pred))
      continue;
    Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
  }
  // Now add the old exit block to the outline region.
  Blocks.insert(CommonExitBlock);
  OldTargets.push_back(NewExitBlock);
  return CommonExitBlock;
}
// Find the pair of life time markers for address 'Addr' that are either
// defined inside the outline region or can legally be shrinkwrapped into the
// outline region. If there are not other untracked uses of the address, return
// the pair of markers if found; otherwise return a pair of nullptr.
CodeExtractor::LifetimeMarkerInfo
CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
                                  Instruction *Addr,
                                  BasicBlock *ExitBlock) const {
  LifetimeMarkerInfo Info;

  // Scan all users of the address, collecting exactly one lifetime.start and
  // one lifetime.end; bail out (return empty Info) on anything untracked.
  for (User *U : Addr->users()) {
    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
    if (IntrInst) {
      // We don't model addresses with multiple start/end markers, but the
      // markers do not need to be in the region.
      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
        if (Info.LifeStart)
          return {};
        Info.LifeStart = IntrInst;
        continue;
      }
      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
        if (Info.LifeEnd)
          return {};
        Info.LifeEnd = IntrInst;
        continue;
      }
      // At this point, permit debug uses outside of the region.
      // This is fixed in a later call to fixupDebugInfoPostExtraction().
      if (isa<DbgInfoIntrinsic>(IntrInst))
        continue;
    }
    // Find untracked uses of the address, bail.
    if (!definedInRegion(Blocks, U))
      return {};
  }

  // Require a complete start/end pair.
  if (!Info.LifeStart || !Info.LifeEnd)
    return {};

  // Markers outside the region must be shrinkwrapped in: start is sunk, end
  // is hoisted.
  Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
  Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
  // Do legality check.
  if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
      !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
    return {};

  // Check to see if we have a place to do hoisting, if not, bail.
  if (Info.HoistLifeEnd && !ExitBlock)
    return {};

  return Info;
}
/// Collect allocas (and address-equivalent bitcasts of allocas) that can be
/// sunk into the extracted function, along with lifetime markers that must be
/// moved with them.
///
/// \param CEAC       Cached per-block alloca/side-effect analysis.
/// \param SinkCands  [out] Values to sink into the extracted function.
/// \param HoistCands [out] lifetime.end markers to hoist to \p ExitBlock.
/// \param ExitBlock  [out] Common exit block of the region, or null if the
///                   region exits to more than one block.
void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
                                ValueSet &SinkCands, ValueSet &HoistCands,
                                BasicBlock *&ExitBlock) const {
  Function *Func = (*Blocks.begin())->getParent();
  ExitBlock = getCommonExitBlock(Blocks);

  // Records the sink/hoist decisions from a LifetimeMarkerInfo; returns true
  // when the info describes a usable marker pair.
  auto moveOrIgnoreLifetimeMarkers =
      [&](const LifetimeMarkerInfo &LMI) -> bool {
    if (!LMI.LifeStart)
      return false;
    if (LMI.SinkLifeStart) {
      LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
                        << "\n");
      SinkCands.insert(LMI.LifeStart);
    }
    if (LMI.HoistLifeEnd) {
      LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
      HoistCands.insert(LMI.LifeEnd);
    }
    return true;
  };

  // Look up allocas in the original function in CodeExtractorAnalysisCache, as
  // this is much faster than walking all the instructions.
  for (AllocaInst *AI : CEAC.getAllocas()) {
    BasicBlock *BB = AI->getParent();
    // Allocas already inside the region need no sinking.
    if (Blocks.count(BB))
      continue;

    // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
    // check whether it is actually still in the original function.
    Function *AIFunc = BB->getParent();
    if (AIFunc != Func)
      continue;

    LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
    bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
    if (Moved) {
      LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
      SinkCands.insert(AI);
      continue;
    }

    // Find bitcasts in the outlined region that have lifetime marker users
    // outside that region. Replace the lifetime marker use with an
    // outside region bitcast to avoid unnecessary alloca/reload instructions
    // and extra lifetime markers.
    SmallVector<Instruction *, 2> LifetimeBitcastUsers;
    for (User *U : AI->users()) {
      if (!definedInRegion(Blocks, U))
        continue;

      // Only address-equivalent casts/GEPs of the alloca are of interest.
      if (U->stripInBoundsConstantOffsets() != AI)
        continue;

      Instruction *Bitcast = cast<Instruction>(U);
      for (User *BU : Bitcast->users()) {
        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
        if (!IntrInst)
          continue;

        if (!IntrInst->isLifetimeStartOrEnd())
          continue;

        if (definedInRegion(Blocks, IntrInst))
          continue;

        LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
                          << *Bitcast << " in out-of-region lifetime marker "
                          << *IntrInst << "\n");
        LifetimeBitcastUsers.push_back(IntrInst);
      }
    }

    // Rewrite each out-of-region marker to use a fresh cast emitted just
    // before it, detaching the marker from the in-region bitcast.
    for (Instruction *I : LifetimeBitcastUsers) {
      Module *M = AIFunc->getParent();
      LLVMContext &Ctx = M->getContext();
      auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
      CastInst *CastI =
          CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
      I->replaceUsesOfWith(I->getOperand(1), CastI);
    }

    // Follow any bitcasts.
    SmallVector<Instruction *, 2> Bitcasts;
    SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
    for (User *U : AI->users()) {
      if (U->stripInBoundsConstantOffsets() == AI) {
        Instruction *Bitcast = cast<Instruction>(U);
        LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
        if (LMI.LifeStart) {
          Bitcasts.push_back(Bitcast);
          BitcastLifetimeInfo.push_back(LMI);
          continue;
        }
      }

      // Found unknown use of AI.
      if (!definedInRegion(Blocks, U)) {
        Bitcasts.clear();
        break;
      }
    }

    // Either no bitcasts reference the alloca or there are unknown uses.
    if (Bitcasts.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
    SinkCands.insert(AI);
    for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
      Instruction *BitcastAddr = Bitcasts[I];
      const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
      assert(LMI.LifeStart &&
             "Unsafe to sink bitcast without lifetime markers");
      moveOrIgnoreLifetimeMarkers(LMI);
      if (!definedInRegion(Blocks, BitcastAddr)) {
        LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
                          << "\n");
        SinkCands.insert(BitcastAddr);
      }
    }
  }
}
  552. bool CodeExtractor::isEligible() const {
  553. if (Blocks.empty())
  554. return false;
  555. BasicBlock *Header = *Blocks.begin();
  556. Function *F = Header->getParent();
  557. // For functions with varargs, check that varargs handling is only done in the
  558. // outlined function, i.e vastart and vaend are only used in outlined blocks.
  559. if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
  560. auto containsVarArgIntrinsic = [](const Instruction &I) {
  561. if (const CallInst *CI = dyn_cast<CallInst>(&I))
  562. if (const Function *Callee = CI->getCalledFunction())
  563. return Callee->getIntrinsicID() == Intrinsic::vastart ||
  564. Callee->getIntrinsicID() == Intrinsic::vaend;
  565. return false;
  566. };
  567. for (auto &BB : *F) {
  568. if (Blocks.count(&BB))
  569. continue;
  570. if (llvm::any_of(BB, containsVarArgIntrinsic))
  571. return false;
  572. }
  573. }
  574. return true;
  575. }
  576. void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
  577. const ValueSet &SinkCands) const {
  578. for (BasicBlock *BB : Blocks) {
  579. // If a used value is defined outside the region, it's an input. If an
  580. // instruction is used outside the region, it's an output.
  581. for (Instruction &II : *BB) {
  582. for (auto &OI : II.operands()) {
  583. Value *V = OI;
  584. if (!SinkCands.count(V) && definedInCaller(Blocks, V))
  585. Inputs.insert(V);
  586. }
  587. for (User *U : II.users())
  588. if (!definedInRegion(Blocks, U)) {
  589. Outputs.insert(&II);
  590. break;
  591. }
  592. }
  593. }
  594. }
/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
/// of the region, we need to split the entry block of the region so that the
/// PHI node is easier to deal with.
///
/// \param Header [in/out] The region's entry block; on a split it is updated
///               to point at the new (second) block, which replaces the old
///               header inside Blocks.
void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
  unsigned NumPredsFromRegion = 0;
  unsigned NumPredsOutsideRegion = 0;

  if (Header != &Header->getParent()->getEntryBlock()) {
    PHINode *PN = dyn_cast<PHINode>(Header->begin());
    if (!PN) return; // No PHI nodes.

    // If the header node contains any PHI nodes, check to see if there is more
    // than one entry from outside the region. If so, we need to sever the
    // header block into two.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (Blocks.count(PN->getIncomingBlock(i)))
        ++NumPredsFromRegion;
      else
        ++NumPredsOutsideRegion;

    // If there is one (or fewer) predecessor from outside the region, we don't
    // need to do anything special.
    if (NumPredsOutsideRegion <= 1) return;
  }

  // Otherwise, we need to split the header block into two pieces: one
  // containing PHI nodes merging values from outside of the region, and a
  // second that contains all of the code for the block and merges back any
  // incoming values from inside of the region.
  BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);

  // We only want to code extract the second block now, and it becomes the new
  // header of the region.
  BasicBlock *OldPred = Header;
  Blocks.remove(OldPred);
  Blocks.insert(NewBB);
  Header = NewBB;

  // Okay, now we need to adjust the PHI nodes and any branches from within the
  // region to go to the new header block instead of the old header block.
  if (NumPredsFromRegion) {
    PHINode *PN = cast<PHINode>(OldPred->begin());
    // Loop over all of the predecessors of OldPred that are in the region,
    // changing them to branch to NewBB instead.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (Blocks.count(PN->getIncomingBlock(i))) {
        Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
        TI->replaceUsesOfWith(OldPred, NewBB);
      }

    // Okay, everything within the region is now branching to the right block, we
    // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
    BasicBlock::iterator AfterPHIs;
    for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
      PHINode *PN = cast<PHINode>(AfterPHIs);
      // Create a new PHI node in the new region, which has an incoming value
      // from OldPred of PN.
      PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
                                       PN->getName() + ".ce", &NewBB->front());
      PN->replaceAllUsesWith(NewPN);
      NewPN->addIncoming(PN, OldPred);

      // Loop over all of the incoming value in PN, moving them to NewPN if they
      // are from the extracted region.
      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
        if (Blocks.count(PN->getIncomingBlock(i))) {
          NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
          // removeIncomingValue shifts later entries down, so re-examine the
          // same index on the next iteration.
          PN->removeIncomingValue(i);
          --i;
        }
      }
    }
  }
}
/// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
/// outlined region, we split these PHIs on two: one with inputs from region
/// and other with remaining incoming blocks; then first PHIs are placed in
/// outlined region.
void CodeExtractor::severSplitPHINodesOfExits(
    const SmallPtrSetImpl<BasicBlock *> &Exits) {
  for (BasicBlock *ExitBB : Exits) {
    // Lazily-created ".split" block that will live inside the region and host
    // the new PHIs; shared by every PHI of this exit block.
    BasicBlock *NewBB = nullptr;

    for (PHINode &PN : ExitBB->phis()) {
      // Find all incoming values from the outlining region.
      SmallVector<unsigned, 2> IncomingVals;
      for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
        if (Blocks.count(PN.getIncomingBlock(i)))
          IncomingVals.push_back(i);

      // Do not process PHI if there is one (or fewer) predecessor from region.
      // If PHI has exactly one predecessor from region, only this one incoming
      // will be replaced on codeRepl block, so it should be safe to skip PHI.
      if (IncomingVals.size() <= 1)
        continue;

      // Create block for new PHIs and add it to the list of outlined if it
      // wasn't done before.
      if (!NewBB) {
        NewBB = BasicBlock::Create(ExitBB->getContext(),
                                   ExitBB->getName() + ".split",
                                   ExitBB->getParent(), ExitBB);
        // Reroute every in-region predecessor of ExitBB through NewBB.
        // Snapshot the predecessor list first, since retargeting terminators
        // mutates it.
        SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
        for (BasicBlock *PredBB : Preds)
          if (Blocks.count(PredBB))
            PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
        BranchInst::Create(ExitBB, NewBB);
        Blocks.insert(NewBB);
      }

      // Split this PHI: region-incoming values move into NewPN (in NewBB),
      // and PN keeps the rest plus one edge from NewBB carrying NewPN.
      PHINode *NewPN =
          PHINode::Create(PN.getType(), IncomingVals.size(),
                          PN.getName() + ".ce", NewBB->getFirstNonPHI());
      for (unsigned i : IncomingVals)
        NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
      // Remove in reverse order so the recorded indices remain valid.
      for (unsigned i : reverse(IncomingVals))
        PN.removeIncomingValue(i, false);
      PN.addIncoming(NewPN, NewBB);
    }
  }
}
  705. void CodeExtractor::splitReturnBlocks() {
  706. for (BasicBlock *Block : Blocks)
  707. if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
  708. BasicBlock *New =
  709. Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
  710. if (DT) {
  711. // Old dominates New. New node dominates all other nodes dominated
  712. // by Old.
  713. DomTreeNode *OldNode = DT->getNode(Block);
  714. SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
  715. OldNode->end());
  716. DomTreeNode *NewNode = DT->addNewBlock(New, Block);
  717. for (DomTreeNode *I : Children)
  718. DT->changeImmediateDominator(I, NewNode);
  719. }
  720. }
  721. }
/// constructFunction - make a function based on inputs and outputs, as follows:
/// f(in0, ..., inN, out0, ..., outN)
///
/// Inputs become value parameters and outputs become pointer parameters the
/// callee writes through; when AggregateArgs is set, values not listed in
/// ExcludeArgsFromAggregate are packed into a single struct parameter
/// appended after the scalar ones. \p newRootNode is spliced in as the new
/// function's body, and branches in \p oldFunction that targeted \p header
/// from outside the region are retargeted to \p newHeader. Returns the newly
/// created function.
Function *CodeExtractor::constructFunction(const ValueSet &inputs,
                                           const ValueSet &outputs,
                                           BasicBlock *header,
                                           BasicBlock *newRootNode,
                                           BasicBlock *newHeader,
                                           Function *oldFunction,
                                           Module *M) {
  LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
  LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");

  // This function returns unsigned, outputs will go back by reference.
  // 0/1 exits need no selector, 2 exits fit an i1, otherwise use an i16.
  switch (NumExitBlocks) {
  case 0:
  case 1: RetTy = Type::getVoidTy(header->getContext()); break;
  case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
  default: RetTy = Type::getInt16Ty(header->getContext()); break;
  }

  std::vector<Type *> ParamTy;    // Scalar (non-aggregated) parameter types.
  std::vector<Type *> AggParamTy; // Field types of the aggregate struct.
  ValueSet StructValues;          // Values routed through the struct.
  const DataLayout &DL = M->getDataLayout();

  // Add the types of the input values to the function's argument list
  for (Value *value : inputs) {
    LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) {
      AggParamTy.push_back(value->getType());
      StructValues.insert(value);
    } else
      ParamTy.push_back(value->getType());
  }

  // Add the types of the output values to the function's argument list.
  // Scalar outputs are passed as pointers the callee stores through.
  for (Value *output : outputs) {
    LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
      AggParamTy.push_back(output->getType());
      StructValues.insert(output);
    } else
      ParamTy.push_back(
          PointerType::get(output->getType(), DL.getAllocaAddrSpace()));
  }

  assert(
      (ParamTy.size() + AggParamTy.size()) ==
          (inputs.size() + outputs.size()) &&
      "Number of scalar and aggregate params does not match inputs, outputs");
  assert((StructValues.empty() || AggregateArgs) &&
         "Expeced StructValues only with AggregateArgs set");

  // Concatenate scalar and aggregate params in ParamTy.
  size_t NumScalarParams = ParamTy.size();
  StructType *StructTy = nullptr;
  if (AggregateArgs && !AggParamTy.empty()) {
    StructTy = StructType::get(M->getContext(), AggParamTy);
    ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace()));
  }

  LLVM_DEBUG({
    dbgs() << "Function type: " << *RetTy << " f(";
    for (Type *i : ParamTy)
      dbgs() << *i << ", ";
    dbgs() << ")\n";
  });

  FunctionType *funcType = FunctionType::get(
      RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg());

  // Name the new function "<old>.<suffix>", defaulting the suffix to the
  // header block's name (or "extracted") when none was provided.
  std::string SuffixToUse =
      Suffix.empty()
          ? (header->getName().empty() ? "extracted" : header->getName().str())
          : Suffix;
  // Create the new function
  Function *newFunction = Function::Create(
      funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
      oldFunction->getName() + "." + SuffixToUse, M);

  // Inherit all of the target dependent attributes and white-listed
  // target independent attributes.
  // (e.g. If the extracted region contains a call to an x86.sse
  // instruction we need to make sure that the extracted region has the
  // "target-features" attribute allowing it to be lowered.
  // FIXME: This should be changed to check to see if a specific
  // attribute can not be inherited.
  for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) {
    if (Attr.isStringAttribute()) {
      if (Attr.getKindAsString() == "thunk")
        continue;
    } else
      switch (Attr.getKindAsEnum()) {
      // Those attributes cannot be propagated safely. Explicitly list them
      // here so we get a warning if new attributes are added.
      case Attribute::AllocSize:
      case Attribute::Builtin:
      case Attribute::Convergent:
      case Attribute::JumpTable:
      case Attribute::Naked:
      case Attribute::NoBuiltin:
      case Attribute::NoMerge:
      case Attribute::NoReturn:
      case Attribute::NoSync:
      case Attribute::ReturnsTwice:
      case Attribute::Speculatable:
      case Attribute::StackAlignment:
      case Attribute::WillReturn:
      case Attribute::AllocKind:
      case Attribute::PresplitCoroutine:
      case Attribute::Memory:
        continue;
      // Those attributes should be safe to propagate to the extracted function.
      case Attribute::AlwaysInline:
      case Attribute::Cold:
      case Attribute::DisableSanitizerInstrumentation:
      case Attribute::FnRetThunkExtern:
      case Attribute::Hot:
      case Attribute::NoRecurse:
      case Attribute::InlineHint:
      case Attribute::MinSize:
      case Attribute::NoCallback:
      case Attribute::NoDuplicate:
      case Attribute::NoFree:
      case Attribute::NoImplicitFloat:
      case Attribute::NoInline:
      case Attribute::NonLazyBind:
      case Attribute::NoRedZone:
      case Attribute::NoUnwind:
      case Attribute::NoSanitizeBounds:
      case Attribute::NoSanitizeCoverage:
      case Attribute::NullPointerIsValid:
      case Attribute::OptForFuzzing:
      case Attribute::OptimizeNone:
      case Attribute::OptimizeForSize:
      case Attribute::SafeStack:
      case Attribute::ShadowCallStack:
      case Attribute::SanitizeAddress:
      case Attribute::SanitizeMemory:
      case Attribute::SanitizeThread:
      case Attribute::SanitizeHWAddress:
      case Attribute::SanitizeMemTag:
      case Attribute::SpeculativeLoadHardening:
      case Attribute::StackProtect:
      case Attribute::StackProtectReq:
      case Attribute::StackProtectStrong:
      case Attribute::StrictFP:
      case Attribute::UWTable:
      case Attribute::VScaleRange:
      case Attribute::NoCfCheck:
      case Attribute::MustProgress:
      case Attribute::NoProfile:
      case Attribute::SkipProfile:
        break;
      // These attributes cannot be applied to functions.
      case Attribute::Alignment:
      case Attribute::AllocatedPointer:
      case Attribute::AllocAlign:
      case Attribute::ByVal:
      case Attribute::Dereferenceable:
      case Attribute::DereferenceableOrNull:
      case Attribute::ElementType:
      case Attribute::InAlloca:
      case Attribute::InReg:
      case Attribute::Nest:
      case Attribute::NoAlias:
      case Attribute::NoCapture:
      case Attribute::NoUndef:
      case Attribute::NonNull:
      case Attribute::Preallocated:
      case Attribute::ReadNone:
      case Attribute::ReadOnly:
      case Attribute::Returned:
      case Attribute::SExt:
      case Attribute::StructRet:
      case Attribute::SwiftError:
      case Attribute::SwiftSelf:
      case Attribute::SwiftAsync:
      case Attribute::ZExt:
      case Attribute::ImmArg:
      case Attribute::ByRef:
      case Attribute::WriteOnly:
      // These are not really attributes.
      case Attribute::None:
      case Attribute::EndAttrKinds:
      case Attribute::EmptyKey:
      case Attribute::TombstoneKey:
        llvm_unreachable("Not a function attribute");
      }

    newFunction->addFnAttr(Attr);
  }

  // Splice the body's entry block into the new function.
  newFunction->insert(newFunction->end(), newRootNode);

  // Create scalar and aggregate iterators to name all of the arguments we
  // inserted.
  Function::arg_iterator ScalarAI = newFunction->arg_begin();
  Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams);

  // Rewrite all users of the inputs in the extracted region to use the
  // arguments (or appropriate addressing into struct) instead.
  for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
    Value *RewriteVal;
    if (AggregateArgs && StructValues.contains(inputs[i])) {
      // Aggregated input: GEP into the struct argument and load the field,
      // inserting before the entry block's terminator.
      Value *Idx[2];
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
      Instruction *TI = newFunction->begin()->getTerminator();
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
          StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI);
      RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP,
                                "loadgep_" + inputs[i]->getName(), TI);
      ++aggIdx;
    } else
      RewriteVal = &*ScalarAI++;

    // Copy the user list before rewriting, since replaceUsesOfWith mutates it.
    std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
    for (User *use : Users)
      if (Instruction *inst = dyn_cast<Instruction>(use))
        if (Blocks.count(inst->getParent()))
          inst->replaceUsesOfWith(inputs[i], RewriteVal);
  }

  // Set names for input and output arguments.
  if (NumScalarParams) {
    ScalarAI = newFunction->arg_begin();
    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI)
      if (!StructValues.contains(inputs[i]))
        ScalarAI->setName(inputs[i]->getName());
    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI)
      if (!StructValues.contains(outputs[i]))
        ScalarAI->setName(outputs[i]->getName() + ".out");
  }

  // Rewrite branches to basic blocks outside of the loop to new dummy blocks
  // within the new function. This must be done before we lose track of which
  // blocks were originally in the code region.
  std::vector<User *> Users(header->user_begin(), header->user_end());
  for (auto &U : Users)
    // The BasicBlock which contains the branch is not in the region
    // modify the branch target to a new block
    if (Instruction *I = dyn_cast<Instruction>(U))
      if (I->isTerminator() && I->getFunction() == oldFunction &&
          !Blocks.count(I->getParent()))
        I->replaceUsesOfWith(header, newHeader);

  return newFunction;
}
  953. /// Erase lifetime.start markers which reference inputs to the extraction
  954. /// region, and insert the referenced memory into \p LifetimesStart.
  955. ///
  956. /// The extraction region is defined by a set of blocks (\p Blocks), and a set
  957. /// of allocas which will be moved from the caller function into the extracted
  958. /// function (\p SunkAllocas).
  959. static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
  960. const SetVector<Value *> &SunkAllocas,
  961. SetVector<Value *> &LifetimesStart) {
  962. for (BasicBlock *BB : Blocks) {
  963. for (Instruction &I : llvm::make_early_inc_range(*BB)) {
  964. auto *II = dyn_cast<IntrinsicInst>(&I);
  965. if (!II || !II->isLifetimeStartOrEnd())
  966. continue;
  967. // Get the memory operand of the lifetime marker. If the underlying
  968. // object is a sunk alloca, or is otherwise defined in the extraction
  969. // region, the lifetime marker must not be erased.
  970. Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
  971. if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
  972. continue;
  973. if (II->getIntrinsicID() == Intrinsic::lifetime_start)
  974. LifetimesStart.insert(Mem);
  975. II->eraseFromParent();
  976. }
  977. }
  978. }
/// Insert lifetime start/end markers surrounding the call to the new function
/// for objects defined in the caller.
///
/// Start markers for \p LifetimesStart are inserted immediately before
/// \p TheCall; end markers for \p LifetimesEnd are inserted just before the
/// terminator of the block containing \p TheCall.
static void insertLifetimeMarkersSurroundingCall(
    Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
    CallInst *TheCall) {
  LLVMContext &Ctx = M->getContext();
  auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
  // A size of -1 tells the lifetime intrinsics the extent is unknown /
  // covers the whole object.
  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
  Instruction *Term = TheCall->getParent()->getTerminator();

  // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
  // needed to satisfy this requirement so they may be reused.
  DenseMap<Value *, Value *> Bitcasts;

  // Emit lifetime markers for the pointers given in \p Objects. Insert the
  // markers before the call if \p InsertBefore, and after the call otherwise.
  auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
                           bool InsertBefore) {
    for (Value *Mem : Objects) {
      assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
                                            TheCall->getFunction()) &&
             "Input memory not defined in original function");

      Value *&MemAsI8Ptr = Bitcasts[Mem];
      if (!MemAsI8Ptr) {
        if (Mem->getType() == Int8PtrTy)
          MemAsI8Ptr = Mem;
        else
          // Cast once, before the call, and reuse via the cache above.
          MemAsI8Ptr =
              CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
      }

      auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
      if (InsertBefore)
        Marker->insertBefore(TheCall);
      else
        Marker->insertBefore(Term);
    }
  };

  if (!LifetimesStart.empty()) {
    auto StartFn = llvm::Intrinsic::getDeclaration(
        M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
    insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
  }

  if (!LifetimesEnd.empty()) {
    auto EndFn = llvm::Intrinsic::getDeclaration(
        M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
    insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
  }
}
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
///
/// Builds the call to \p newFunction inside \p codeReplacer, packs/unpacks
/// the aggregate struct when AggregateArgs is set, reloads outputs for
/// callers outside the region, creates one ".exitStub" return block per
/// distinct exit target, and finally lowers the dispatch switch to a ret,
/// branch, or switch depending on NumExitBlocks. Returns the emitted call.
CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
                                                    BasicBlock *codeReplacer,
                                                    ValueSet &inputs,
                                                    ValueSet &outputs) {
  // Emit a call to the new function, passing in: *pointer to struct (if
  // aggregating parameters), or plan inputs and allocated memory for outputs
  std::vector<Value *> params, ReloadOutputs, Reloads;
  ValueSet StructValues;

  Module *M = newFunction->getParent();
  LLVMContext &Context = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  CallInst *call = nullptr;

  // Add inputs as params, or to be filled into the struct
  unsigned ScalarInputArgNo = 0;
  SmallVector<unsigned, 1> SwiftErrorArgs;
  for (Value *input : inputs) {
    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input))
      StructValues.insert(input);
    else {
      params.push_back(input);
      if (input->isSwiftError())
        SwiftErrorArgs.push_back(ScalarInputArgNo);
    }
    ++ScalarInputArgNo;
  }

  // Create allocas for the outputs
  unsigned ScalarOutputArgNo = 0;
  for (Value *output : outputs) {
    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
      StructValues.insert(output);
    } else {
      // Scalar output: stack slot in the caller's entry block, passed by
      // pointer and reloaded after the call.
      AllocaInst *alloca =
          new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
                         nullptr, output->getName() + ".loc",
                         &codeReplacer->getParent()->front().front());
      ReloadOutputs.push_back(alloca);
      params.push_back(alloca);
      ++ScalarOutputArgNo;
    }
  }

  StructType *StructArgTy = nullptr;
  AllocaInst *Struct = nullptr;
  unsigned NumAggregatedInputs = 0;
  if (AggregateArgs && !StructValues.empty()) {
    std::vector<Type *> ArgTypes;
    for (Value *V : StructValues)
      ArgTypes.push_back(V->getType());

    // Allocate a struct at the beginning of this function
    StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
    Struct = new AllocaInst(
        StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
        AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
                        : &codeReplacer->getParent()->front().front());
    params.push_back(Struct);

    // Store aggregated inputs in the struct.
    for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
      if (inputs.contains(StructValues[i])) {
        Value *Idx[2];
        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
        GetElementPtrInst *GEP = GetElementPtrInst::Create(
            StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
        GEP->insertInto(codeReplacer, codeReplacer->end());
        new StoreInst(StructValues[i], GEP, codeReplacer);
        NumAggregatedInputs++;
      }
    }
  }

  // Emit the call to the function
  call = CallInst::Create(newFunction, params,
                          NumExitBlocks > 1 ? "targetBlock" : "");
  // Add debug location to the new call, if the original function has debug
  // info. In that case, the terminator of the entry block of the extracted
  // function contains the first debug location of the extracted function,
  // set in extractCodeRegion.
  if (codeReplacer->getParent()->getSubprogram()) {
    if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
      call->setDebugLoc(DL);
  }
  call->insertInto(codeReplacer, codeReplacer->end());

  // Set swifterror parameter attributes.
  for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
    call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
    newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
  }

  // Reload the outputs passed in by reference, use the struct if output is in
  // the aggregate or reload from the scalar argument.
  for (unsigned i = 0, e = outputs.size(), scalarIdx = 0,
                aggIdx = NumAggregatedInputs;
       i != e; ++i) {
    Value *Output = nullptr;
    if (AggregateArgs && StructValues.contains(outputs[i])) {
      // Output fields follow the input fields in the struct, hence aggIdx
      // starts at NumAggregatedInputs.
      Value *Idx[2];
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
          StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
      GEP->insertInto(codeReplacer, codeReplacer->end());
      Output = GEP;
      ++aggIdx;
    } else {
      Output = ReloadOutputs[scalarIdx];
      ++scalarIdx;
    }
    LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
                                  outputs[i]->getName() + ".reload",
                                  codeReplacer);
    Reloads.push_back(load);
    // Copy the user list first: replaceUsesOfWith mutates it.
    std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
    for (User *U : Users) {
      Instruction *inst = cast<Instruction>(U);
      if (!Blocks.count(inst->getParent()))
        inst->replaceUsesOfWith(outputs[i], load);
    }
  }

  // Now we can emit a switch statement using the call as a value.
  // Initially the default destination is codeReplacer itself (successor 0);
  // it is fixed up or lowered away at the end of this function.
  SwitchInst *TheSwitch =
      SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
                         codeReplacer, 0, codeReplacer);

  // Since there may be multiple exits from the original region, make the new
  // function return an unsigned, switch on that number. This loop iterates
  // over all of the blocks in the extracted region, updating any terminator
  // instructions in the to-be-extracted region that branch to blocks that are
  // not in the region to be extracted.
  std::map<BasicBlock *, BasicBlock *> ExitBlockMap;

  // Iterate over the previously collected targets, and create new blocks inside
  // the function to branch to.
  unsigned switchVal = 0;
  for (BasicBlock *OldTarget : OldTargets) {
    if (Blocks.count(OldTarget))
      continue;
    // ExitBlockMap deduplicates: each distinct exit target gets exactly one
    // stub and one switch case number.
    BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
    if (NewTarget)
      continue;

    // If we don't already have an exit stub for this non-extracted
    // destination, create one now!
    NewTarget = BasicBlock::Create(Context,
                                   OldTarget->getName() + ".exitStub",
                                   newFunction);
    unsigned SuccNum = switchVal++;

    Value *brVal = nullptr;
    assert(NumExitBlocks < 0xffff && "too many exit blocks for switch");
    switch (NumExitBlocks) {
    case 0:
    case 1: break;  // No value needed.
    case 2:         // Conditional branch, return a bool
      brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
      break;
    default:
      brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
      break;
    }

    ReturnInst::Create(Context, brVal, NewTarget);

    // Update the switch instruction.
    TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
                                        SuccNum),
                       OldTarget);
  }

  // Retarget region-internal terminators from old exit blocks to their stubs.
  for (BasicBlock *Block : Blocks) {
    Instruction *TI = Block->getTerminator();
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
      if (Blocks.count(TI->getSuccessor(i)))
        continue;
      BasicBlock *OldTarget = TI->getSuccessor(i);
      // add a new basic block which returns the appropriate value
      BasicBlock *NewTarget = ExitBlockMap[OldTarget];
      assert(NewTarget && "Unknown target block!");

      // rewrite the original branch instruction with this new target
      TI->setSuccessor(i, NewTarget);
    }
  }

  // Store the arguments right after the definition of output value.
  // This should be proceeded after creating exit stubs to be ensure that invoke
  // result restore will be placed in the outlined function.
  Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin();
  std::advance(ScalarOutputArgBegin, ScalarInputArgNo);
  Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin();
  std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo);

  for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e;
       ++i) {
    auto *OutI = dyn_cast<Instruction>(outputs[i]);
    if (!OutI)
      continue;

    // Find proper insertion point.
    BasicBlock::iterator InsertPt;
    // In case OutI is an invoke, we insert the store at the beginning in the
    // 'normal destination' BB. Otherwise we insert the store right after OutI.
    if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
    else if (auto *Phi = dyn_cast<PHINode>(OutI))
      InsertPt = Phi->getParent()->getFirstInsertionPt();
    else
      InsertPt = std::next(OutI->getIterator());

    Instruction *InsertBefore = &*InsertPt;
    assert((InsertBefore->getFunction() == newFunction ||
            Blocks.count(InsertBefore->getParent())) &&
           "InsertPt should be in new function");
    if (AggregateArgs && StructValues.contains(outputs[i])) {
      assert(AggOutputArgBegin != newFunction->arg_end() &&
             "Number of aggregate output arguments should match "
             "the number of defined values");
      Value *Idx[2];
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
          StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(),
          InsertBefore);
      new StoreInst(outputs[i], GEP, InsertBefore);
      ++aggIdx;
      // Since there should be only one struct argument aggregating
      // all the output values, we shouldn't increment AggOutputArgBegin, which
      // always points to the struct argument, in this case.
    } else {
      assert(ScalarOutputArgBegin != newFunction->arg_end() &&
             "Number of scalar output arguments should match "
             "the number of defined values");
      new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore);
      ++ScalarOutputArgBegin;
    }
  }

  // Now that we've done the deed, simplify the switch instruction.
  Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
  switch (NumExitBlocks) {
  case 0:
    // There are no successors (the block containing the switch itself), which
    // means that previously this was the last part of the function, and hence
    // this should be rewritten as a `ret'

    // Check if the function should return a value
    if (OldFnRetTy->isVoidTy()) {
      ReturnInst::Create(Context, nullptr, TheSwitch);  // Return void
    } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
      // return what we have
      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
    } else {
      // Otherwise we must have code extracted an unwind or something, just
      // return whatever we want.
      ReturnInst::Create(Context,
                         Constant::getNullValue(OldFnRetTy), TheSwitch);
    }

    TheSwitch->eraseFromParent();
    break;
  case 1:
    // Only a single destination, change the switch into an unconditional
    // branch. Successor 0 is the (placeholder) default destination, so the
    // real target is successor 1.
    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
    TheSwitch->eraseFromParent();
    break;
  case 2:
    // Two destinations: the call returns an i1 selecting between them.
    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
                       call, TheSwitch);
    TheSwitch->eraseFromParent();
    break;
  default:
    // Otherwise, make the default destination of the switch instruction be one
    // of the other successors.
    TheSwitch->setCondition(call);
    TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
    // Remove redundant case
    TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
    break;
  }

  // Insert lifetime markers around the reloads of any output values. The
  // allocas output values are stored in are only in-use in the codeRepl block.
  insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);

  return call;
}
  1294. void CodeExtractor::moveCodeToFunction(Function *newFunction) {
  1295. auto newFuncIt = newFunction->front().getIterator();
  1296. for (BasicBlock *Block : Blocks) {
  1297. // Delete the basic block from the old function, and the list of blocks
  1298. Block->removeFromParent();
  1299. // Insert this basic block into the new function
  1300. // Insert the original blocks after the entry block created
  1301. // for the new function. The entry block may be followed
  1302. // by a set of exit blocks at this point, but these exit
  1303. // blocks better be placed at the end of the new function.
  1304. newFuncIt = newFunction->insert(std::next(newFuncIt), Block);
  1305. }
  1306. }
/// Recompute the branch probabilities and !prof branch-weight metadata on the
/// terminator of \p CodeReplacer, distributing the recorded per-exit-block
/// frequencies (\p ExitWeights) across its successors via \p BPI.
void CodeExtractor::calculateNewCallTerminatorWeights(
    BasicBlock *CodeReplacer,
    DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
    BranchProbabilityInfo *BPI) {
  using Distribution = BlockFrequencyInfoImplBase::Distribution;
  using BlockNode = BlockFrequencyInfoImplBase::BlockNode;

  // Update the branch weights for the exit block.
  Instruction *TI = CodeReplacer->getTerminator();
  SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);

  // Block Frequency distribution with dummy node.
  Distribution BranchDist;

  // Start every edge at "unknown"; zero-frequency edges are pinned to zero
  // below, and the rest are filled in from the normalized distribution.
  SmallVector<BranchProbability, 4> EdgeProbabilities(
      TI->getNumSuccessors(), BranchProbability::getUnknown());

  // Add each of the frequencies of the successors.
  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
    BlockNode ExitNode(i);
    uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
    if (ExitFreq != 0)
      BranchDist.addExit(ExitNode, ExitFreq);
    else
      EdgeProbabilities[i] = BranchProbability::getZero();
  }

  // Check for no total weight. Nothing to normalize; just record the zero /
  // unknown probabilities and skip the metadata.
  if (BranchDist.Total == 0) {
    BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
    return;
  }

  // Normalize the distribution so that they can fit in unsigned.
  BranchDist.normalize();

  // Create normalized branch weights and set the metadata.
  for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
    const auto &Weight = BranchDist.Weights[I];

    // Get the weight and update the current BFI.
    BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
    BranchProbability BP(Weight.Amount, BranchDist.Total);
    EdgeProbabilities[Weight.TargetNode.Index] = BP;
  }
  BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
  TI->setMetadata(
      LLVMContext::MD_prof,
      MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
  1349. /// Erase debug info intrinsics which refer to values in \p F but aren't in
  1350. /// \p F.
  1351. static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
  1352. for (Instruction &I : instructions(F)) {
  1353. SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
  1354. findDbgUsers(DbgUsers, &I);
  1355. for (DbgVariableIntrinsic *DVI : DbgUsers)
  1356. if (DVI->getFunction() != &F)
  1357. DVI->eraseFromParent();
  1358. }
  1359. }
/// Fix up the debug info in the old and new functions by pointing line
/// locations and debug intrinsics to the new subprogram scope, and by deleting
/// intrinsics which point to values outside of the new function.
static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
                                         CallInst &TheCall) {
  DISubprogram *OldSP = OldFunc.getSubprogram();
  LLVMContext &Ctx = OldFunc.getContext();

  if (!OldSP) {
    // The old function had no debug info, so the extracted code cannot have
    // meaningful debug info either.
    // Erase any debug info the new function contains.
    stripDebugInfo(NewFunc);
    // Make sure the old function doesn't contain any non-local metadata refs.
    eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
    return;
  }

  // Create a subprogram for the new function. Leave out a description of the
  // function arguments, as the parameters don't correspond to anything at the
  // source level.
  assert(OldSP->getUnit() && "Missing compile unit for subprogram");
  DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
                OldSP->getUnit());
  auto SPType =
      DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
  DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
                                    DISubprogram::SPFlagOptimized |
                                    DISubprogram::SPFlagLocalToUnit;
  // Line/scope-line 0: the outlined function has no single source location.
  auto NewSP = DIB.createFunction(
      OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
      /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
  NewFunc.setSubprogram(NewSP);

  // Debug intrinsics in the new function need to be updated in one of two
  // ways:
  //  1) They need to be deleted, because they describe a value in the old
  //     function.
  //  2) They need to point to fresh metadata, e.g. because they currently
  //     point to a variable in the wrong scope.
  SmallDenseMap<DINode *, DINode *> RemappedMetadata;
  SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
  // Shared scope-clone cache: reused below for line-location fixup so equal
  // scopes map to the same cloned node.
  DenseMap<const MDNode *, MDNode *> Cache;
  for (Instruction &I : instructions(NewFunc)) {
    auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
    if (!DII)
      continue;

    // Point the intrinsic to a fresh label within the new function if the
    // intrinsic was not inlined from some other function.
    if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
      if (DLI->getDebugLoc().getInlinedAt())
        continue;
      DILabel *OldLabel = DLI->getLabel();
      DINode *&NewLabel = RemappedMetadata[OldLabel];
      if (!NewLabel) {
        // Clone the label's scope chain into the new subprogram first.
        DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
            *OldLabel->getScope(), *NewSP, Ctx, Cache);
        NewLabel = DILabel::get(Ctx, NewScope, OldLabel->getName(),
                                OldLabel->getFile(), OldLabel->getLine());
      }
      DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel));
      continue;
    }

    auto IsInvalidLocation = [&NewFunc](Value *Location) {
      // Location is invalid if it isn't a constant or an instruction, or is an
      // instruction but isn't in the new function.
      if (!Location ||
          (!isa<Constant>(Location) && !isa<Instruction>(Location)))
        return true;
      Instruction *LocationInst = dyn_cast<Instruction>(Location);
      return LocationInst && LocationInst->getFunction() != &NewFunc;
    };

    auto *DVI = cast<DbgVariableIntrinsic>(DII);
    // If any of the used locations are invalid, delete the intrinsic.
    if (any_of(DVI->location_ops(), IsInvalidLocation)) {
      DebugIntrinsicsToDelete.push_back(DVI);
      continue;
    }

    // If the variable was in the scope of the old function, i.e. it was not
    // inlined, point the intrinsic to a fresh variable within the new function.
    if (!DVI->getDebugLoc().getInlinedAt()) {
      DILocalVariable *OldVar = DVI->getVariable();
      DINode *&NewVar = RemappedMetadata[OldVar];
      if (!NewVar) {
        DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
            *OldVar->getScope(), *NewSP, Ctx, Cache);
        // Parameters are re-created as auto variables: the new function's
        // arguments don't correspond to source-level parameters.
        NewVar = DIB.createAutoVariable(
            NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
            OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
            OldVar->getAlignInBits());
      }
      DVI->setVariable(cast<DILocalVariable>(NewVar));
    }
  }

  // Deletion is deferred until after the walk so the instruction iteration
  // above stays valid.
  for (auto *DII : DebugIntrinsicsToDelete)
    DII->eraseFromParent();
  DIB.finalizeSubprogram(NewSP);

  // Fix up the scope information attached to the line locations in the new
  // function.
  for (Instruction &I : instructions(NewFunc)) {
    if (const DebugLoc &DL = I.getDebugLoc())
      I.setDebugLoc(
          DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache));

    // Loop info metadata may contain line locations. Fix them up.
    auto updateLoopInfoLoc = [&Ctx, &Cache, NewSP](Metadata *MD) -> Metadata * {
      if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
        return DebugLoc::replaceInlinedAtSubprogram(Loc, *NewSP, Ctx, Cache);
      return MD;
    };
    updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
  }

  // Give the call site a (line 0) location in the old subprogram's scope so
  // it doesn't end up without debug info in a function that has it.
  if (!TheCall.getDebugLoc())
    TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));

  eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
}
  1470. Function *
  1471. CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
  1472. ValueSet Inputs, Outputs;
  1473. return extractCodeRegion(CEAC, Inputs, Outputs);
  1474. }
  1475. Function *
  1476. CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
  1477. ValueSet &inputs, ValueSet &outputs) {
  1478. if (!isEligible())
  1479. return nullptr;
  1480. // Assumption: this is a single-entry code region, and the header is the first
  1481. // block in the region.
  1482. BasicBlock *header = *Blocks.begin();
  1483. Function *oldFunction = header->getParent();
  1484. // Calculate the entry frequency of the new function before we change the root
  1485. // block.
  1486. BlockFrequency EntryFreq;
  1487. if (BFI) {
  1488. assert(BPI && "Both BPI and BFI are required to preserve profile info");
  1489. for (BasicBlock *Pred : predecessors(header)) {
  1490. if (Blocks.count(Pred))
  1491. continue;
  1492. EntryFreq +=
  1493. BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
  1494. }
  1495. }
  1496. // Remove CondGuardInsts that will be moved to the new function from the old
  1497. // function's assumption cache.
  1498. for (BasicBlock *Block : Blocks) {
  1499. for (Instruction &I : llvm::make_early_inc_range(*Block)) {
  1500. if (auto *CI = dyn_cast<CondGuardInst>(&I)) {
  1501. if (AC)
  1502. AC->unregisterAssumption(CI);
  1503. CI->eraseFromParent();
  1504. }
  1505. }
  1506. }
  1507. // If we have any return instructions in the region, split those blocks so
  1508. // that the return is not in the region.
  1509. splitReturnBlocks();
  1510. // Calculate the exit blocks for the extracted region and the total exit
  1511. // weights for each of those blocks.
  1512. DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
  1513. SmallPtrSet<BasicBlock *, 1> ExitBlocks;
  1514. for (BasicBlock *Block : Blocks) {
  1515. for (BasicBlock *Succ : successors(Block)) {
  1516. if (!Blocks.count(Succ)) {
  1517. // Update the branch weight for this successor.
  1518. if (BFI) {
  1519. BlockFrequency &BF = ExitWeights[Succ];
  1520. BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ);
  1521. }
  1522. ExitBlocks.insert(Succ);
  1523. }
  1524. }
  1525. }
  1526. NumExitBlocks = ExitBlocks.size();
  1527. for (BasicBlock *Block : Blocks) {
  1528. Instruction *TI = Block->getTerminator();
  1529. for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
  1530. if (Blocks.count(TI->getSuccessor(i)))
  1531. continue;
  1532. BasicBlock *OldTarget = TI->getSuccessor(i);
  1533. OldTargets.push_back(OldTarget);
  1534. }
  1535. }
  1536. // If we have to split PHI nodes of the entry or exit blocks, do so now.
  1537. severSplitPHINodesOfEntry(header);
  1538. severSplitPHINodesOfExits(ExitBlocks);
  1539. // This takes place of the original loop
  1540. BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
  1541. "codeRepl", oldFunction,
  1542. header);
  1543. // The new function needs a root node because other nodes can branch to the
  1544. // head of the region, but the entry node of a function cannot have preds.
  1545. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
  1546. "newFuncRoot");
  1547. auto *BranchI = BranchInst::Create(header);
  1548. // If the original function has debug info, we have to add a debug location
  1549. // to the new branch instruction from the artificial entry block.
  1550. // We use the debug location of the first instruction in the extracted
  1551. // blocks, as there is no other equivalent line in the source code.
  1552. if (oldFunction->getSubprogram()) {
  1553. any_of(Blocks, [&BranchI](const BasicBlock *BB) {
  1554. return any_of(*BB, [&BranchI](const Instruction &I) {
  1555. if (!I.getDebugLoc())
  1556. return false;
  1557. BranchI->setDebugLoc(I.getDebugLoc());
  1558. return true;
  1559. });
  1560. });
  1561. }
  1562. BranchI->insertInto(newFuncRoot, newFuncRoot->end());
  1563. ValueSet SinkingCands, HoistingCands;
  1564. BasicBlock *CommonExit = nullptr;
  1565. findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  1566. assert(HoistingCands.empty() || CommonExit);
  1567. // Find inputs to, outputs from the code region.
  1568. findInputsOutputs(inputs, outputs, SinkingCands);
  1569. // Now sink all instructions which only have non-phi uses inside the region.
  1570. // Group the allocas at the start of the block, so that any bitcast uses of
  1571. // the allocas are well-defined.
  1572. AllocaInst *FirstSunkAlloca = nullptr;
  1573. for (auto *II : SinkingCands) {
  1574. if (auto *AI = dyn_cast<AllocaInst>(II)) {
  1575. AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
  1576. if (!FirstSunkAlloca)
  1577. FirstSunkAlloca = AI;
  1578. }
  1579. }
  1580. assert((SinkingCands.empty() || FirstSunkAlloca) &&
  1581. "Did not expect a sink candidate without any allocas");
  1582. for (auto *II : SinkingCands) {
  1583. if (!isa<AllocaInst>(II)) {
  1584. cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
  1585. }
  1586. }
  1587. if (!HoistingCands.empty()) {
  1588. auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
  1589. Instruction *TI = HoistToBlock->getTerminator();
  1590. for (auto *II : HoistingCands)
  1591. cast<Instruction>(II)->moveBefore(TI);
  1592. }
  1593. // Collect objects which are inputs to the extraction region and also
  1594. // referenced by lifetime start markers within it. The effects of these
  1595. // markers must be replicated in the calling function to prevent the stack
  1596. // coloring pass from merging slots which store input objects.
  1597. ValueSet LifetimesStart;
  1598. eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
  1599. // Construct new function based on inputs/outputs & add allocas for all defs.
  1600. Function *newFunction =
  1601. constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
  1602. oldFunction, oldFunction->getParent());
  1603. // Update the entry count of the function.
  1604. if (BFI) {
  1605. auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
  1606. if (Count)
  1607. newFunction->setEntryCount(
  1608. ProfileCount(*Count, Function::PCT_Real)); // FIXME
  1609. BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
  1610. }
  1611. CallInst *TheCall =
  1612. emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
  1613. moveCodeToFunction(newFunction);
  1614. // Replicate the effects of any lifetime start/end markers which referenced
  1615. // input objects in the extraction region by placing markers around the call.
  1616. insertLifetimeMarkersSurroundingCall(
  1617. oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
  1618. // Propagate personality info to the new function if there is one.
  1619. if (oldFunction->hasPersonalityFn())
  1620. newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
  1621. // Update the branch weights for the exit block.
  1622. if (BFI && NumExitBlocks > 1)
  1623. calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
  1624. // Loop over all of the PHI nodes in the header and exit blocks, and change
  1625. // any references to the old incoming edge to be the new incoming edge.
  1626. for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
  1627. PHINode *PN = cast<PHINode>(I);
  1628. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
  1629. if (!Blocks.count(PN->getIncomingBlock(i)))
  1630. PN->setIncomingBlock(i, newFuncRoot);
  1631. }
  1632. for (BasicBlock *ExitBB : ExitBlocks)
  1633. for (PHINode &PN : ExitBB->phis()) {
  1634. Value *IncomingCodeReplacerVal = nullptr;
  1635. for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
  1636. // Ignore incoming values from outside of the extracted region.
  1637. if (!Blocks.count(PN.getIncomingBlock(i)))
  1638. continue;
  1639. // Ensure that there is only one incoming value from codeReplacer.
  1640. if (!IncomingCodeReplacerVal) {
  1641. PN.setIncomingBlock(i, codeReplacer);
  1642. IncomingCodeReplacerVal = PN.getIncomingValue(i);
  1643. } else
  1644. assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
  1645. "PHI has two incompatbile incoming values from codeRepl");
  1646. }
  1647. }
  1648. fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
  1649. // Mark the new function `noreturn` if applicable. Terminators which resume
  1650. // exception propagation are treated as returning instructions. This is to
  1651. // avoid inserting traps after calls to outlined functions which unwind.
  1652. bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
  1653. const Instruction *Term = BB.getTerminator();
  1654. return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
  1655. });
  1656. if (doesNotReturn)
  1657. newFunction->setDoesNotReturn();
  1658. LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
  1659. newFunction->dump();
  1660. report_fatal_error("verification of newFunction failed!");
  1661. });
  1662. LLVM_DEBUG(if (verifyFunction(*oldFunction))
  1663. report_fatal_error("verification of oldFunction failed!"));
  1664. LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
  1665. report_fatal_error("Stale Asumption cache for old Function!"));
  1666. return newFunction;
  1667. }
  1668. bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
  1669. const Function &NewFunc,
  1670. AssumptionCache *AC) {
  1671. for (auto AssumeVH : AC->assumptions()) {
  1672. auto *I = dyn_cast_or_null<CondGuardInst>(AssumeVH);
  1673. if (!I)
  1674. continue;
  1675. // There shouldn't be any llvm.assume intrinsics in the new function.
  1676. if (I->getFunction() != &OldFunc)
  1677. return true;
  1678. // There shouldn't be any stale affected values in the assumption cache
  1679. // that were previously in the old function, but that have now been moved
  1680. // to the new function.
  1681. for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
  1682. auto *AffectedCI = dyn_cast_or_null<CondGuardInst>(AffectedValVH);
  1683. if (!AffectedCI)
  1684. continue;
  1685. if (AffectedCI->getFunction() != &OldFunc)
  1686. return true;
  1687. auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
  1688. if (AssumedInst->getFunction() != &OldFunc)
  1689. return true;
  1690. }
  1691. }
  1692. return false;
  1693. }
/// Record \p Arg in ExcludeArgsFromAggregate. Presumably the set is consulted
/// when the extracted function's parameters are built, to keep this value out
/// of the packed aggregate argument — the consumer is outside this chunk;
/// confirm against constructFunction.
void CodeExtractor::excludeArgFromAggregate(Value *Arg) {
  ExcludeArgsFromAggregate.insert(Arg);
}