//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
  cl::Hidden,
  cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::ZeroOrMore, cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
  cl::init(false), cl::Hidden,
  cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<bool> UpdateReturnAttributes(
    "update-return-attrs", cl::init(true), cl::Hidden,
    cl::desc("Update return attributes on calls within inlined body"));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));

namespace {

/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo {
  /// Destination of the invoke's unwind.
  BasicBlock *OuterResumeDest;

  /// Destination for the callee's resume.
  BasicBlock *InnerResumeDest = nullptr;

  /// LandingPadInst associated with the invoke.
  LandingPadInst *CallerLPad = nullptr;

  /// PHI for EH values from landingpad insts.
  PHINode *InnerEHValuesPHI = nullptr;
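
  /// Saved incoming values for the unwind destination's PHI nodes, taken
  /// from the original invoke's block so new unwind edges can reuse them.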
  SmallVector<Value*, 8> UnwindDestPHIValues;

public:
  LandingPadInliningInfo(InvokeInst *II)
      : OuterResumeDest(II->getUnwindDest()) {
    // If there are PHI nodes in the unwind destination block, we need to keep
    // track of which values came into them from the invoke before removing
    // the edge from this block.
    BasicBlock *InvokeBB = II->getParent();
    BasicBlock::iterator I = OuterResumeDest->begin();
    for (; isa<PHINode>(I); ++I) {
      // Save the value to use for this edge.
      PHINode *PHI = cast<PHINode>(I);
      UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
    }
    CallerLPad = cast<LandingPadInst>(I);
  }

  /// The outer unwind destination is the target of
  /// unwind edges introduced for calls within the inlined function.
  BasicBlock *getOuterResumeDest() const {
    return OuterResumeDest;
  }

  BasicBlock *getInnerResumeDest();

  LandingPadInst *getLandingPadInst() const { return CallerLPad; }

  /// Forward the 'resume' instruction to the caller's landing pad block.
  /// When the landing pad block has only one predecessor, this is
  /// a simple branch. When there is more than one predecessor, we need to
  /// split the landing pad block after the landingpad instruction and jump
  /// to there.
  void forwardResume(ResumeInst *RI,
                     SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

  /// Add incoming-PHI values to the unwind destination block for the given
  /// basic block, using the values for the original invoke's source block.
  void addIncomingPHIValuesFor(BasicBlock *BB) const {
    addIncomingPHIValuesForInto(BB, OuterResumeDest);
  }

  void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
    BasicBlock::iterator I = dest->begin();
    for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
      PHINode *phi = cast<PHINode>(I);
      phi->addIncoming(UnwindDestPHIValues[i], src);
    }
  }
};

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
      OuterResumeDest->splitBasicBlock(SplitPoint,
                                       OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  Instruction *InsertPoint = &InnerResumeDest->front();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body",
                                        InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
                                     "eh.lpad-body", InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}
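
/// Memoizes the unwind destination token computed for each EH pad; a null
/// entry records that no definitive unwind destination was found.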
using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;

            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }
  return UnwindDestToken;
}

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
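    // Take the instruction and advance the iterator first, since the call
    // below may be rewritten into an invoke and the block split.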
    Instruction *I = &*BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);

    if (!CI || CI->doesNotThrow() || CI->isInlineAsm())
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental.deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (Instruction &I : *UnwindDest) {
    // Save the value to use for this edge.
    PHINode *PHI = dyn_cast<PHINode>(&I);
    if (!PHI)
      break;
    UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }
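
    // Only EH pad instructions need the rewriting below; skip blocks that do
    // not begin with one.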
    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch);
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        MemParallelLoopAccess = MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
            MemParallelLoopAccess);
        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MemParallelLoopAccess);
      }

      if (AccessGroup)
        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

      if (AliasScope)
        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

      if (NoAlias)
        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
    }
  }
}

/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  SetVector<const MDNode *> MD;
  MetadataMap MDMap;
  void addRecursiveMetadataUses();

public:
  ScopedAliasMetadataDeepCloner(const Function *F);

  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) {
  for (const BasicBlock &BB : *F) {
    for (const Instruction &I : BB) {
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);

      // We also need to clone the metadata in noalias intrinsics.
      if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        MD.insert(Decl->getScopeList());
    }
  }
  addRecursiveMetadataUses();
}
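
// Transitively collect the MDNode operands of the gathered scope lists so the
// deep clone covers every node they reference.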
void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (const Metadata *Op : M->operands())
      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
        if (MD.insert(OpMD))
          Queue.push_back(OpMD);
  }
}

void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      if (const MDNode *M = dyn_cast<MDNode>(Op))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
    NewOps.clear();
  }
}

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) {
  if (MDMap.empty())
    return; // Nothing to do.

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // TODO: The null checks for the MDMap.lookup() results should no longer
      // be necessary.
      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_alias_scope, MNew);

      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_noalias, MNew);

      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
          Decl->setScopeList(MNew);
    }
  }
}

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.
  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI)
        continue;
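
      // Collect the pointer operands this instruction may access; calls
      // contribute their arguments below.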
  898. bool IsArgMemOnlyCall = false, IsFuncCall = false;
  899. SmallVector<const Value *, 2> PtrArgs;
  900. if (const LoadInst *LI = dyn_cast<LoadInst>(I))
  901. PtrArgs.push_back(LI->getPointerOperand());
  902. else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
  903. PtrArgs.push_back(SI->getPointerOperand());
  904. else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
  905. PtrArgs.push_back(VAAI->getPointerOperand());
  906. else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
  907. PtrArgs.push_back(CXI->getPointerOperand());
  908. else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
  909. PtrArgs.push_back(RMWI->getPointerOperand());
  910. else if (const auto *Call = dyn_cast<CallBase>(I)) {
  911. // If we know that the call does not access memory, then we'll still
  912. // know that about the inlined clone of this call site, and we don't
  913. // need to add metadata.
  914. if (Call->doesNotAccessMemory())
  915. continue;
  916. IsFuncCall = true;
  917. if (CalleeAAR) {
  918. FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
  919. if (AAResults::onlyAccessesArgPointees(MRB))
  920. IsArgMemOnlyCall = true;
  921. }
  922. for (Value *Arg : Call->args()) {
  923. // We need to check the underlying objects of all arguments, not just
  924. // the pointer arguments, because we might be passing pointers as
  925. // integers, etc.
  926. // However, if we know that the call only accesses pointer arguments,
  927. // then we only need to check the pointer arguments.
  928. if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
  929. continue;
  930. PtrArgs.push_back(Arg);
  931. }
  932. }
  933. // If we found no pointers, then this instruction is not suitable for
  934. // pairing with an instruction to receive aliasing metadata.
  935. // However, if this is a call, this we might just alias with none of the
  936. // noalias arguments.
  937. if (PtrArgs.empty() && !IsFuncCall)
  938. continue;
  939. // It is possible that there is only one underlying object, but you
  940. // need to go through several PHIs to see it, and thus could be
  941. // repeated in the Objects list.
  942. SmallPtrSet<const Value *, 4> ObjSet;
  943. SmallVector<Metadata *, 4> Scopes, NoAliases;
  944. SmallSetVector<const Argument *, 4> NAPtrArgs;
  945. for (const Value *V : PtrArgs) {
  946. SmallVector<const Value *, 4> Objects;
  947. getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
  948. for (const Value *O : Objects)
  949. ObjSet.insert(O);
  950. }
  951. // Figure out if we're derived from anything that is not a noalias
  952. // argument.
  953. bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
  954. for (const Value *V : ObjSet) {
  955. // Is this value a constant that cannot be derived from any pointer
  956. // value (we need to exclude constant expressions, for example, that
  957. // are formed from arithmetic on global symbols).
  958. bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
  959. isa<ConstantPointerNull>(V) ||
  960. isa<ConstantDataVector>(V) || isa<UndefValue>(V);
  961. if (IsNonPtrConst)
  962. continue;
  963. // If this is anything other than a noalias argument, then we cannot
  964. // completely describe the aliasing properties using alias.scope
  965. // metadata (and, thus, won't add any).
  966. if (const Argument *A = dyn_cast<Argument>(V)) {
  967. if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
  968. UsesAliasingPtr = true;
  969. } else {
  970. UsesAliasingPtr = true;
  971. }
  972. // If this is not some identified function-local object (which cannot
  973. // directly alias a noalias argument), or some other argument (which,
  974. // by definition, also cannot alias a noalias argument), then we could
975. // alias a noalias argument that has been captured.
  976. if (!isa<Argument>(V) &&
  977. !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
  978. CanDeriveViaCapture = true;
  979. }
  980. // A function call can always get captured noalias pointers (via other
  981. // parameters, globals, etc.).
  982. if (IsFuncCall && !IsArgMemOnlyCall)
  983. CanDeriveViaCapture = true;
  984. // First, we want to figure out all of the sets with which we definitely
985. // don't alias. Iterate over all noalias sets, and add those for which:
  986. // 1. The noalias argument is not in the set of objects from which we
  987. // definitely derive.
  988. // 2. The noalias argument has not yet been captured.
  989. // An arbitrary function that might load pointers could see captured
  990. // noalias arguments via other noalias arguments or globals, and so we
  991. // must always check for prior capture.
  992. for (const Argument *A : NoAliasArgs) {
  993. if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
  994. // It might be tempting to skip the
  995. // PointerMayBeCapturedBefore check if
  996. // A->hasNoCaptureAttr() is true, but this is
  997. // incorrect because nocapture only guarantees
  998. // that no copies outlive the function, not
  999. // that the value cannot be locally captured.
  1000. !PointerMayBeCapturedBefore(A,
  1001. /* ReturnCaptures */ false,
  1002. /* StoreCaptures */ false, I, &DT)))
  1003. NoAliases.push_back(NewScopes[A]);
  1004. }
  1005. if (!NoAliases.empty())
  1006. NI->setMetadata(LLVMContext::MD_noalias,
  1007. MDNode::concatenate(
  1008. NI->getMetadata(LLVMContext::MD_noalias),
  1009. MDNode::get(CalledFunc->getContext(), NoAliases)));
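// NI now carries a !noalias list with one scope per noalias argument that it
// provably does not access (the argument is not an underlying object of NI's
// pointers and has not been captured before NI).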
  1010. // Next, we want to figure out all of the sets to which we might belong.
  1011. // We might belong to a set if the noalias argument is in the set of
  1012. // underlying objects. If there is some non-noalias argument in our list
  1013. // of underlying objects, then we cannot add a scope because the fact
  1014. // that some access does not alias with any set of our noalias arguments
  1015. // cannot itself guarantee that it does not alias with this access
  1016. // (because there is some pointer of unknown origin involved and the
  1017. // other access might also depend on this pointer). We also cannot add
  1018. // scopes to arbitrary functions unless we know they don't access any
  1019. // non-parameter pointer-values.
  1020. bool CanAddScopes = !UsesAliasingPtr;
  1021. if (CanAddScopes && IsFuncCall)
  1022. CanAddScopes = IsArgMemOnlyCall;
  1023. if (CanAddScopes)
  1024. for (const Argument *A : NoAliasArgs) {
  1025. if (ObjSet.count(A))
  1026. Scopes.push_back(NewScopes[A]);
  1027. }
  1028. if (!Scopes.empty())
  1029. NI->setMetadata(
  1030. LLVMContext::MD_alias_scope,
  1031. MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
  1032. MDNode::get(CalledFunc->getContext(), Scopes)));
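// Conversely, NI is tagged with the alias scopes of the noalias arguments it
// is known to derive from, so other accesses can be proven disjoint from it
// via their !noalias lists.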
  1033. }
  1034. }
  1035. }
  1036. static bool MayContainThrowingOrExitingCall(Instruction *Begin,
  1037. Instruction *End) {
  1038. assert(Begin->getParent() == End->getParent() &&
  1039. "Expected to be in same basic block!");
  1040. unsigned NumInstChecked = 0;
  1041. // Check that all instructions in the range [Begin, End) are guaranteed to
  1042. // transfer execution to successor.
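// Conservatively return true (i.e. "may throw or exit") once more than
// InlinerAttributeWindow instructions have been scanned, to bound compile time.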
  1043. for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
  1044. if (NumInstChecked++ > InlinerAttributeWindow ||
  1045. !isGuaranteedToTransferExecutionToSuccessor(&I))
  1046. return true;
  1047. return false;
  1048. }
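// Collect the return attributes on the call site CB that are safe to copy onto
// the calls feeding the callee's return value (see AddReturnAttributes below):
// currently dereferenceable, dereferenceable_or_null, noalias and nonnull.
// E.g. (illustrative) for a call site like `%r = call nonnull dereferenceable(8) i8* @f()`,
// both attributes survive, while call-site-only attributes such as zeroext do not.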
  1049. static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
  1050. AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex);
  1051. if (AB.empty())
  1052. return AB;
  1053. AttrBuilder Valid;
1054. // Only allow these whitelisted attributes to be propagated back to the
  1055. // callee. This is because other attributes may only be valid on the call
  1056. // itself, i.e. attributes such as signext and zeroext.
  1057. if (auto DerefBytes = AB.getDereferenceableBytes())
  1058. Valid.addDereferenceableAttr(DerefBytes);
  1059. if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
  1060. Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
  1061. if (AB.contains(Attribute::NoAlias))
  1062. Valid.addAttribute(Attribute::NoAlias);
  1063. if (AB.contains(Attribute::NonNull))
  1064. Valid.addAttribute(Attribute::NonNull);
  1065. return Valid;
  1066. }
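// Propagate the attributes collected by IdentifyValidAttributes from the call
// site CB onto the cloned calls (found via VMap) that produce the callee's
// return value. Illustrative IR: if the callee returns `%rv = call i8* @foo()`
// and the call site is `call nonnull i8* @callee()`, the cloned call becomes
// `%rv.i = call nonnull i8* @foo()` after inlining, provided no throwing or
// exiting instruction sits between the call and its return.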
  1067. static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
  1068. if (!UpdateReturnAttributes)
  1069. return;
  1070. AttrBuilder Valid = IdentifyValidAttributes(CB);
  1071. if (Valid.empty())
  1072. return;
  1073. auto *CalledFunction = CB.getCalledFunction();
  1074. auto &Context = CalledFunction->getContext();
  1075. for (auto &BB : *CalledFunction) {
  1076. auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
  1077. if (!RI || !isa<CallBase>(RI->getOperand(0)))
  1078. continue;
  1079. auto *RetVal = cast<CallBase>(RI->getOperand(0));
  1080. // Sanity check that the cloned RetVal exists and is a call, otherwise we
  1081. // cannot add the attributes on the cloned RetVal.
  1082. // Simplification during inlining could have transformed the cloned
  1083. // instruction.
  1084. auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
  1085. if (!NewRetVal)
  1086. continue;
  1087. // Backward propagation of attributes to the returned value may be incorrect
  1088. // if it is control flow dependent.
  1089. // Consider:
  1090. // @callee {
  1091. // %rv = call @foo()
  1092. // %rv2 = call @bar()
  1093. // if (%rv2 != null)
  1094. // return %rv2
  1095. // if (%rv == null)
  1096. // exit()
  1097. // return %rv
  1098. // }
  1099. // caller() {
  1100. // %val = call nonnull @callee()
  1101. // }
  1102. // Here we cannot add the nonnull attribute on either foo or bar. So, we
1103. // only proceed when RetVal and RI are in the same basic block and there
1104. // are no throwing/exiting instructions between them.
  1105. if (RI->getParent() != RetVal->getParent() ||
  1106. MayContainThrowingOrExitingCall(RetVal, RI))
  1107. continue;
  1108. // Add to the existing attributes of NewRetVal, i.e. the cloned call
  1109. // instruction.
  1110. // NB! When we have the same attribute already existing on NewRetVal, but
  1111. // with a differing value, the AttributeList's merge API honours the already
  1112. // existing attribute value (i.e. attributes such as dereferenceable,
  1113. // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
  1114. AttributeList AL = NewRetVal->getAttributes();
  1115. AttributeList NewAL =
  1116. AL.addAttributes(Context, AttributeList::ReturnIndex, Valid);
  1117. NewRetVal->setAttributes(NewAL);
  1118. }
  1119. }
  1120. /// If the inlined function has non-byval align arguments, then
  1121. /// add @llvm.assume-based alignment assumptions to preserve this information.
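/// Illustratively (not emitted verbatim), for a parameter declared as
/// `i32* align 16 %p` this creates roughly:
///   %ptrint = ptrtoint i32* %arg to i64
///   %maskedptr = and i64 %ptrint, 15
///   %maskcond = icmp eq i64 %maskedptr, 0
///   call void @llvm.assume(i1 %maskcond)
/// in the caller, unless the alignment is already provable there.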
  1122. static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
  1123. if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
  1124. return;
  1125. AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
  1126. auto &DL = CB.getCaller()->getParent()->getDataLayout();
  1127. // To avoid inserting redundant assumptions, we should check for assumptions
  1128. // already in the caller. To do this, we might need a DT of the caller.
  1129. DominatorTree DT;
  1130. bool DTCalculated = false;
  1131. Function *CalledFunc = CB.getCalledFunction();
  1132. for (Argument &Arg : CalledFunc->args()) {
  1133. unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
  1134. if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) {
  1135. if (!DTCalculated) {
  1136. DT.recalculate(*CB.getCaller());
  1137. DTCalculated = true;
  1138. }
  1139. // If we can already prove the asserted alignment in the context of the
  1140. // caller, then don't bother inserting the assumption.
  1141. Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
  1142. if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= Align)
  1143. continue;
  1144. CallInst *NewAsmp =
  1145. IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align);
  1146. AC->registerAssumption(NewAsmp);
  1147. }
  1148. }
  1149. }
  1150. /// Once we have cloned code over from a callee into the caller,
  1151. /// update the specified callgraph to reflect the changes we made.
  1152. /// Note that it's possible that not all code was copied over, so only
  1153. /// some edges of the callgraph may remain.
  1154. static void UpdateCallGraphAfterInlining(CallBase &CB,
  1155. Function::iterator FirstNewBlock,
  1156. ValueToValueMapTy &VMap,
  1157. InlineFunctionInfo &IFI) {
  1158. CallGraph &CG = *IFI.CG;
  1159. const Function *Caller = CB.getCaller();
  1160. const Function *Callee = CB.getCalledFunction();
  1161. CallGraphNode *CalleeNode = CG[Callee];
  1162. CallGraphNode *CallerNode = CG[Caller];
  1163. // Since we inlined some uninlined call sites in the callee into the caller,
  1164. // add edges from the caller to all of the callees of the callee.
  1165. CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
  1166. // Consider the case where CalleeNode == CallerNode.
  1167. CallGraphNode::CalledFunctionsVector CallCache;
  1168. if (CalleeNode == CallerNode) {
  1169. CallCache.assign(I, E);
  1170. I = CallCache.begin();
  1171. E = CallCache.end();
  1172. }
  1173. for (; I != E; ++I) {
1174. // Skip 'reference' call records.
  1175. if (!I->first)
  1176. continue;
  1177. const Value *OrigCall = *I->first;
  1178. ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
  1179. // Only copy the edge if the call was inlined!
  1180. if (VMI == VMap.end() || VMI->second == nullptr)
  1181. continue;
  1182. // If the call was inlined, but then constant folded, there is no edge to
  1183. // add. Check for this case.
  1184. auto *NewCall = dyn_cast<CallBase>(VMI->second);
  1185. if (!NewCall)
  1186. continue;
  1187. // We do not treat intrinsic calls like real function calls because we
  1188. // expect them to become inline code; do not add an edge for an intrinsic.
  1189. if (NewCall->getCalledFunction() &&
  1190. NewCall->getCalledFunction()->isIntrinsic())
  1191. continue;
  1192. // Remember that this call site got inlined for the client of
  1193. // InlineFunction.
  1194. IFI.InlinedCalls.push_back(NewCall);
  1195. // It's possible that inlining the callsite will cause it to go from an
  1196. // indirect to a direct call by resolving a function pointer. If this
  1197. // happens, set the callee of the new call site to a more precise
  1198. // destination. This can also happen if the call graph node of the caller
  1199. // was just unnecessarily imprecise.
  1200. if (!I->second->getFunction())
  1201. if (Function *F = NewCall->getCalledFunction()) {
  1202. // Indirect call site resolved to direct call.
  1203. CallerNode->addCalledFunction(NewCall, CG[F]);
  1204. continue;
  1205. }
  1206. CallerNode->addCalledFunction(NewCall, I->second);
  1207. }
  1208. // Update the call graph by deleting the edge from Callee to Caller. We must
  1209. // do this after the loop above in case Caller and Callee are the same.
  1210. CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
  1211. }
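// Emit the explicit copy for an inlined byval argument: an unaligned memcpy of
// the aggregate from the original pointer (Src) into the temporary (Dst) made
// by HandleByValArgument, inserted at the top of InsertBlock. Roughly:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 <size>, i1 false)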
  1212. static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
  1213. BasicBlock *InsertBlock,
  1214. InlineFunctionInfo &IFI) {
  1215. Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
  1216. IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
  1217. Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
  1218. // Always generate a memcpy of alignment 1 here because we don't know
  1219. // the alignment of the src pointer. Other optimizations can infer
  1220. // better alignment.
  1221. Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
  1222. /*SrcAlign*/ Align(1), Size);
  1223. }
  1224. /// When inlining a call site that has a byval argument,
  1225. /// we have to make the implicit memcpy explicit by adding it.
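/// The copy (and its temporary alloca) can be elided when the callee only
/// reads memory and the incoming pointer already is, or can be made,
/// sufficiently aligned; see the body below.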
  1226. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
  1227. const Function *CalledFunc,
  1228. InlineFunctionInfo &IFI,
  1229. unsigned ByValAlignment) {
  1230. PointerType *ArgTy = cast<PointerType>(Arg->getType());
  1231. Type *AggTy = ArgTy->getElementType();
  1232. Function *Caller = TheCall->getFunction();
  1233. const DataLayout &DL = Caller->getParent()->getDataLayout();
  1234. // If the called function is readonly, then it could not mutate the caller's
  1235. // copy of the byval'd memory. In this case, it is safe to elide the copy and
  1236. // temporary.
  1237. if (CalledFunc->onlyReadsMemory()) {
1238. // If the byval argument has a specified alignment that is greater than the
1239. // alignment of the passed-in pointer, then we either have to round up the
1240. // input pointer or give up on this transformation.
  1241. if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
  1242. return Arg;
  1243. AssumptionCache *AC =
  1244. IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
  1245. // If the pointer is already known to be sufficiently aligned, or if we can
  1246. // round it up to a larger alignment, then we don't need a temporary.
  1247. if (getOrEnforceKnownAlignment(Arg, Align(ByValAlignment), DL, TheCall,
  1248. AC) >= ByValAlignment)
  1249. return Arg;
  1250. // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
  1251. // for code quality, but rarely happens and is required for correctness.
  1252. }
  1253. // Create the alloca. If we have DataLayout, use nice alignment.
  1254. Align Alignment(DL.getPrefTypeAlignment(AggTy));
  1255. // If the byval had an alignment specified, we *must* use at least that
  1256. // alignment, as it is required by the byval argument (and uses of the
  1257. // pointer inside the callee).
  1258. Alignment = max(Alignment, MaybeAlign(ByValAlignment));
  1259. Value *NewAlloca =
  1260. new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
  1261. Arg->getName(), &*Caller->begin()->begin());
  1262. IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
  1263. // Uses of the argument in the function should use our new alloca
  1264. // instead.
  1265. return NewAlloca;
  1266. }
  1267. // Check whether this Value is used by a lifetime intrinsic.
  1268. static bool isUsedByLifetimeMarker(Value *V) {
  1269. for (User *U : V->users())
  1270. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
  1271. if (II->isLifetimeStartOrEnd())
  1272. return true;
  1273. return false;
  1274. }
  1275. // Check whether the given alloca already has
  1276. // lifetime.start or lifetime.end intrinsics.
  1277. static bool hasLifetimeMarkers(AllocaInst *AI) {
  1278. Type *Ty = AI->getType();
  1279. Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
  1280. Ty->getPointerAddressSpace());
  1281. if (Ty == Int8PtrTy)
  1282. return isUsedByLifetimeMarker(AI);
  1283. // Do a scan to find all the casts to i8*.
  1284. for (User *U : AI->users()) {
  1285. if (U->getType() != Int8PtrTy) continue;
  1286. if (U->stripPointerCasts() != AI) continue;
  1287. if (isUsedByLifetimeMarker(U))
  1288. return true;
  1289. }
  1290. return false;
  1291. }
  1292. /// Return the result of AI->isStaticAlloca() if AI were moved to the entry
  1293. /// block. Allocas used in inalloca calls and allocas of dynamic array size
  1294. /// cannot be static.
1295. static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
  1296. return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
  1297. }
  1298. /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
  1299. /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
  1300. static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
  1301. LLVMContext &Ctx,
  1302. DenseMap<const MDNode *, MDNode *> &IANodes) {
  1303. auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
  1304. return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
  1305. OrigDL.getScope(), IA);
  1306. }
  1307. /// Update inlined instructions' line numbers to
1308. /// encode the location where these instructions are inlined.
  1309. static void fixupLineNumbers(Function *Fn, Function::iterator FI,
  1310. Instruction *TheCall, bool CalleeHasDebugInfo) {
  1311. const DebugLoc &TheCallDL = TheCall->getDebugLoc();
  1312. if (!TheCallDL)
  1313. return;
  1314. auto &Ctx = Fn->getContext();
  1315. DILocation *InlinedAtNode = TheCallDL;
  1316. // Create a unique call site, not to be confused with any other call from the
  1317. // same location.
  1318. InlinedAtNode = DILocation::getDistinct(
  1319. Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
  1320. InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
1321. // Cache the inlined-at nodes as they're built so they are reused; without
1322. // this, every instruction's inlined-at chain would become distinct from the
1323. // others.
  1324. DenseMap<const MDNode *, MDNode *> IANodes;
  1325. // Check if we are not generating inline line tables and want to use
  1326. // the call site location instead.
  1327. bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
  1328. for (; FI != Fn->end(); ++FI) {
  1329. for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
  1330. BI != BE; ++BI) {
  1331. // Loop metadata needs to be updated so that the start and end locs
  1332. // reference inlined-at locations.
  1333. auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes](
  1334. const DILocation &Loc) -> DILocation * {
  1335. return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get();
  1336. };
  1337. updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
  1338. if (!NoInlineLineTables)
  1339. if (DebugLoc DL = BI->getDebugLoc()) {
  1340. DebugLoc IDL =
  1341. inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
  1342. BI->setDebugLoc(IDL);
  1343. continue;
  1344. }
  1345. if (CalleeHasDebugInfo && !NoInlineLineTables)
  1346. continue;
  1347. // If the inlined instruction has no line number, or if inline info
  1348. // is not being generated, make it look as if it originates from the call
1349. // location. This is important for ((__always_inline__, __nodebug__))
  1350. // functions which must use caller location for all instructions in their
  1351. // function body.
  1352. // Don't update static allocas, as they may get moved later.
  1353. if (auto *AI = dyn_cast<AllocaInst>(BI))
  1354. if (allocaWouldBeStaticInEntry(AI))
  1355. continue;
  1356. BI->setDebugLoc(TheCallDL);
  1357. }
  1358. // Remove debug info intrinsics if we're not keeping inline info.
  1359. if (NoInlineLineTables) {
  1360. BasicBlock::iterator BI = FI->begin();
  1361. while (BI != FI->end()) {
  1362. if (isa<DbgInfoIntrinsic>(BI)) {
  1363. BI = BI->eraseFromParent();
  1364. continue;
  1365. }
  1366. ++BI;
  1367. }
  1368. }
  1369. }
  1370. }
  1371. /// Update the block frequencies of the caller after a callee has been inlined.
  1372. ///
  1373. /// Each block cloned into the caller has its block frequency scaled by the
  1374. /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
  1375. /// callee's entry block gets the same frequency as the callsite block and the
  1376. /// relative frequencies of all cloned blocks remain the same after cloning.
  1377. static void updateCallerBFI(BasicBlock *CallSiteBlock,
  1378. const ValueToValueMapTy &VMap,
  1379. BlockFrequencyInfo *CallerBFI,
  1380. BlockFrequencyInfo *CalleeBFI,
  1381. const BasicBlock &CalleeEntryBlock) {
  1382. SmallPtrSet<BasicBlock *, 16> ClonedBBs;
  1383. for (auto Entry : VMap) {
  1384. if (!isa<BasicBlock>(Entry.first) || !Entry.second)
  1385. continue;
  1386. auto *OrigBB = cast<BasicBlock>(Entry.first);
  1387. auto *ClonedBB = cast<BasicBlock>(Entry.second);
  1388. uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
  1389. if (!ClonedBBs.insert(ClonedBB).second) {
  1390. // Multiple blocks in the callee might get mapped to one cloned block in
  1391. // the caller since we prune the callee as we clone it. When that happens,
  1392. // we want to use the maximum among the original blocks' frequencies.
  1393. uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
  1394. if (NewFreq > Freq)
  1395. Freq = NewFreq;
  1396. }
  1397. CallerBFI->setBlockFreq(ClonedBB, Freq);
  1398. }
  1399. BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
  1400. CallerBFI->setBlockFreqAndScale(
  1401. EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
  1402. ClonedBBs);
  1403. }
  1404. /// Update the branch metadata for cloned call instructions.
  1405. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
  1406. const ProfileCount &CalleeEntryCount,
  1407. const CallBase &TheCall, ProfileSummaryInfo *PSI,
  1408. BlockFrequencyInfo *CallerBFI) {
  1409. if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
  1410. CalleeEntryCount.getCount() < 1)
  1411. return;
  1412. auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
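  // Attribute at most the callee's entire entry count to this call site, and
  // subtract that amount from the callee's profile now that it runs inline.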
  1413. int64_t CallCount =
  1414. std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount());
  1415. updateProfileCallee(Callee, -CallCount, &VMap);
  1416. }
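// Adjust Callee's profile after inlining: shift its entry count by entryDelta
// (negative when executions move into a caller) and rescale the branch weights
// of call sites, both the clones recorded in VMap and those remaining in
// Callee, so they stay consistent with the new entry count.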
  1417. void llvm::updateProfileCallee(
  1418. Function *Callee, int64_t entryDelta,
  1419. const ValueMap<const Value *, WeakTrackingVH> *VMap) {
  1420. auto CalleeCount = Callee->getEntryCount();
  1421. if (!CalleeCount.hasValue())
  1422. return;
  1423. uint64_t priorEntryCount = CalleeCount.getCount();
  1424. uint64_t newEntryCount;
  1425. // Since CallSiteCount is an estimate, it could exceed the original callee
1426. // count; the new entry count is then clamped to 0 to guard against underflow.
  1427. if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
  1428. newEntryCount = 0;
  1429. else
  1430. newEntryCount = priorEntryCount + entryDelta;
1431. // Are we updating the profile as part of inlining (i.e. was a VMap provided)?
  1432. if (VMap) {
  1433. uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
  1434. for (auto Entry : *VMap)
  1435. if (isa<CallInst>(Entry.first))
  1436. if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
  1437. CI->updateProfWeight(cloneEntryCount, priorEntryCount);
  1438. }
  1439. if (entryDelta) {
  1440. Callee->setEntryCount(newEntryCount);
  1441. for (BasicBlock &BB : *Callee)
  1442. // No need to update the callsite if it is pruned during inlining.
  1443. if (!VMap || VMap->count(&BB))
  1444. for (Instruction &I : BB)
  1445. if (CallInst *CI = dyn_cast<CallInst>(&I))
  1446. CI->updateProfWeight(newEntryCount, priorEntryCount);
  1447. }
  1448. }
  1449. /// This function inlines the called function into the basic block of the
  1450. /// caller. This returns false if it is not possible to inline this call.
1451. /// The program is still in a well-defined state if this occurs, though.
  1452. ///
  1453. /// Note that this only does one level of inlining. For example, if the
  1454. /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
  1455. /// exists in the instruction stream. Similarly this will inline a recursive
  1456. /// function by one level.
  1457. llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
  1458. AAResults *CalleeAAR,
  1459. bool InsertLifetime,
  1460. Function *ForwardVarArgsTo) {
  1461. assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
  1462. // FIXME: we don't inline callbr yet.
  1463. if (isa<CallBrInst>(CB))
  1464. return InlineResult::failure("We don't inline callbr yet.");
  1465. // If IFI has any state in it, zap it before we fill it in.
  1466. IFI.reset();
  1467. Function *CalledFunc = CB.getCalledFunction();
  1468. if (!CalledFunc || // Can't inline external function or indirect
  1469. CalledFunc->isDeclaration()) // call!
  1470. return InlineResult::failure("external or indirect");
  1471. // The inliner does not know how to inline through calls with operand bundles
  1472. // in general ...
  1473. if (CB.hasOperandBundles()) {
  1474. for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
  1475. uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
  1476. // ... but it knows how to inline through "deopt" operand bundles ...
  1477. if (Tag == LLVMContext::OB_deopt)
  1478. continue;
  1479. // ... and "funclet" operand bundles.
  1480. if (Tag == LLVMContext::OB_funclet)
  1481. continue;
  1482. return InlineResult::failure("unsupported operand bundle");
  1483. }
  1484. }
  1485. // If the call to the callee cannot throw, set the 'nounwind' flag on any
  1486. // calls that we inline.
  1487. bool MarkNoUnwind = CB.doesNotThrow();
  1488. BasicBlock *OrigBB = CB.getParent();
  1489. Function *Caller = OrigBB->getParent();
  1490. // GC poses two hazards to inlining, which only occur when the callee has GC:
  1491. // 1. If the caller has no GC, then the callee's GC must be propagated to the
  1492. // caller.
  1493. // 2. If the caller has a differing GC, it is invalid to inline.
  1494. if (CalledFunc->hasGC()) {
  1495. if (!Caller->hasGC())
  1496. Caller->setGC(CalledFunc->getGC());
  1497. else if (CalledFunc->getGC() != Caller->getGC())
  1498. return InlineResult::failure("incompatible GC");
  1499. }
  1500. // Get the personality function from the callee if it contains a landing pad.
  1501. Constant *CalledPersonality =
  1502. CalledFunc->hasPersonalityFn()
  1503. ? CalledFunc->getPersonalityFn()->stripPointerCasts()
  1504. : nullptr;
  1505. // Find the personality function used by the landing pads of the caller. If it
  1506. // exists, then check to see that it matches the personality function used in
  1507. // the callee.
  1508. Constant *CallerPersonality =
  1509. Caller->hasPersonalityFn()
  1510. ? Caller->getPersonalityFn()->stripPointerCasts()
  1511. : nullptr;
  1512. if (CalledPersonality) {
  1513. if (!CallerPersonality)
  1514. Caller->setPersonalityFn(CalledPersonality);
  1515. // If the personality functions match, then we can perform the
  1516. // inlining. Otherwise, we can't inline.
  1517. // TODO: This isn't 100% true. Some personality functions are proper
  1518. // supersets of others and can be used in place of the other.
  1519. else if (CalledPersonality != CallerPersonality)
  1520. return InlineResult::failure("incompatible personality");
  1521. }
  1522. // We need to figure out which funclet the callsite was in so that we may
  1523. // properly nest the callee.
  1524. Instruction *CallSiteEHPad = nullptr;
  1525. if (CallerPersonality) {
  1526. EHPersonality Personality = classifyEHPersonality(CallerPersonality);
  1527. if (isScopedEHPersonality(Personality)) {
  1528. Optional<OperandBundleUse> ParentFunclet =
  1529. CB.getOperandBundle(LLVMContext::OB_funclet);
  1530. if (ParentFunclet)
  1531. CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
  1532. // OK, the inlining site is legal. What about the target function?
  1533. if (CallSiteEHPad) {
  1534. if (Personality == EHPersonality::MSVC_CXX) {
  1535. // The MSVC personality cannot tolerate catches getting inlined into
  1536. // cleanup funclets.
  1537. if (isa<CleanupPadInst>(CallSiteEHPad)) {
  1538. // Ok, the call site is within a cleanuppad. Let's check the callee
  1539. // for catchpads.
  1540. for (const BasicBlock &CalledBB : *CalledFunc) {
  1541. if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
  1542. return InlineResult::failure("catch in cleanup funclet");
  1543. }
  1544. }
  1545. } else if (isAsynchronousEHPersonality(Personality)) {
1546. // SEH is even less tolerant; there may not be any sort of exceptional
1547. // funclet in the callee.
  1548. for (const BasicBlock &CalledBB : *CalledFunc) {
  1549. if (CalledBB.isEHPad())
  1550. return InlineResult::failure("SEH in cleanup funclet");
  1551. }
  1552. }
  1553. }
  1554. }
  1555. }
  1556. // Determine if we are dealing with a call in an EHPad which does not unwind
  1557. // to caller.
  1558. bool EHPadForCallUnwindsLocally = false;
  1559. if (CallSiteEHPad && isa<CallInst>(CB)) {
  1560. UnwindDestMemoTy FuncletUnwindMap;
  1561. Value *CallSiteUnwindDestToken =
  1562. getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
  1563. EHPadForCallUnwindsLocally =
  1564. CallSiteUnwindDestToken &&
  1565. !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
  1566. }
  1567. // Get an iterator to the last basic block in the function, which will have
  1568. // the new function inlined after it.
  1569. Function::iterator LastBlock = --Caller->end();
  1570. // Make sure to capture all of the return instructions from the cloned
  1571. // function.
  1572. SmallVector<ReturnInst*, 8> Returns;
  1573. ClonedCodeInfo InlinedFunctionInfo;
  1574. Function::iterator FirstNewBlock;
  1575. { // Scope to destroy VMap after cloning.
  1576. ValueToValueMapTy VMap;
1577. // Keep a list of pairs (dst, src) to emit byval initializations.
  1578. SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
  1579. // When inlining a function that contains noalias scope metadata,
  1580. // this metadata needs to be cloned so that the inlined blocks
  1581. // have different "unique scopes" at every call site.
  1582. // Track the metadata that must be cloned. Do this before other changes to
  1583. // the function, so that we do not get in trouble when inlining caller ==
  1584. // callee.
  1585. ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
  1586. auto &DL = Caller->getParent()->getDataLayout();
  1587. // Calculate the vector of arguments to pass into the function cloner, which
  1588. // matches up the formal to the actual argument values.
  1589. auto AI = CB.arg_begin();
  1590. unsigned ArgNo = 0;
  1591. for (Function::arg_iterator I = CalledFunc->arg_begin(),
  1592. E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
  1593. Value *ActualArg = *AI;
1594. // When byval arguments are actually inlined, we need to make the copy implied
  1595. // by them explicit. However, we don't do this if the callee is readonly
  1596. // or readnone, because the copy would be unneeded: the callee doesn't
  1597. // modify the struct.
  1598. if (CB.isByValArgument(ArgNo)) {
  1599. ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI,
  1600. CalledFunc->getParamAlignment(ArgNo));
  1601. if (ActualArg != *AI)
  1602. ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
  1603. }
  1604. VMap[&*I] = ActualArg;
  1605. }
  1606. // TODO: Remove this when users have been updated to the assume bundles.
  1607. // Add alignment assumptions if necessary. We do this before the inlined
  1608. // instructions are actually cloned into the caller so that we can easily
  1609. // check what will be known at the start of the inlined code.
  1610. AddAlignmentAssumptions(CB, IFI);
  1611. AssumptionCache *AC =
  1612. IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1613. // Preserve all attributes on the call and its parameters.
  1614. salvageKnowledge(&CB, AC);
  1615. // We want the inliner to prune the code as it copies. We would LOVE to
  1616. // have no dead or constant instructions leftover after inlining occurs
  1617. // (which can happen, e.g., because an argument was constant), but we'll be
  1618. // happy with whatever the cloner can do.
  1619. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
  1620. /*ModuleLevelChanges=*/false, Returns, ".i",
  1621. &InlinedFunctionInfo, &CB);
  1622. // Remember the first block that is newly cloned over.
  1623. FirstNewBlock = LastBlock; ++FirstNewBlock;
  1624. if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
  1625. // Update the BFI of blocks cloned into the caller.
  1626. updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
  1627. CalledFunc->front());
  1628. updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
  1629. IFI.PSI, IFI.CallerBFI);
  1630. // Inject byval arguments initialization.
  1631. for (std::pair<Value*, Value*> &Init : ByValInit)
  1632. HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
  1633. &*FirstNewBlock, IFI);
  1634. Optional<OperandBundleUse> ParentDeopt =
  1635. CB.getOperandBundle(LLVMContext::OB_deopt);
  1636. if (ParentDeopt) {
  1637. SmallVector<OperandBundleDef, 2> OpDefs;
  1638. for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
  1639. CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
  1640. if (!ICS)
  1641. continue; // instruction was DCE'd or RAUW'ed to undef
  1642. OpDefs.clear();
  1643. OpDefs.reserve(ICS->getNumOperandBundles());
  1644. for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
  1645. ++COBi) {
  1646. auto ChildOB = ICS->getOperandBundleAt(COBi);
  1647. if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
  1648. // If the inlined call has other operand bundles, let them be
  1649. OpDefs.emplace_back(ChildOB);
  1650. continue;
  1651. }
  1652. // It may be useful to separate this logic (of handling operand
  1653. // bundles) out to a separate "policy" component if this gets crowded.
  1654. // Prepend the parent's deoptimization continuation to the newly
  1655. // inlined call's deoptimization continuation.
  1656. std::vector<Value *> MergedDeoptArgs;
  1657. MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
  1658. ChildOB.Inputs.size());
  1659. llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
  1660. llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
  1661. OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
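// Illustratively, a parent "deopt"(i32 10, i32 20) bundle merged with a child
// "deopt"(i32 30) bundle yields "deopt"(i32 10, i32 20, i32 30).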
  1662. }
  1663. Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS);
  1664. // Note: the RAUW does the appropriate fixup in VMap, so we need to do
  1665. // this even if the call returns void.
  1666. ICS->replaceAllUsesWith(NewI);
  1667. VH = nullptr;
  1668. ICS->eraseFromParent();
  1669. }
  1670. }
  1671. // Update the callgraph if requested.
  1672. if (IFI.CG)
  1673. UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI);
  1674. // For 'nodebug' functions, the associated DISubprogram is always null.
  1675. // Conservatively avoid propagating the callsite debug location to
  1676. // instructions inlined from a function whose DISubprogram is not null.
  1677. fixupLineNumbers(Caller, FirstNewBlock, &CB,
  1678. CalledFunc->getSubprogram() != nullptr);
  1679. // Now clone the inlined noalias scope metadata.
  1680. SAMetadataCloner.clone();
  1681. SAMetadataCloner.remap(FirstNewBlock, Caller->end());
  1682. // Add noalias metadata if necessary.
  1683. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
  1684. // Clone return attributes on the callsite into the calls within the inlined
  1685. // function which feed into its return value.
  1686. AddReturnAttributes(CB, VMap);
  1687. // Propagate metadata on the callsite if necessary.
  1688. PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
  1689. // Register any cloned assumptions.
  1690. if (IFI.GetAssumptionCache)
  1691. for (BasicBlock &NewBlock :
  1692. make_range(FirstNewBlock->getIterator(), Caller->end()))
  1693. for (Instruction &I : NewBlock)
  1694. if (auto *II = dyn_cast<IntrinsicInst>(&I))
  1695. if (II->getIntrinsicID() == Intrinsic::assume)
  1696. IFI.GetAssumptionCache(*Caller).registerAssumption(II);
  1697. }
  1698. // If there are any alloca instructions in the block that used to be the entry
  1699. // block for the callee, move them to the entry block of the caller. First
  1700. // calculate which instruction they should be inserted before. We insert the
  1701. // instructions at the end of the current alloca list.
  1702. {
  1703. BasicBlock::iterator InsertPoint = Caller->begin()->begin();
  1704. for (BasicBlock::iterator I = FirstNewBlock->begin(),
  1705. E = FirstNewBlock->end(); I != E; ) {
  1706. AllocaInst *AI = dyn_cast<AllocaInst>(I++);
  1707. if (!AI) continue;
  1708. // If the alloca is now dead, remove it. This often occurs due to code
  1709. // specialization.
  1710. if (AI->use_empty()) {
  1711. AI->eraseFromParent();
  1712. continue;
  1713. }
  1714. if (!allocaWouldBeStaticInEntry(AI))
  1715. continue;
  1716. // Keep track of the static allocas that we inline into the caller.
  1717. IFI.StaticAllocas.push_back(AI);
  1718. // Scan for the block of allocas that we can move over, and move them
  1719. // all at once.
  1720. while (isa<AllocaInst>(I) &&
  1721. !cast<AllocaInst>(I)->use_empty() &&
  1722. allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
  1723. IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
  1724. ++I;
  1725. }
  1726. // Transfer all of the allocas over in a block. Using splice means
  1727. // that the instructions aren't removed from the symbol table, then
  1728. // reinserted.
  1729. Caller->getEntryBlock().getInstList().splice(
  1730. InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
  1731. }
  1732. }
  1733. SmallVector<Value*,4> VarArgsToForward;
  1734. SmallVector<AttributeSet, 4> VarArgsAttrs;
  1735. for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
  1736. i < CB.getNumArgOperands(); i++) {
  1737. VarArgsToForward.push_back(CB.getArgOperand(i));
  1738. VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i));
  1739. }
  1740. bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
  1741. if (InlinedFunctionInfo.ContainsCalls) {
  1742. CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
  1743. if (CallInst *CI = dyn_cast<CallInst>(&CB))
  1744. CallSiteTailKind = CI->getTailCallKind();
  1745. // For inlining purposes, the "notail" marker is the same as no marker.
  1746. if (CallSiteTailKind == CallInst::TCK_NoTail)
  1747. CallSiteTailKind = CallInst::TCK_None;
  1748. for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
  1749. ++BB) {
  1750. for (auto II = BB->begin(); II != BB->end();) {
  1751. Instruction &I = *II++;
  1752. CallInst *CI = dyn_cast<CallInst>(&I);
  1753. if (!CI)
  1754. continue;
  1755. // Forward varargs from inlined call site to calls to the
  1756. // ForwardVarArgsTo function, if requested, and to musttail calls.
  1757. if (!VarArgsToForward.empty() &&
  1758. ((ForwardVarArgsTo &&
  1759. CI->getCalledFunction() == ForwardVarArgsTo) ||
  1760. CI->isMustTailCall())) {
  1761. // Collect attributes for non-vararg parameters.
  1762. AttributeList Attrs = CI->getAttributes();
  1763. SmallVector<AttributeSet, 8> ArgAttrs;
  1764. if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
  1765. for (unsigned ArgNo = 0;
  1766. ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
  1767. ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
  1768. }
  1769. // Add VarArg attributes.
  1770. ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
  1771. Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
  1772. Attrs.getRetAttributes(), ArgAttrs);
  1773. // Add VarArgs to existing parameters.
  1774. SmallVector<Value *, 6> Params(CI->arg_operands());
  1775. Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
  1776. CallInst *NewCI = CallInst::Create(
  1777. CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
  1778. NewCI->setDebugLoc(CI->getDebugLoc());
  1779. NewCI->setAttributes(Attrs);
  1780. NewCI->setCallingConv(CI->getCallingConv());
  1781. CI->replaceAllUsesWith(NewCI);
  1782. CI->eraseFromParent();
  1783. CI = NewCI;
  1784. }
  1785. if (Function *F = CI->getCalledFunction())
  1786. InlinedDeoptimizeCalls |=
  1787. F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
  1788. // We need to reduce the strength of any inlined tail calls. For
  1789. // musttail, we have to avoid introducing potential unbounded stack
  1790. // growth. For example, if functions 'f' and 'g' are mutually recursive
  1791. // with musttail, we can inline 'g' into 'f' so long as we preserve
  1792. // musttail on the cloned call to 'f'. If either the inlined call site
  1793. // or the cloned call site is *not* musttail, the program already has
  1794. // one frame of stack growth, so it's safe to remove musttail. Here is
  1795. // a table of example transformations:
  1796. //
  1797. // f -> musttail g -> musttail f ==> f -> musttail f
  1798. // f -> musttail g -> tail f ==> f -> tail f
  1799. // f -> g -> musttail f ==> f -> f
  1800. // f -> g -> tail f ==> f -> f
  1801. //
  1802. // Inlined notail calls should remain notail calls.
  1803. CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
  1804. if (ChildTCK != CallInst::TCK_NoTail)
  1805. ChildTCK = std::min(CallSiteTailKind, ChildTCK);
  1806. CI->setTailCallKind(ChildTCK);
  1807. InlinedMustTailCalls |= CI->isMustTailCall();
  1808. // Calls inlined through a 'nounwind' call site should be marked
  1809. // 'nounwind'.
  1810. if (MarkNoUnwind)
  1811. CI->setDoesNotThrow();
  1812. }
  1813. }
  1814. }
1815. // Leave lifetime markers for the static allocas, scoping them to the
  1816. // function we just inlined.
  1817. if (InsertLifetime && !IFI.StaticAllocas.empty()) {
  1818. IRBuilder<> builder(&FirstNewBlock->front());
  1819. for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
  1820. AllocaInst *AI = IFI.StaticAllocas[ai];
  1821. // Don't mark swifterror allocas. They can't have bitcast uses.
  1822. if (AI->isSwiftError())
  1823. continue;
  1824. // If the alloca is already scoped to something smaller than the whole
  1825. // function then there's no need to add redundant, less accurate markers.
  1826. if (hasLifetimeMarkers(AI))
  1827. continue;
  1828. // Try to determine the size of the allocation.
  1829. ConstantInt *AllocaSize = nullptr;
  1830. if (ConstantInt *AIArraySize =
  1831. dyn_cast<ConstantInt>(AI->getArraySize())) {
  1832. auto &DL = Caller->getParent()->getDataLayout();
  1833. Type *AllocaType = AI->getAllocatedType();
  1834. TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
  1835. uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
  1836. // Don't add markers for zero-sized allocas.
  1837. if (AllocaArraySize == 0)
  1838. continue;
  1839. // Check that array size doesn't saturate uint64_t and doesn't
  1840. // overflow when it's multiplied by type size.
  1841. if (!AllocaTypeSize.isScalable() &&
  1842. AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
  1843. std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
  1844. AllocaTypeSize.getFixedSize()) {
  1845. AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
  1846. AllocaArraySize * AllocaTypeSize);
  1847. }
  1848. }
  1849. builder.CreateLifetimeStart(AI, AllocaSize);
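// This emits roughly `call void @llvm.lifetime.start.p0i8(i64 <size>, i8* <alloca>)`,
// with <size> = -1 when the allocation size could not be computed above.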
  1850. for (ReturnInst *RI : Returns) {
  1851. // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
  1852. // call and a return. The return kills all local allocas.
  1853. if (InlinedMustTailCalls &&
  1854. RI->getParent()->getTerminatingMustTailCall())
  1855. continue;
  1856. if (InlinedDeoptimizeCalls &&
  1857. RI->getParent()->getTerminatingDeoptimizeCall())
  1858. continue;
  1859. IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
  1860. }
  1861. }
  1862. }
  1863. // If the inlined code contained dynamic alloca instructions, wrap the inlined
  1864. // code with llvm.stacksave/llvm.stackrestore intrinsics.
  1865. if (InlinedFunctionInfo.ContainsDynamicAllocas) {
  1866. Module *M = Caller->getParent();
  1867. // Get the two intrinsics we care about.
  1868. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
  1869. Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
  1870. // Insert the llvm.stacksave.
  1871. CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
  1872. .CreateCall(StackSave, {}, "savedstack");
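// i.e. `%savedstack = call i8* @llvm.stacksave()` at the top of the inlined
// entry block; each return below gets a matching
// `call void @llvm.stackrestore(i8* %savedstack)`.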
  1873. // Insert a call to llvm.stackrestore before any return instructions in the
  1874. // inlined function.
  1875. for (ReturnInst *RI : Returns) {
  1876. // Don't insert llvm.stackrestore calls between a musttail or deoptimize
  1877. // call and a return. The return will restore the stack pointer.
  1878. if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
  1879. continue;
  1880. if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
  1881. continue;
  1882. IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
  1883. }
  1884. }
  1885. // If we are inlining for an invoke instruction, we must make sure to rewrite
  1886. // any call instructions into invoke instructions. This is sensitive to which
  1887. // funclet pads were top-level in the inlinee, so must be done before
  1888. // rewriting the "parent pad" links.
  1889. if (auto *II = dyn_cast<InvokeInst>(&CB)) {
  1890. BasicBlock *UnwindDest = II->getUnwindDest();
  1891. Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
  1892. if (isa<LandingPadInst>(FirstNonPHI)) {
  1893. HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
  1894. } else {
  1895. HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
  1896. }
  1897. }
  1898. // Update the lexical scopes of the new funclets and callsites.
  1899. // Anything that had 'none' as its parent is now nested inside the callsite's
  1900. // EHPad.
  1901. if (CallSiteEHPad) {
  1902. for (Function::iterator BB = FirstNewBlock->getIterator(),
  1903. E = Caller->end();
  1904. BB != E; ++BB) {
  1905. // Add bundle operands to any top-level call sites.
  1906. SmallVector<OperandBundleDef, 1> OpBundles;
  1907. for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
  1908. CallBase *I = dyn_cast<CallBase>(&*BBI++);
  1909. if (!I)
  1910. continue;
  1911. // Skip call sites which are nounwind intrinsics.
  1912. auto *CalledFn =
  1913. dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
  1914. if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow())
  1915. continue;
  1916. // Skip call sites which already have a "funclet" bundle.
  1917. if (I->getOperandBundle(LLVMContext::OB_funclet))
  1918. continue;
  1919. I->getOperandBundlesAsDefs(OpBundles);
  1920. OpBundles.emplace_back("funclet", CallSiteEHPad);
  1921. Instruction *NewInst = CallBase::Create(I, OpBundles, I);
  1922. NewInst->takeName(I);
  1923. I->replaceAllUsesWith(NewInst);
  1924. I->eraseFromParent();
  1925. OpBundles.clear();
  1926. }
  1927. // It is problematic if the inlinee has a cleanupret which unwinds to
  1928. // caller and we inline it into a call site which doesn't unwind but into
  1929. // an EH pad that does. Such an edge must be dynamically unreachable.
  1930. // As such, we replace the cleanupret with unreachable.
  1931. if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
  1932. if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
  1933. changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
  1934. Instruction *I = BB->getFirstNonPHI();
  1935. if (!I->isEHPad())
  1936. continue;
  1937. if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
  1938. if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
  1939. CatchSwitch->setParentPad(CallSiteEHPad);
  1940. } else {
  1941. auto *FPI = cast<FuncletPadInst>(I);
  1942. if (isa<ConstantTokenNone>(FPI->getParentPad()))
  1943. FPI->setParentPad(CallSiteEHPad);
  1944. }
  1945. }
  1946. }
  1947. if (InlinedDeoptimizeCalls) {
  1948. // We need to at least remove the deoptimizing returns from the Return set,
  1949. // so that the control flow from those returns does not get merged into the
  1950. // caller (but terminate it instead). If the caller's return type does not
  1951. // match the callee's return type, we also need to change the return type of
  1952. // the intrinsic.
  1953. if (Caller->getReturnType() == CB.getType()) {
  1954. llvm::erase_if(Returns, [](ReturnInst *RI) {
  1955. return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
  1956. });
  1957. } else {
  1958. SmallVector<ReturnInst *, 8> NormalReturns;
  1959. Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
  1960. Caller->getParent(), Intrinsic::experimental_deoptimize,
  1961. {Caller->getReturnType()});
  1962. for (ReturnInst *RI : Returns) {
  1963. CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
  1964. if (!DeoptCall) {
  1965. NormalReturns.push_back(RI);
  1966. continue;
  1967. }
  1968. // The calling convention on the deoptimize call itself may be bogus,
  1969. // since the code we're inlining may have undefined behavior (and may
  1970. // never actually execute at runtime); but all
  1971. // @llvm.experimental.deoptimize declarations have to have the same
  1972. // calling convention in a well-formed module.
  1973. auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
  1974. NewDeoptIntrinsic->setCallingConv(CallingConv);
  1975. auto *CurBB = RI->getParent();
  1976. RI->eraseFromParent();
  1977. SmallVector<Value *, 4> CallArgs(DeoptCall->args());
  1978. SmallVector<OperandBundleDef, 1> OpBundles;
  1979. DeoptCall->getOperandBundlesAsDefs(OpBundles);
  1980. DeoptCall->eraseFromParent();
  1981. assert(!OpBundles.empty() &&
  1982. "Expected at least the deopt operand bundle");
  1983. IRBuilder<> Builder(CurBB);
  1984. CallInst *NewDeoptCall =
  1985. Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
  1986. NewDeoptCall->setCallingConv(CallingConv);
  1987. if (NewDeoptCall->getType()->isVoidTy())
  1988. Builder.CreateRetVoid();
  1989. else
  1990. Builder.CreateRet(NewDeoptCall);
  1991. }
  1992. // Leave behind the normal returns so we can merge control flow.
  1993. std::swap(Returns, NormalReturns);
  1994. }
  1995. }
  1996. // Handle any inlined musttail call sites. In order for a new call site to be
  1997. // musttail, the source of the clone and the inlined call site must have been
  1998. // musttail. Therefore it's safe to return without merging control into the
  1999. // phi below.
  2000. if (InlinedMustTailCalls) {
  2001. // Check if we need to bitcast the result of any musttail calls.
  2002. Type *NewRetTy = Caller->getReturnType();
  2003. bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
  2004. // Handle the returns preceded by musttail calls separately.
  2005. SmallVector<ReturnInst *, 8> NormalReturns;
  2006. for (ReturnInst *RI : Returns) {
  2007. CallInst *ReturnedMustTail =
  2008. RI->getParent()->getTerminatingMustTailCall();
  2009. if (!ReturnedMustTail) {
  2010. NormalReturns.push_back(RI);
  2011. continue;
  2012. }
  2013. if (!NeedBitCast)
  2014. continue;
  2015. // Delete the old return and any preceding bitcast.
  2016. BasicBlock *CurBB = RI->getParent();
  2017. auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
  2018. RI->eraseFromParent();
  2019. if (OldCast)
  2020. OldCast->eraseFromParent();
  2021. // Insert a new bitcast and return with the right type.
  2022. IRBuilder<> Builder(CurBB);
  2023. Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
  2024. }
  2025. // Leave behind the normal returns so we can merge control flow.
  2026. std::swap(Returns, NormalReturns);
  2027. }
  2028. // Now that all of the transforms on the inlined code have taken place but
  2029. // before we splice the inlined code into the CFG and lose track of which
  2030. // blocks were actually inlined, collect the call sites. We only do this if
  2031. // call graph updates weren't requested, as those provide value handle based
  2032. // tracking of inlined call sites instead.
  2033. if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
  2034. // Otherwise just collect the raw call sites that were inlined.
  2035. for (BasicBlock &NewBB :
  2036. make_range(FirstNewBlock->getIterator(), Caller->end()))
  2037. for (Instruction &I : NewBB)
  2038. if (auto *CB = dyn_cast<CallBase>(&I))
  2039. IFI.InlinedCallSites.push_back(CB);
  2040. }
  2041. // If we cloned in _exactly one_ basic block, and if that block ends in a
  2042. // return instruction, we splice the body of the inlined callee directly into
  2043. // the calling basic block.
  2044. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
  2045. // Move all of the instructions right before the call.
  2046. OrigBB->getInstList().splice(CB.getIterator(), FirstNewBlock->getInstList(),
  2047. FirstNewBlock->begin(), FirstNewBlock->end());
  2048. // Remove the cloned basic block.
  2049. Caller->getBasicBlockList().pop_back();
  2050. // If the call site was an invoke instruction, add a branch to the normal
  2051. // destination.
  2052. if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
  2053. BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB);
  2054. NewBr->setDebugLoc(Returns[0]->getDebugLoc());
  2055. }
  2056. // If the return instruction returned a value, replace uses of the call with
  2057. // uses of the returned value.
  2058. if (!CB.use_empty()) {
  2059. ReturnInst *R = Returns[0];
  2060. if (&CB == R->getReturnValue())
  2061. CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
  2062. else
  2063. CB.replaceAllUsesWith(R->getReturnValue());
  2064. }
  2065. // Since we are now done with the Call/Invoke, we can delete it.
  2066. CB.eraseFromParent();
  2067. // Since we are now done with the return instruction, delete it also.
  2068. Returns[0]->eraseFromParent();
  2069. // We are now done with the inlining.
  2070. return InlineResult::success();
  2071. }
2072. // Otherwise, we have the normal case of more than one block to inline or
  2073. // multiple return sites.
  2074. // We want to clone the entire callee function into the hole between the
  2075. // "starter" and "ender" blocks. How we accomplish this depends on whether
  2076. // this is an invoke instruction or a call instruction.
  2077. BasicBlock *AfterCallBB;
  2078. BranchInst *CreatedBranchToNormalDest = nullptr;
  2079. if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
  2080. // Add an unconditional branch to make this look like the CallInst case...
  2081. CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB);
  2082. // Split the basic block. This guarantees that no PHI nodes will have to be
2083. // updated due to new incoming edges, and makes the invoke case more
  2084. // symmetric to the call case.
  2085. AfterCallBB =
  2086. OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
  2087. CalledFunc->getName() + ".exit");
  2088. } else { // It's a call
  2089. // If this is a call instruction, we need to split the basic block that
  2090. // the call lives in.
  2091. //
  2092. AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
  2093. CalledFunc->getName() + ".exit");
  2094. }
  2095. if (IFI.CallerBFI) {
  2096. // Copy original BB's block frequency to AfterCallBB
  2097. IFI.CallerBFI->setBlockFreq(
  2098. AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
  2099. }
  2100. // Change the branch that used to go to AfterCallBB to branch to the first
  2101. // basic block of the inlined function.
  2102. //
  2103. Instruction *Br = OrigBB->getTerminator();
  2104. assert(Br && Br->getOpcode() == Instruction::Br &&
  2105. "splitBasicBlock broken!");
  2106. Br->setOperand(0, &*FirstNewBlock);
  2107. // Now that the function is correct, make it a little bit nicer. In
  2108. // particular, move the basic blocks inserted from the end of the function
  2109. // into the space made by splitting the source basic block.
  2110. Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
  2111. Caller->getBasicBlockList(), FirstNewBlock,
  2112. Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!CB.use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), CB.getName(),
                            &AfterCallBB->front());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      CB.replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions, adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }
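
    // The merge PHI built above looks roughly like (operands illustrative):
    //   %callee.exit:
    //     %phi = phi i32 [ %v1, %ret.block1 ], [ %v2, %ret.block2 ], ...
    // where each incoming value is the operand of one cloned return.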

    // Add a branch to the merge point and remove the return instructions.
    DebugLoc Loc;
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst *BI = BranchInst::Create(AfterCallBB, RI);
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
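
    // Each cloned "ret <ty> %v" has now been rewritten into
    // "br label %<callee>.exit", so every inlined exit path falls through to
    // AfterCallBB, where the PHI (if any) selects the returned value.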

    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return, just replace anything
    // using the return value of the call with the computed value.
    if (!CB.use_empty()) {
      if (&CB == Returns[0]->getReturnValue())
        CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!CB.use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  CB.eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it.  It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_empty(AfterCallBB))
    AfterCallBB->eraseFromParent();
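
  // An inlined musttail call may only be followed by an optional bitcast and a
  // ret, so its cloned return never branches to AfterCallBB; if every inlined
  // path ended in a musttail call, AfterCallBB has no predecessors and is dead.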

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code from the entry block into the calling block, right before
  // the unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
  OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);
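
  // Folding sketch: before this step OrigBB ended in "br label %callee.entry";
  // the entry block's instructions now live in OrigBB, the branch is gone, and
  // OrigBB ends in whatever terminator the cloned entry block had (labels here
  // are hypothetical).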

  // If we inserted a PHI node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
    auto &DL = Caller->getParent()->getDataLayout();
    if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }
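
  // For example, if every cloned return produced the same constant, the PHI
  // built above folds to that constant here and disappears rather than
  // surviving to block later simplification.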

  return InlineResult::success();
}