ArgumentPromotion.cpp

//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but that is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;
#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};
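/// A byte offset into a pointer argument, paired with the part that is loaded
/// from or stored to at that offset.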
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
  // fall back to an i8 GEP to a specific offset.
  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
  if (!Ptr->getType()->isOpaquePointerTy()) {
    Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
    if (OrigOffset == 0 && OrigElemTy == ResElemTy)
      return Ptr;

    if (OrigElemTy->isSized()) {
      APInt TmpOffset = OrigOffset;
      Type *TmpTy = OrigElemTy;
      SmallVector<APInt> IntIndices =
          DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
      if (TmpOffset == 0) {
        // Try to add trailing zero indices to reach the right type.
        while (TmpTy != ResElemTy) {
          Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
          if (!NextTy)
            break;

          IntIndices.push_back(APInt::getZero(
              isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
          TmpTy = NextTy;
        }

        SmallVector<Value *> Indices;
        for (const APInt &Index : IntIndices)
          Indices.push_back(IRB.getInt(Index));

        if (OrigOffset != 0 || TmpTy == ResElemTy) {
          Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
          return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
        }
      }
    }
  }

  if (OrigOffset != 0) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
}
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Keep track of the parameter attributes for the arguments that we are *not*
  // promoting. For the ones that we do promote, the parameter attributes are
  // lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (dead arguments are always marked as promotable).
      ++NumArgumentsDead;
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata needs
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);
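  // Track the widest vector type among the new parameters so that the
  // function's min-legal-vector-width attribute can be updated below.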
  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                              LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_align, LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }
  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, and transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca. This gives the old code
    // (especially the store instructions, if they are allowed) a chance to
    // work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");
    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, i.e. the parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;
      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }
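    // Map a pointer that is a constant offset from the argument back to the
    // alloca created for that offset.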
    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions (GEPs and BitCasts) between the original
    // argument and its end users (loads and stores), and retarget every user
    // to the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the allocas into registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}
/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
  });
}
/// Determine whether this argument is safe to promote, and find the argument
/// parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments with a specified alignment this way; if the alignment is
  // unspecified, the actual alignment of the argument is target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if this load or store is not based on the argument.
  // Returns true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };
  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores *to* the argument are allowed; all other stores are
      // unknown users.
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the needed dereferenceability / alignment requirement.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}
/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If safe to promote some
/// arguments, it calls the DoPromotion method.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller.
  // FIXME: Support promoting whole chain of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // See if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;
    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
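    // An SCC with more than one function is mutually recursive by
    // construction; direct self-recursion within a single-function SCC is
    // detected per call site in promoteArguments().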
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}