//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but does not currently. This case
// would be best handled when and if LLVM begins supporting multiple return
// values from functions.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <map>
#include <set>
#include <utility>
#include <vector>
using namespace llvm;

#define DEBUG_TYPE "argpromotion"

// Pass-wide counters, reported under -stats.
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

/// A vector used to hold the indices of a single GEP instruction
using IndicesVector = std::vector<uint64_t>;
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
///
/// Three kinds of arguments are handled:
///  - ArgsToPromote: pointer arguments whose only uses are loads (or GEPs
///    with constant indices that are only loaded); each loaded piece becomes
///    a scalar parameter of the new function.
///  - ByValArgsToTransform: byval struct arguments that are exploded into one
///    parameter per struct element.
///  - Everything else is carried over unchanged.
///
/// If ReplaceCallSite is provided it is invoked once per rewritten call site
/// so the caller (e.g. the legacy call-graph pass) can update its bookkeeping.
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
            SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
            Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
                ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Each entry pairs the type a GEP indexes through with the index list;
  // the std::set ordering makes a direct load (empty index list) sort first.
  using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  std::map<Argument *, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  // We need to keep the original loads for each argument and the elements
  // of the argument that are accessed.
  std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = I->getParamByValType();
      StructType *STy = cast<StructType>(AgTy);
      llvm::append_range(Params, STy->elements());
      // The exploded elements carry no parameter attributes.
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads
      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
      for (User *U : make_early_inc_range(I->users())) {
        Instruction *UI = cast<Instruction>(U);
        Type *SrcTy;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          SrcTy = L->getType();
        else
          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
        // Skip dead GEPs and remove them.
        if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
          UI->eraseFromParent();
          continue;
        }

        IndicesVector Indices;
        Indices.reserve(UI->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (const Use &I : llvm::drop_begin(UI->operands()))
          Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(std::make_pair(SrcTy, Indices));
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(UI->user_back());
        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (const auto &ArgIndex : ArgIndices) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(
            I->getType()->getPointerElementType(), ArgIndex.second));
        ArgAttrVec.push_back(AttributeSet());
        assert(Params.back());
      }

      // A single entry with no indices means the whole pointee was loaded
      // directly; anything else means we scalarized an aggregate.
      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  // Each iteration erases one call site, so this loop terminates when all
  // users of F have been rewritten to call NF.
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    auto AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = I->getParamByValType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        const StructLayout *SL = DL.getStructLayout(STy);
        Align StructAlign = *I->getParamAlign();
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          auto *Idx =
              IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
          // TODO: Tell AA about the new values?
          Align Alignment =
              commonAlignment(StructAlign, SL->getElementOffset(i));
          Args.push_back(IRB.CreateAlignedLoad(
              STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val"));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value *> Ops;
        for (const auto &ArgIndex : ArgIndices) {
          Value *V = *AI;
          LoadInst *OrigLoad =
              OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
          if (!ArgIndex.second.empty()) {
            Ops.reserve(ArgIndex.second.size());
            Type *ElTy = V->getType();
            for (auto II : ArgIndex.second) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy =
                  (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
                                      : Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, II));
              // Keep track of the type we're currently indexing.
              if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
                ElTy = ElPTy->getPointerElementType();
              else
                ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
            }
            // And create a GEP to extract those indices.
            V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
            Ops.clear();
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad =
              IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
          newLoad->setAlignment(OrigLoad->getAlign());
          // Transfer the AA info too.
          newLoad->setAAMetadata(OrigLoad->getAAMetadata());
          Args.push_back(newLoad);
          ArgAttrVec.push_back(AttributeSet());
        }
      }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CB, *NewCS);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&Arg)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = Arg.getParamByValType();
      Align StructAlign = *Arg.getParamAlign();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        StructAlign, "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};
      const StructLayout *SL = DL.getStructLayout(STy);
      // I2 walks over the freshly-added scalar parameters, one per element.
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(Arg.getName() + "." + Twine(i));
        Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
        new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      Arg.replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&Arg);
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
    while (!Arg.use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
        // The direct-load entry (empty index list) sorts first in the
        // ScalarizeTable, so I2 is the parameter for the direct load.
        assert(ArgIndices.begin()->second.empty() &&
               "Load element should sort to front!");
        I2->setName(Arg.getName() + ".val");
        LI->replaceAllUsesWith(&*I2);
        LI->eraseFromParent();
        LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
                          << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
        assert(!GEP->use_empty() &&
               "GEPs without uses should be cleaned up already");
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (const Use &Idx : GEP->indices())
          Operands.push_back(cast<ConstantInt>(Idx)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        // Scan ArgIndices (whose order matches the order the new parameters
        // were appended in) to find which parameter corresponds to this GEP.
        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             It->second != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
                                make_range(Operands.begin(), Operands.end())));

        LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
                          << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions. Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->user_back());
          L->replaceAllUsesWith(&*TheArg);
          L->eraseFromParent();
        }
        GEP->eraseFromParent();
      }
    }
    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  return NF;
}
  427. /// Return true if we can prove that all callees pass in a valid pointer for the
  428. /// specified function argument.
  429. static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
  430. Function *Callee = Arg->getParent();
  431. const DataLayout &DL = Callee->getParent()->getDataLayout();
  432. unsigned ArgNo = Arg->getArgNo();
  433. // Look at all call sites of the function. At this point we know we only have
  434. // direct callees.
  435. for (User *U : Callee->users()) {
  436. CallBase &CB = cast<CallBase>(*U);
  437. if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL))
  438. return false;
  439. }
  440. return true;
  441. }
  442. /// Returns true if Prefix is a prefix of longer. That means, Longer has a size
  443. /// that is greater than or equal to the size of prefix, and each of the
  444. /// elements in Prefix is the same as the corresponding elements in Longer.
  445. ///
  446. /// This means it also returns true when Prefix and Longer are equal!
  447. static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  448. if (Prefix.size() > Longer.size())
  449. return false;
  450. return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
  451. }
  452. /// Checks if Indices, or a prefix of Indices, is in Set.
  453. static bool prefixIn(const IndicesVector &Indices,
  454. std::set<IndicesVector> &Set) {
  455. std::set<IndicesVector>::iterator Low;
  456. Low = Set.upper_bound(Indices);
  457. if (Low != Set.begin())
  458. Low--;
  459. // Low is now the last element smaller than or equal to Indices. This means
  460. // it points to a prefix of Indices (possibly Indices itself), if such
  461. // prefix exists.
  462. //
  463. // This load is safe if any prefix of its operands is safe to load.
  464. return Low != Set.end() && isPrefix(*Low, Indices);
  465. }
/// Mark the given indices (ToMark) as safe in the given set of indices
/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
/// already. Furthermore, any indices that ToMark is itself a prefix of, are
/// removed from Safe (since they are implicitly safe because of ToMark now).
/// This keeps Safe minimal: no element is a prefix of another.
static void markIndicesSafe(const IndicesVector &ToMark,
                            std::set<IndicesVector> &Safe) {
  std::set<IndicesVector>::iterator Low;
  Low = Safe.upper_bound(ToMark);
  // Guard against the case where Safe is empty
  if (Low != Safe.begin())
    Low--;
  // Low is now the last element smaller than or equal to ToMark. This
  // means it points to a prefix of ToMark (possibly ToMark itself), if
  // such prefix exists.
  if (Low != Safe.end()) {
    if (isPrefix(*Low, ToMark))
      // If there is already a prefix of these indices (or exactly these
      // indices) marked as safe, don't bother adding these indices
      return;

    // Increment Low, so we can use it as an "insert before" hint
    ++Low;
  }
  // Insert ToMark; the hint makes this O(1) amortized when correct.
  Low = Safe.insert(Low, ToMark);
  ++Low;
  // If we were a prefix of longer index list(s), remove those. Elements with
  // ToMark as a prefix sort immediately after it, so scan forward until the
  // prefix relation stops holding. Erasing only invalidates the erased
  // iterator, so advancing Low before erase keeps the loop valid.
  std::set<IndicesVector>::iterator End = Safe.end();
  while (Low != End && isPrefix(ToMark, *Low)) {
    std::set<IndicesVector>::iterator Remove = Low;
    ++Low;
    Safe.erase(Remove);
  }
}
  500. /// isSafeToPromoteArgument - As you might guess from the name of this method,
  501. /// it checks to see if it is both safe and useful to promote the argument.
  502. /// This method limits promotion of aggregates to only promote up to three
  503. /// elements of the aggregate in order to avoid exploding the number of
  504. /// arguments passed in.
  505. static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
  506. unsigned MaxElements) {
  507. using GEPIndicesSet = std::set<IndicesVector>;
  508. // Quick exit for unused arguments
  509. if (Arg->use_empty())
  510. return true;
  511. // We can only promote this argument if all of the uses are loads, or are GEP
  512. // instructions (with constant indices) that are subsequently loaded.
  513. //
  514. // Promoting the argument causes it to be loaded in the caller
  515. // unconditionally. This is only safe if we can prove that either the load
  516. // would have happened in the callee anyway (ie, there is a load in the entry
  517. // block) or the pointer passed in at every call site is guaranteed to be
  518. // valid.
  519. // In the former case, invalid loads can happen, but would have happened
  520. // anyway, in the latter case, invalid loads won't happen. This prevents us
  521. // from introducing an invalid load that wouldn't have happened in the
  522. // original code.
  523. //
  524. // This set will contain all sets of indices that are loaded in the entry
  525. // block, and thus are safe to unconditionally load in the caller.
  526. GEPIndicesSet SafeToUnconditionallyLoad;
  527. // This set contains all the sets of indices that we are planning to promote.
  528. // This makes it possible to limit the number of arguments added.
  529. GEPIndicesSet ToPromote;
  530. // If the pointer is always valid, any load with first index 0 is valid.
  531. if (ByValTy)
  532. SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
  533. // Whenever a new underlying type for the operand is found, make sure it's
  534. // consistent with the GEPs and loads we've already seen and, if necessary,
  535. // use it to see if all incoming pointers are valid (which implies the 0-index
  536. // is safe).
  537. Type *BaseTy = ByValTy;
  538. auto UpdateBaseTy = [&](Type *NewBaseTy) {
  539. if (BaseTy)
  540. return BaseTy == NewBaseTy;
  541. BaseTy = NewBaseTy;
  542. if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
  543. assert(SafeToUnconditionallyLoad.empty());
  544. SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
  545. }
  546. return true;
  547. };
  548. // First, iterate functions that are guaranteed to execution on function
  549. // entry and mark loads of (geps of) arguments as safe.
  550. BasicBlock &EntryBlock = Arg->getParent()->front();
  551. // Declare this here so we can reuse it
  552. IndicesVector Indices;
  553. for (Instruction &I : EntryBlock) {
  554. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
  555. Value *V = LI->getPointerOperand();
  556. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
  557. V = GEP->getPointerOperand();
  558. if (V == Arg) {
  559. // This load actually loads (part of) Arg? Check the indices then.
  560. Indices.reserve(GEP->getNumIndices());
  561. for (Use &Idx : GEP->indices())
  562. if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
  563. Indices.push_back(CI->getSExtValue());
  564. else
  565. // We found a non-constant GEP index for this argument? Bail out
  566. // right away, can't promote this argument at all.
  567. return false;
  568. if (!UpdateBaseTy(GEP->getSourceElementType()))
  569. return false;
  570. // Indices checked out, mark them as safe
  571. markIndicesSafe(Indices, SafeToUnconditionallyLoad);
  572. Indices.clear();
  573. }
  574. } else if (V == Arg) {
  575. // Direct loads are equivalent to a GEP with a single 0 index.
  576. markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
  577. if (BaseTy && LI->getType() != BaseTy)
  578. return false;
  579. BaseTy = LI->getType();
  580. }
  581. }
  582. if (!isGuaranteedToTransferExecutionToSuccessor(&I))
  583. break;
  584. }
  585. // Now, iterate all uses of the argument to see if there are any uses that are
  586. // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  587. SmallVector<LoadInst *, 16> Loads;
  588. IndicesVector Operands;
  589. for (Use &U : Arg->uses()) {
  590. User *UR = U.getUser();
  591. Operands.clear();
  592. if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
  593. // Don't hack volatile/atomic loads
  594. if (!LI->isSimple())
  595. return false;
  596. Loads.push_back(LI);
  597. // Direct loads are equivalent to a GEP with a zero index and then a load.
  598. Operands.push_back(0);
  599. if (!UpdateBaseTy(LI->getType()))
  600. return false;
  601. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
  602. if (GEP->use_empty()) {
  603. // Dead GEP's cause trouble later. Just remove them if we run into
  604. // them.
  605. continue;
  606. }
  607. if (!UpdateBaseTy(GEP->getSourceElementType()))
  608. return false;
  609. // Ensure that all of the indices are constants.
  610. for (Use &Idx : GEP->indices())
  611. if (ConstantInt *C = dyn_cast<ConstantInt>(Idx))
  612. Operands.push_back(C->getSExtValue());
  613. else
  614. return false; // Not a constant operand GEP!
  615. // Ensure that the only users of the GEP are load instructions.
  616. for (User *GEPU : GEP->users())
  617. if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
  618. // Don't hack volatile/atomic loads
  619. if (!LI->isSimple())
  620. return false;
  621. Loads.push_back(LI);
  622. } else {
  623. // Other uses than load?
  624. return false;
  625. }
  626. } else {
  627. return false; // Not a load or a GEP.
  628. }
  629. // Now, see if it is safe to promote this load / loads of this GEP. Loading
  630. // is safe if Operands, or a prefix of Operands, is marked as safe.
  631. if (!prefixIn(Operands, SafeToUnconditionallyLoad))
  632. return false;
  633. // See if we are already promoting a load with these indices. If not, check
  634. // to make sure that we aren't promoting too many elements. If so, nothing
  635. // to do.
  636. if (ToPromote.find(Operands) == ToPromote.end()) {
  637. if (MaxElements > 0 && ToPromote.size() == MaxElements) {
  638. LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
  639. << Arg->getName()
  640. << "' because it would require adding more "
  641. << "than " << MaxElements
  642. << " arguments to the function.\n");
  643. // We limit aggregate promotion to only promoting up to a fixed number
  644. // of elements of the aggregate.
  645. return false;
  646. }
  647. ToPromote.insert(std::move(Operands));
  648. }
  649. }
  650. if (Loads.empty())
  651. return true; // No users, this is a dead argument.
  652. // Okay, now we know that the argument is only used by load instructions and
  653. // it is safe to unconditionally perform all of them. Use alias analysis to
  654. // check to see if the pointer is guaranteed to not be modified from entry of
  655. // the function to each of the load instructions.
  656. // Because there could be several/many load instructions, remember which
  657. // blocks we know to be transparent to the load.
  658. df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
  659. for (LoadInst *Load : Loads) {
  660. // Check to see if the load is invalidated from the start of the block to
  661. // the load itself.
  662. BasicBlock *BB = Load->getParent();
  663. MemoryLocation Loc = MemoryLocation::get(Load);
  664. if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
  665. return false; // Pointer is invalidated!
  666. // Now check every path from the entry block to the load for transparency.
  667. // To do this, we perform a depth first search on the inverse CFG from the
  668. // loading block.
  669. for (BasicBlock *P : predecessors(BB)) {
  670. for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
  671. if (AAR.canBasicBlockModify(*TranspBB, Loc))
  672. return false;
  673. }
  674. }
  675. // If the path from the entry of the function to each load is free of
  676. // instructions that potentially invalidate the load, we can make the
  677. // transformation!
  678. return true;
  679. }
  680. bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
  681. // There is no size information, so be conservative.
  682. if (!type->isSized())
  683. return false;
  684. // If the alloc size is not equal to the storage size, then there are padding
  685. // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
  686. if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
  687. return false;
  688. // FIXME: This isn't the right way to check for padding in vectors with
  689. // non-byte-size elements.
  690. if (VectorType *seqTy = dyn_cast<VectorType>(type))
  691. return isDenselyPacked(seqTy->getElementType(), DL);
  692. // For array types, check for padding within members.
  693. if (ArrayType *seqTy = dyn_cast<ArrayType>(type))
  694. return isDenselyPacked(seqTy->getElementType(), DL);
  695. if (!isa<StructType>(type))
  696. return true;
  697. // Check for padding within and between elements of a struct.
  698. StructType *StructTy = cast<StructType>(type);
  699. const StructLayout *Layout = DL.getStructLayout(StructTy);
  700. uint64_t StartPos = 0;
  701. for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
  702. Type *ElTy = StructTy->getElementType(i);
  703. if (!isDenselyPacked(ElTy, DL))
  704. return false;
  705. if (StartPos != Layout->getElementOffsetInBits(i))
  706. return false;
  707. StartPos += DL.getTypeAllocSizeInBits(ElTy);
  708. }
  709. return true;
  710. }
  711. /// Checks if the padding bytes of an argument could be accessed.
  712. static bool canPaddingBeAccessed(Argument *arg) {
  713. assert(arg->hasByValAttr());
  714. // Track all the pointers to the argument to make sure they are not captured.
  715. SmallPtrSet<Value *, 16> PtrValues;
  716. PtrValues.insert(arg);
  717. // Track all of the stores.
  718. SmallVector<StoreInst *, 16> Stores;
  719. // Scan through the uses recursively to make sure the pointer is always used
  720. // sanely.
  721. SmallVector<Value *, 16> WorkList(arg->users());
  722. while (!WorkList.empty()) {
  723. Value *V = WorkList.pop_back_val();
  724. if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
  725. if (PtrValues.insert(V).second)
  726. llvm::append_range(WorkList, V->users());
  727. } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
  728. Stores.push_back(Store);
  729. } else if (!isa<LoadInst>(V)) {
  730. return true;
  731. }
  732. }
  733. // Check to make sure the pointers aren't captured
  734. for (StoreInst *Store : Stores)
  735. if (PtrValues.count(Store->getValueOperand()))
  736. return true;
  737. return false;
  738. }
  739. /// Check if callers and the callee \p F agree how promoted arguments would be
  740. /// passed. The ones that they do not agree on are eliminated from the sets but
  741. /// the return value has to be observed as well.
  742. static bool areFunctionArgsABICompatible(
  743. const Function &F, const TargetTransformInfo &TTI,
  744. SmallPtrSetImpl<Argument *> &ArgsToPromote,
  745. SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
  746. // TODO: Check individual arguments so we can promote a subset?
  747. SmallVector<Type *, 32> Types;
  748. for (Argument *Arg : ArgsToPromote)
  749. Types.push_back(Arg->getType()->getPointerElementType());
  750. for (Argument *Arg : ByValArgsToTransform)
  751. Types.push_back(Arg->getParamByValType());
  752. for (const Use &U : F.uses()) {
  753. CallBase *CB = dyn_cast<CallBase>(U.getUser());
  754. if (!CB)
  755. return false;
  756. const Function *Caller = CB->getCaller();
  757. const Function *Callee = CB->getCalledFunction();
  758. if (!TTI.areTypesABICompatible(Caller, Callee, Types))
  759. return false;
  760. }
  761. return true;
  762. }
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
/// \param F              the candidate function (must have local linkage).
/// \param AARGetter      callback producing alias-analysis results for \p F.
/// \param MaxElements    max number of new arguments per promotion; 0 = no cap.
/// \param ReplaceCallSite optional callback invoked for each rewritten call
///                        site (used by the legacy PM to update the CallGraph).
/// \returns the new, promoted function, or nullptr if nothing was done.
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
                 unsigned MaxElements,
                 Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
                     ReplaceCallSite,
                 const TargetTransformInfo &TTI) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module (we are changing its signature,
  // so every caller must be visible and updatable).
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation may
  // not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive and check that target features are compatible.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call; a use as a call argument (or non-call use)
    // means the address escapes and we cannot rewrite the signature.
    if (CB == nullptr || !CB->isCallee(&U))
      return nullptr;

    // Can't change signature of musttail callee
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  // Can't change signature of musttail caller
  // FIXME: Support promoting whole chain of musttail functions
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();

  AAResults &AAR = AARGetter(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument *, 8> ArgsToPromote;
  SmallPtrSet<Argument *, 8> ByValArgsToTransform;
  for (Argument *PtrArg : PointerArgs) {
    Type *AgTy = PtrArg->getType()->getPointerElementType();

    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      // All uses were verified above to be direct calls, so the cast is safe;
      // keep the call sites' parameter attributes in sync with the function.
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed.
    //
    // Only handle arguments with specified alignment; if it's unspecified, the
    // actual alignment of the argument is target-specific.
    bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() &&
                           (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
                            !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        // Respect the element cap: splitting the struct would add one new
        // argument per element.
        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
          LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
                            << PtrArg->getName()
                            << "' because it would require adding more"
                            << " than " << MaxElements
                            << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (const auto *EltTy : STy->elements()) {
          if (!EltTy->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow sroa to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType =
            llvm::is_contained(STy->elements(), PtrArg->getType());
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    Type *ByValTy =
        PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
    if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  // Bail out if any caller/callee pair would disagree on the ABI for the
  // promoted argument types.
  if (!areFunctionArgsABICompatible(
          *F, TTI, ArgsToPromote, ByValArgsToTransform))
    return nullptr;

  return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
}
/// New pass manager entry point: repeatedly attempt argument promotion on
/// every function in the SCC until a full sweep makes no change, keeping the
/// LazyCallGraph and the function analysis manager consistent as functions
/// are replaced.
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();

      // FIXME: This lambda must only be used with this function. We should
      // skip the lambda and just get the AA results directly.
      auto AARGetter = [&](Function &F) -> AAResults & {
        assert(&F == &OldF && "Called with an unexpected function!");
        return FAM.getResult<AAManager>(F);
      };

      const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
      // No ReplaceCallSite callback (None): the new PM does not maintain a
      // legacy CallGraph, so call sites need no per-site notification.
      Function *NewF =
          promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      // Drop cached analyses for the old function before deleting it; the
      // name is passed for the analysis manager's diagnostics.
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      // Callers of NewF had their call sites rewritten; invalidate their
      // non-CFG analyses (the CFG of the callers did not change).
      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}
namespace {

/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
/// Legacy pass manager wrapper around promoteArguments().
struct ArgPromotion : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  explicit ArgPromotion(unsigned MaxElements = 3)
      : CallGraphSCCPass(ID), MaxElements(MaxElements) {
    initializeArgPromotionPass(*PassRegistry::getPassRegistry());
  }

  // Declare the analyses this pass depends on (AA, TLI, TTI, assumptions).
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

private:
  // Silence the hidden-overload warning for the base class's other
  // doInitialization signatures.
  using llvm::Pass::doInitialization;

  bool doInitialization(CallGraph &CG) override;

  /// The maximum number of elements to expand, or 0 for unlimited.
  unsigned MaxElements;
};

} // end anonymous namespace
char ArgPromotion::ID = 0;

// Register the pass and its analysis dependencies with the legacy pass
// registry under the "argpromotion" command-line name.
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
                      "Promote 'by reference' arguments to scalars", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                    "Promote 'by reference' arguments to scalars", false, false)

/// Factory used by the legacy pass manager to create this pass.
/// \param MaxElements  cap on new arguments per promoted aggregate (0 = none).
Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
  return new ArgPromotion(MaxElements);
}
/// Legacy pass manager entry point: promote arguments across the SCC until a
/// full sweep makes no change, keeping the legacy CallGraph in sync.
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  LegacyAARGetter AARGetter(*this);

  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;
    // Attempt to promote arguments from all functions in this SCC.
    for (CallGraphNode *OldNode : SCC) {
      Function *OldF = OldNode->getFunction();
      // External/indirect nodes have no function body to work on.
      if (!OldF)
        continue;

      // Passed into promoteArguments so each rewritten call site also updates
      // the caller's edge in the legacy CallGraph.
      auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) {
        Function *Caller = OldCS.getParent()->getParent();
        CallGraphNode *NewCalleeNode =
            CG.getOrInsertFunction(NewCS.getCalledFunction());
        CallGraphNode *CallerNode = CG[Caller];
        CallerNode->replaceCallEdge(cast<CallBase>(OldCS),
                                    cast<CallBase>(NewCS), NewCalleeNode);
      };

      const TargetTransformInfo &TTI =
          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);

      if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
                                            {ReplaceCallSite}, TTI)) {
        LocalChange = true;

        // Update the call graph for the newly promoted function.
        CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
        NewNode->stealCalledFunctionsFrom(OldNode);
        // Only delete the old function once nothing references it anymore;
        // otherwise externalize it so remaining references stay valid.
        if (OldNode->getNumReferences() == 0)
          delete CG.removeFunctionFromModule(OldNode);
        else
          OldF->setLinkage(Function::ExternalLinkage);

        // And update the SCC we're iterating as well.
        SCC.ReplaceNode(OldNode, NewNode);
      }
    }
    // Remember that we changed something.
    Changed |= LocalChange;
  } while (LocalChange);

  return Changed;
}
/// Legacy PM initialization hook; no pass-specific setup is needed, so simply
/// defer to the base class.
bool ArgPromotion::doInitialization(CallGraph &CG) {
  return CallGraphSCCPass::doInitialization(CG);
}