//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
//===----------------------------------------------------------------------===//
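//
// For illustration (hypothetical IR, not from a test), a trivial promotable
// alloca such as:
//
//   %x = alloca i32
//   store i32 7, i32* %x
//   %v = load i32, i32* %x
//
// is deleted outright, with the load rewritten to use the stored value 7
// directly; loads reached by different stores along multiple CFG paths are
// instead rewritten to PHI nodes that merge the stored values.
//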
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mem2reg"

STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
STATISTIC(NumSingleStore, "Number of allocas promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead allocas removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
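
/// Return true if this alloca is legal for promotion.
///
/// This is true if there are only loads, stores, and lifetime markers
/// (transitively) using this alloca. For example (a sketch, hypothetical IR),
/// %p below is promotable, but would not be if the load were volatile or if
/// %p itself were stored to memory:
///
///   %p = alloca i32
///   store i32 1, i32* %p
///   %v = load i32, i32* %p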
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // Only allow direct and non-volatile loads and stores...
  for (const User *U : AI->users()) {
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getValueOperand() == AI ||
          SI->getValueOperand()->getType() != AI->getAllocatedType())
        return false; // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))
        return false;
    } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
      if (!onlyUsedByLifetimeMarkers(ASCI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}

namespace {

struct AllocaInfo {
  using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;

  SmallVector<BasicBlock *, 32> DefiningBlocks;
  SmallVector<BasicBlock *, 32> UsingBlocks;

  StoreInst *OnlyStore;
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  DbgUserVec DbgUsers;

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = nullptr;
    OnlyBlock = nullptr;
    OnlyUsedInOneBlock = true;
    DbgUsers.clear();
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (User *U : AI->users()) {
      Instruction *User = cast<Instruction>(U);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca.
        DefiningBlocks.push_back(SI->getParent());
        OnlyStore = SI;
      } else {
        LoadInst *LI = cast<LoadInst>(User);
        // Otherwise it must be a load instruction; keep track of variable
        // reads.
        UsingBlocks.push_back(LI->getParent());
      }

      if (OnlyUsedInOneBlock) {
        if (!OnlyBlock)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    findDbgUsers(DbgUsers, AI);
  }
};

/// Data package used by RenamePass().
struct RenamePassData {
  using ValVector = std::vector<Value *>;
  using LocationVector = std::vector<DebugLoc>;

  RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
      : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}

  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;
  LocationVector Locations;
};

/// This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:
  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (const Instruction &BBI : *BB)
      if (isInterestingInstruction(&BBI))
        InstNumbers[&BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};
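
// Illustrative use of LargeBlockInfo (a sketch; SI and LI stand for a store
// and a load of the same alloca in one block): deciding which access comes
// first costs one scan of the block the first time and a hash lookup after
// that.
//
//   LargeBlockInfo LBI;
//   bool StoreFirst =
//       LBI.getInstructionIndex(SI) < LBI.getInstructionIndex(LI);
//
// rewriteSingleStoreAlloca and promoteSingleBlockAlloca below rely on exactly
// this cached ordering.
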
struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;

  DominatorTree &DT;
  DIBuilder DIB;

  /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
  AssumptionCache *AC;

  const SimplifyQuery SQ;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// The PhiNodes we're adding.
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;

  /// The set of basic blocks the renamer has already visited.
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AssumptionCache *AC)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
                   nullptr, &DT, AC) {}

  void run();

private:
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = pred_size(BB) + 1;
    return NP - 1;
  }

  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                           SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);

  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  RenamePassData::LocationVector &IncLocs,
                  std::vector<RenamePassData> &Worklist);

  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end anonymous namespace

/// Given a LoadInst LI this adds assume(LI != null) after it.
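///
/// Concretely, for a load of pointer type the emitted sequence looks like
/// this (a sketch; the value names are illustrative):
///
///   %notnull = icmp ne i32* %ld, null
///   call void @llvm.assume(i1 %notnull)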
static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
  Function *AssumeIntrinsic =
      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
                                       Constant::getNullValue(LI->getType()));
  LoadNotNull->insertAfter(LI);
  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
  CI->insertAfter(LoadNotNull);
  AC->registerAssumption(cast<AssumeInst>(CI));
}

static void removeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

  for (Use &U : llvm::make_early_inc_range(AI->uses())) {
    Instruction *I = cast<Instruction>(U.getUser());
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    // Drop the use of AI in droppable instructions.
    if (I->isDroppable()) {
      I->dropDroppableUse(U);
      continue;
    }

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime
      // intrinsics. Follow the use/def chain to erase them now instead of
      // leaving it for dead code elimination later.
      for (Use &UU : llvm::make_early_inc_range(I->uses())) {
        Instruction *Inst = cast<Instruction>(UU.getUser());

        // Drop the use of I in droppable instructions.
        if (Inst->isDroppable()) {
          Inst->dropDroppableUse(UU);
          continue;
        }
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

/// Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
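///
/// For example (hypothetical IR), the store below dominates the only load, so
/// the load becomes a use of %val and the store and alloca are deleted:
///
///   entry:
///     %a = alloca i32
///     store i32 %val, i32* %a
///     br label %next
///   next:
///     %v = load i32, i32* %a     ; replaced by %val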
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI, const DataLayout &DL,
                                     DominatorTree &DT, AssumptionCache *AC) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (User *U : make_early_inc_range(AI->users())) {
    Instruction *UserInst = cast<Instruction>(U);
    if (UserInst == OnlyStore)
      continue;
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
      } else if (!DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = PoisonValue::get(LI->getType());

    // If the load was marked as nonnull we don't want to lose
    // that information when we erase this Load. So we preserve
    // it with an assume.
    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
        !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
      addAssumeNonNull(AC, LI);

    LI->replaceAllUsesWith(ReplVal);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
    if (DII->isAddressOfVariable()) {
      DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
      ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
      DII->eraseFromParent();
    } else if (DII->getExpression()->startsWithDeref()) {
      DII->eraseFromParent();
    }
  }

  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  AI->eraseFromParent();
  return true;
}

/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return false. This is necessary in cases where, due to control flow, the
/// alloca is undefined only on some control flow paths. e.g. code like
/// this is correct in LLVM IR:
///  // A is an alloca with no stores so far
///  for (...) {
///    int t = *A;
///    if (!first_iteration)
///      use(t);
///    *A = 42;
///  }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     const DataLayout &DL,
                                     DominatorTree &DT,
                                     AssumptionCache *AC) {
  // The trickiest case to handle is when we have large blocks. Because of
  // this, this code is optimized assuming that large blocks happen. This does
  // not significantly pessimize the small block case. This uses LargeBlockInfo
  // to make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  llvm::sort(StoresByIndex, less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (User *U : make_early_inc_range(AI->users())) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I = llvm::lower_bound(
        StoresByIndex,
        std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
        less_first());
    if (I == StoresByIndex.begin()) {
      if (StoresByIndex.empty())
        // If there are no stores, the load takes the undef value.
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
      else
        // There is no store before this load, bail out (load may be affected
        // by the following stores - see main comment).
        return false;
    } else {
      // Otherwise, there was a store before this load, the load takes its
      // value. Note, if the load was marked as nonnull we don't want to lose
      // that information when we erase it. So we preserve it with an assume.
      Value *ReplVal = std::prev(I)->second->getOperand(0);
      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
          !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
        addAssumeNonNull(AC, LI);

      // If the replacement value is the load, this must occur in unreachable
      // code.
      if (ReplVal == LI)
        ReplVal = PoisonValue::get(LI->getType());

      LI->replaceAllUsesWith(ReplVal);
    }

    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
      if (DII->isAddressOfVariable()) {
        DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
        ConvertDebugDeclareToDebugValue(DII, SI, DIB);
      }
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  AI->eraseFromParent();

  // The alloca's debuginfo can be removed as well.
  for (DbgVariableIntrinsic *DII : Info.DbgUsers)
    if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
      DII->eraseFromParent();

  ++NumLocalPromoted;
  return true;
}

void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  AllocaDbgUsers.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;
  ForwardIDFCalculator IDF(DT);

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed.
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform
    // a linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock &&
        promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (auto &BB : F)
        BBNumbers[&BB] = ID++;
    }

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (!Info.DbgUsers.empty())
      AllocaDbgUsers[AllocaNum] = Info.DbgUsers;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // Unique the set of defining blocks for efficient lookup.
    SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
                                            Info.DefiningBlocks.end());

    // Determine which blocks the value is live in. These are blocks which
    // lead to uses.
    SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
    ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need phi
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    IDF.setLiveInBlocks(LiveInBlocks);
    IDF.setDefiningBlocks(DefBlocks);
    SmallVector<BasicBlock *, 32> PHIBlocks;
    IDF.calculate(PHIBlocks);
    llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
    });

    unsigned CurrentVersion = 0;
    for (BasicBlock *BB : PHIBlocks)
      QueuePhiNode(BB, AllocaNum, CurrentVersion);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the allocas. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this null value.
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // When handling debug info, treat all incoming values as if they have
  // unknown locations until proven otherwise.
  RenamePassData::LocationVector Locations(Allocas.size());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
                                  std::move(Locations));
  do {
    RenamePassData RPD = std::move(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (Instruction *A : Allocas) {
    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the
    // dominator tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(PoisonValue::get(A->getType()));
    A->eraseFromParent();
  }

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (auto &DbgUsers : AllocaDbgUsers) {
    for (auto *DII : DbgUsers)
      if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
        DII->eraseFromParent();
  }

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be
  // removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, SQ)) {
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all
  // reachable code. Unfortunately, there may be unreachable blocks which the
  // renamer hasn't traversed. If this is the case, the PHI nodes may not have
  // incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(predecessors(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
    };
    llvm::sort(Preds, CompareBBNumbers);

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
          Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI node for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (BasicBlock *Pred : Preds)
        SomePHI->addIncoming(UndefVal, Pred);
    }
  }

  NewPhiNodes.clear();
}

/// Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
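///
/// For example (hypothetical CFG), %merge below joins two stores of alloca
/// %a and so lies on the iterated dominance frontier of the defs, but a PHI
/// is placed there only if %a is live into %merge, i.e. some path from
/// %merge reaches a load of %a before another store:
///
///   then:
///     store i32 1, i32* %a
///     br label %merge
///   else:
///     store i32 2, i32* %a
///     br label %merge
///   merge:
///     ...                        ; PHI placed here only if %a is live-in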
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
    SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live. Blocks are added to the worklist if we need to
  // check their predecessors. Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use. If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value. If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load. The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i;
        --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I))
        // Okay, we found a load before a store to the alloca. It is actually
        // live into this block.
        if (LI->getOperand(0) == AI)
          break;
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set. If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB).second)
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too. Add the preds to the worklist unless they are a
    // defining block.
    for (BasicBlock *P : predecessors(BB)) {
      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable.
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're
  // done!
  if (PN)
    return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to
  // the BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       &BB->front());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;
  return true;
}

/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
/// create a merged location incorporating \p DL, or to set \p DL directly.
static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
                                           bool ApplyMergedLoc) {
  if (ApplyMergedLoc)
    PN->applyMergedLocation(PN->getDebugLoc(), DL);
  else
    PN->setDebugLoc(DL);
}

/// Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
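///
/// For example (a sketch, hypothetical IR): if %then stores 1 to an alloca
/// %a and %else stores 2, the rename pass reaches %merge once along each
/// incoming edge and fills in the PHI queued by QueuePhiNode, so a load of
/// %a in %merge ends up using:
///
///   %a.0 = phi i32 [ 1, %then ], [ 2, %else ]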
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                RenamePassData::LocationVector &IncomingLocs,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in
  // the block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed
      // in the IR before mem2reg was run. We determine that APN is being
      // inserted because it is missing incoming edges. All other PHI nodes
      // being inserted by this pass of mem2reg will have the same number of
      // incoming operands so far. Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = llvm::count(successors(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Update the location of the phi node.
        updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
                                       APN->getNumIncomingValues() > 0);

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;
        for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
          if (DII->isAddressOfVariable())
            ConvertDebugDeclareToDebugValue(DII, APN, DIB);

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (!APN)
          break;

        // Verify that it is missing entries. If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB).second)
    return;

  for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
    Instruction *I = &*II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // If the load was marked as nonnull we don't want to lose
      // that information when we erase this Load. So we preserve
      // it with an assume.
      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
          !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
        addAssumeNonNull(AC, LI);

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?
      unsigned AllocaNo = ai->second;
      IncomingVals[AllocaNo] = SI->getOperand(0);

      // Record debuginfo for the store before removing it.
      IncomingLocs[AllocaNo] = SI->getDebugLoc();
      for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
        if (DII->isAddressOfVariable())
          ConvertDebugDeclareToDebugValue(DII, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I).second)
      Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);

  goto NextIteration;
}
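
/// Promote the specified list of alloca instructions into scalar registers,
/// inserting PHI nodes as appropriate.
///
/// Typical driver (a sketch, assuming the caller has a DominatorTree DT and
/// an AssumptionCache AC for the function F, as the mem2reg pass does):
///
///   SmallVector<AllocaInst *, 16> Allocas;
///   for (Instruction &I : F.getEntryBlock())
///     if (auto *AI = dyn_cast<AllocaInst>(&I))
///       if (isAllocaPromotable(AI))
///         Allocas.push_back(AI);
///   PromoteMemToReg(Allocas, DT, &AC);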
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                           AssumptionCache *AC) {
  // If there is nothing to do, bail out...
  if (Allocas.empty())
    return;
  PromoteMem2Reg(Allocas, DT, AC).run();
}