LCSSA.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass transforms loops by placing phi nodes at the end of the loops for
  10. // all values that are live across the loop boundary. For example, it turns
  11. // the left into the right code:
  12. //
  13. // for (...)                for (...)
  14. //   if (c)                   if (c)
  15. //     X1 = ...                 X1 = ...
  16. //   else                     else
  17. //     X2 = ...                 X2 = ...
  18. //   X3 = phi(X1, X2)         X3 = phi(X1, X2)
  19. // ... = X3 + 4             X4 = phi(X3)
  20. //                          ... = X4 + 4
  21. //
  22. // This is still valid LLVM; the extra phi nodes are purely redundant, and will
  23. // be trivially eliminated by InstCombine. The major benefit of this
  24. // transformation is that it makes many other loop optimizations, such as
  25. // LoopUnswitching, simpler.
  26. //
  27. //===----------------------------------------------------------------------===//
  28. #include "llvm/Transforms/Utils/LCSSA.h"
  29. #include "llvm/ADT/STLExtras.h"
  30. #include "llvm/ADT/Statistic.h"
  31. #include "llvm/Analysis/AliasAnalysis.h"
  32. #include "llvm/Analysis/BasicAliasAnalysis.h"
  33. #include "llvm/Analysis/BranchProbabilityInfo.h"
  34. #include "llvm/Analysis/GlobalsModRef.h"
  35. #include "llvm/Analysis/LoopPass.h"
  36. #include "llvm/Analysis/MemorySSA.h"
  37. #include "llvm/Analysis/ScalarEvolution.h"
  38. #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
  39. #include "llvm/IR/Constants.h"
  40. #include "llvm/IR/DebugInfo.h"
  41. #include "llvm/IR/Dominators.h"
  42. #include "llvm/IR/Function.h"
  43. #include "llvm/IR/IRBuilder.h"
  44. #include "llvm/IR/Instructions.h"
  45. #include "llvm/IR/IntrinsicInst.h"
  46. #include "llvm/IR/PredIteratorCache.h"
  47. #include "llvm/InitializePasses.h"
  48. #include "llvm/Pass.h"
  49. #include "llvm/Support/CommandLine.h"
  50. #include "llvm/Transforms/Utils.h"
  51. #include "llvm/Transforms/Utils/LoopUtils.h"
  52. #include "llvm/Transforms/Utils/SSAUpdater.h"
  53. using namespace llvm;
  54. #define DEBUG_TYPE "lcssa"
  55. STATISTIC(NumLCSSA, "Number of live out of a loop variables");
  56. #ifdef EXPENSIVE_CHECKS
  57. static bool VerifyLoopLCSSA = true;
  58. #else
  59. static bool VerifyLoopLCSSA = false;
  60. #endif
  61. static cl::opt<bool, true>
  62. VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
  63. cl::Hidden,
  64. cl::desc("Verify loop lcssa form (time consuming)"));
  65. /// Return true if the specified block is in the list.
  66. static bool isExitBlock(BasicBlock *BB,
  67. const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  68. return is_contained(ExitBlocks, BB);
  69. }
  70. /// For every instruction from the worklist, check to see if it has any uses
  71. /// that are outside the current loop. If so, insert LCSSA PHI nodes and
  72. /// rewrite the uses.
  73. bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
  74. const DominatorTree &DT, const LoopInfo &LI,
  75. ScalarEvolution *SE, IRBuilderBase &Builder,
  76. SmallVectorImpl<PHINode *> *PHIsToRemove) {
  77. SmallVector<Use *, 16> UsesToRewrite;
  78. SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
  79. PredIteratorCache PredCache;
  80. bool Changed = false;
  81. IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
  82. // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
  83. // instructions within the same loops, computing the exit blocks is
  84. // expensive, and we're not mutating the loop structure.
  85. SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
  86. while (!Worklist.empty()) {
  87. UsesToRewrite.clear();
  88. Instruction *I = Worklist.pop_back_val();
  89. assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
  90. BasicBlock *InstBB = I->getParent();
  91. Loop *L = LI.getLoopFor(InstBB);
  92. assert(L && "Instruction belongs to a BB that's not part of a loop");
  93. if (!LoopExitBlocks.count(L))
  94. L->getExitBlocks(LoopExitBlocks[L]);
  95. assert(LoopExitBlocks.count(L));
  96. const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
  97. if (ExitBlocks.empty())
  98. continue;
  99. for (Use &U : I->uses()) {
  100. Instruction *User = cast<Instruction>(U.getUser());
  101. BasicBlock *UserBB = User->getParent();
  102. // For practical purposes, we consider that the use in a PHI
  103. // occurs in the respective predecessor block. For more info,
  104. // see the `phi` doc in LangRef and the LCSSA doc.
  105. if (auto *PN = dyn_cast<PHINode>(User))
  106. UserBB = PN->getIncomingBlock(U);
  107. if (InstBB != UserBB && !L->contains(UserBB))
  108. UsesToRewrite.push_back(&U);
  109. }
  110. // If there are no uses outside the loop, exit with no change.
  111. if (UsesToRewrite.empty())
  112. continue;
  113. ++NumLCSSA; // We are applying the transformation
  114. // Invoke instructions are special in that their result value is not
  115. // available along their unwind edge. The code below tests to see whether
  116. // DomBB dominates the value, so adjust DomBB to the normal destination
  117. // block, which is effectively where the value is first usable.
  118. BasicBlock *DomBB = InstBB;
  119. if (auto *Inv = dyn_cast<InvokeInst>(I))
  120. DomBB = Inv->getNormalDest();
  121. const DomTreeNode *DomNode = DT.getNode(DomBB);
  122. SmallVector<PHINode *, 16> AddedPHIs;
  123. SmallVector<PHINode *, 8> PostProcessPHIs;
  124. SmallVector<PHINode *, 4> InsertedPHIs;
  125. SSAUpdater SSAUpdate(&InsertedPHIs);
  126. SSAUpdate.Initialize(I->getType(), I->getName());
  127. // Force re-computation of I, as some users now need to use the new PHI
  128. // node.
  129. if (SE)
  130. SE->forgetValue(I);
  131. // Insert the LCSSA phi's into all of the exit blocks dominated by the
  132. // value, and add them to the Phi's map.
  133. for (BasicBlock *ExitBB : ExitBlocks) {
  134. if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
  135. continue;
  136. // If we already inserted something for this BB, don't reprocess it.
  137. if (SSAUpdate.HasValueForBlock(ExitBB))
  138. continue;
  139. Builder.SetInsertPoint(&ExitBB->front());
  140. PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
  141. I->getName() + ".lcssa");
  142. // Get the debug location from the original instruction.
  143. PN->setDebugLoc(I->getDebugLoc());
  144. // Add inputs from inside the loop for this PHI. This is valid
  145. // because `I` dominates `ExitBB` (checked above). This implies
  146. // that every incoming block/edge is dominated by `I` as well,
  147. // i.e. we can add uses of `I` to those incoming edges/append to the incoming
  148. // blocks without violating the SSA dominance property.
  149. for (BasicBlock *Pred : PredCache.get(ExitBB)) {
  150. PN->addIncoming(I, Pred);
  151. // If the exit block has a predecessor not within the loop, arrange for
  152. // the incoming value use corresponding to that predecessor to be
  153. // rewritten in terms of a different LCSSA PHI.
  154. if (!L->contains(Pred))
  155. UsesToRewrite.push_back(
  156. &PN->getOperandUse(PN->getOperandNumForIncomingValue(
  157. PN->getNumIncomingValues() - 1)));
  158. }
  159. AddedPHIs.push_back(PN);
  160. // Remember that this phi makes the value alive in this block.
  161. SSAUpdate.AddAvailableValue(ExitBB, PN);
  162. // LoopSimplify might fail to simplify some loops (e.g. when indirect
  163. // branches are involved). In such situations, it might happen that an
  164. // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
  165. // create PHIs in such an exit block, we are also inserting PHIs into L2's
  166. // header. This could break LCSSA form for L2 because these inserted PHIs
  167. // can also have uses outside of L2. Remember all PHIs in such situation
  168. // as to revisit than later on. FIXME: Remove this if indirectbr support
  169. // into LoopSimplify gets improved.
  170. if (auto *OtherLoop = LI.getLoopFor(ExitBB))
  171. if (!L->contains(OtherLoop))
  172. PostProcessPHIs.push_back(PN);
  173. }
  174. // Rewrite all uses outside the loop in terms of the new PHIs we just
  175. // inserted.
  176. for (Use *UseToRewrite : UsesToRewrite) {
  177. Instruction *User = cast<Instruction>(UseToRewrite->getUser());
  178. BasicBlock *UserBB = User->getParent();
  179. // For practical purposes, we consider that the use in a PHI
  180. // occurs in the respective predecessor block. For more info,
  181. // see the `phi` doc in LangRef and the LCSSA doc.
  182. if (auto *PN = dyn_cast<PHINode>(User))
  183. UserBB = PN->getIncomingBlock(*UseToRewrite);
  184. // If this use is in an exit block, rewrite to use the newly inserted PHI.
  185. // This is required for correctness because SSAUpdate doesn't handle uses
  186. // in the same block. It assumes the PHI we inserted is at the end of the
  187. // block.
  188. if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
  189. UseToRewrite->set(&UserBB->front());
  190. continue;
  191. }
  192. // If we added a single PHI, it must dominate all uses and we can directly
  193. // rename it.
  194. if (AddedPHIs.size() == 1) {
  195. UseToRewrite->set(AddedPHIs[0]);
  196. continue;
  197. }
  198. // Otherwise, do full PHI insertion.
  199. SSAUpdate.RewriteUse(*UseToRewrite);
  200. }
  201. SmallVector<DbgValueInst *, 4> DbgValues;
  202. llvm::findDbgValues(DbgValues, I);
  203. // Update pre-existing debug value uses that reside outside the loop.
  204. for (auto DVI : DbgValues) {
  205. BasicBlock *UserBB = DVI->getParent();
  206. if (InstBB == UserBB || L->contains(UserBB))
  207. continue;
  208. // We currently only handle debug values residing in blocks that were
  209. // traversed while rewriting the uses. If we inserted just a single PHI,
  210. // we will handle all relevant debug values.
  211. Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
  212. : SSAUpdate.FindValueForBlock(UserBB);
  213. if (V)
  214. DVI->replaceVariableLocationOp(I, V);
  215. }
  216. // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
  217. // to post-process them to keep LCSSA form.
  218. for (PHINode *InsertedPN : InsertedPHIs) {
  219. if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
  220. if (!L->contains(OtherLoop))
  221. PostProcessPHIs.push_back(InsertedPN);
  222. }
  223. // Post process PHI instructions that were inserted into another disjoint
  224. // loop and update their exits properly.
  225. for (auto *PostProcessPN : PostProcessPHIs)
  226. if (!PostProcessPN->use_empty())
  227. Worklist.push_back(PostProcessPN);
  228. // Keep track of PHI nodes that we want to remove because they did not have
  229. // any uses rewritten.
  230. for (PHINode *PN : AddedPHIs)
  231. if (PN->use_empty())
  232. LocalPHIsToRemove.insert(PN);
  233. Changed = true;
  234. }
  235. // Remove PHI nodes that did not have any uses rewritten or add them to
  236. // PHIsToRemove, so the caller can remove them after some additional cleanup.
  237. // We need to redo the use_empty() check here, because even if the PHI node
  238. // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
  239. // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
  240. // nodes that only are used by each other. Such situations has only been
  241. // noticed when the input IR contains unreachable code, and leaving some extra
  242. // redundant PHI nodes in such situations is considered a minor problem.
  243. if (PHIsToRemove) {
  244. PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
  245. } else {
  246. for (PHINode *PN : LocalPHIsToRemove)
  247. if (PN->use_empty())
  248. PN->eraseFromParent();
  249. }
  250. return Changed;
  251. }
  252. // Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
  253. static void computeBlocksDominatingExits(
  254. Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
  255. SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
  256. // We start from the exit blocks, as every block trivially dominates itself
  257. // (not strictly).
  258. SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
  259. while (!BBWorklist.empty()) {
  260. BasicBlock *BB = BBWorklist.pop_back_val();
  261. // Check if this is a loop header. If this is the case, we're done.
  262. if (L.getHeader() == BB)
  263. continue;
  264. // Otherwise, add its immediate predecessor in the dominator tree to the
  265. // worklist, unless we visited it already.
  266. BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
  267. // Exit blocks can have an immediate dominator not belonging to the
  268. // loop. For an exit block to be immediately dominated by another block
  269. // outside the loop, it implies not all paths from that dominator, to the
  270. // exit block, go through the loop.
  271. // Example:
  272. //
  273. // |---- A
  274. // | |
  275. // | B<--
  276. // | | |
  277. // |---> C --
  278. // |
  279. // D
  280. //
  281. // C is the exit block of the loop and it's immediately dominated by A,
  282. // which doesn't belong to the loop.
  283. if (!L.contains(IDomBB))
  284. continue;
  285. if (BlocksDominatingExits.insert(IDomBB))
  286. BBWorklist.push_back(IDomBB);
  287. }
  288. }
  289. bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
  290. ScalarEvolution *SE) {
  291. bool Changed = false;
  292. #ifdef EXPENSIVE_CHECKS
  293. // Verify all sub-loops are in LCSSA form already.
  294. for (Loop *SubLoop: L) {
  295. (void)SubLoop; // Silence unused variable warning.
  296. assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
  297. }
  298. #endif
  299. SmallVector<BasicBlock *, 8> ExitBlocks;
  300. L.getExitBlocks(ExitBlocks);
  301. if (ExitBlocks.empty())
  302. return false;
  303. SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
  304. // We want to avoid use-scanning leveraging dominance informations.
  305. // If a block doesn't dominate any of the loop exits, the none of the values
  306. // defined in the loop can be used outside.
  307. // We compute the set of blocks fullfilling the conditions in advance
  308. // walking the dominator tree upwards until we hit a loop header.
  309. computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
  310. SmallVector<Instruction *, 8> Worklist;
  311. // Look at all the instructions in the loop, checking to see if they have uses
  312. // outside the loop. If so, put them into the worklist to rewrite those uses.
  313. for (BasicBlock *BB : BlocksDominatingExits) {
  314. // Skip blocks that are part of any sub-loops, they must be in LCSSA
  315. // already.
  316. if (LI->getLoopFor(BB) != &L)
  317. continue;
  318. for (Instruction &I : *BB) {
  319. // Reject two common cases fast: instructions with no uses (like stores)
  320. // and instructions with one use that is in the same block as this.
  321. if (I.use_empty() ||
  322. (I.hasOneUse() && I.user_back()->getParent() == BB &&
  323. !isa<PHINode>(I.user_back())))
  324. continue;
  325. // Tokens cannot be used in PHI nodes, so we skip over them.
  326. // We can run into tokens which are live out of a loop with catchswitch
  327. // instructions in Windows EH if the catchswitch has one catchpad which
  328. // is inside the loop and another which is not.
  329. if (I.getType()->isTokenTy())
  330. continue;
  331. Worklist.push_back(&I);
  332. }
  333. }
  334. IRBuilder<> Builder(L.getHeader()->getContext());
  335. Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
  336. // If we modified the code, remove any caches about the loop from SCEV to
  337. // avoid dangling entries.
  338. // FIXME: This is a big hammer, can we clear the cache more selectively?
  339. if (SE && Changed)
  340. SE->forgetLoop(&L);
  341. assert(L.isLCSSAForm(DT));
  342. return Changed;
  343. }
  344. /// Process a loop nest depth first.
  345. bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
  346. const LoopInfo *LI, ScalarEvolution *SE) {
  347. bool Changed = false;
  348. // Recurse depth-first through inner loops.
  349. for (Loop *SubLoop : L.getSubLoops())
  350. Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
  351. Changed |= formLCSSA(L, DT, LI, SE);
  352. return Changed;
  353. }
  354. /// Process all loops in the function, inner-most out.
  355. static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT,
  356. ScalarEvolution *SE) {
  357. bool Changed = false;
  358. for (auto &L : *LI)
  359. Changed |= formLCSSARecursively(*L, DT, LI, SE);
  360. return Changed;
  361. }
  362. namespace {
  363. struct LCSSAWrapperPass : public FunctionPass {
  364. static char ID; // Pass identification, replacement for typeid
  365. LCSSAWrapperPass() : FunctionPass(ID) {
  366. initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
  367. }
  368. // Cached analysis information for the current function.
  369. DominatorTree *DT;
  370. LoopInfo *LI;
  371. ScalarEvolution *SE;
  372. bool runOnFunction(Function &F) override;
  373. void verifyAnalysis() const override {
  374. // This check is very expensive. On the loop intensive compiles it may cause
  375. // up to 10x slowdown. Currently it's disabled by default. LPPassManager
  376. // always does limited form of the LCSSA verification. Similar reasoning
  377. // was used for the LoopInfo verifier.
  378. if (VerifyLoopLCSSA) {
  379. assert(all_of(*LI,
  380. [&](Loop *L) {
  381. return L->isRecursivelyLCSSAForm(*DT, *LI);
  382. }) &&
  383. "LCSSA form is broken!");
  384. }
  385. };
  386. /// This transformation requires natural loop information & requires that
  387. /// loop preheaders be inserted into the CFG. It maintains both of these,
  388. /// as well as the CFG. It also requires dominator information.
  389. void getAnalysisUsage(AnalysisUsage &AU) const override {
  390. AU.setPreservesCFG();
  391. AU.addRequired<DominatorTreeWrapperPass>();
  392. AU.addRequired<LoopInfoWrapperPass>();
  393. AU.addPreservedID(LoopSimplifyID);
  394. AU.addPreserved<AAResultsWrapperPass>();
  395. AU.addPreserved<BasicAAWrapperPass>();
  396. AU.addPreserved<GlobalsAAWrapperPass>();
  397. AU.addPreserved<ScalarEvolutionWrapperPass>();
  398. AU.addPreserved<SCEVAAWrapperPass>();
  399. AU.addPreserved<BranchProbabilityInfoWrapperPass>();
  400. AU.addPreserved<MemorySSAWrapperPass>();
  401. // This is needed to perform LCSSA verification inside LPPassManager
  402. AU.addRequired<LCSSAVerificationPass>();
  403. AU.addPreserved<LCSSAVerificationPass>();
  404. }
  405. };
  406. }
  407. char LCSSAWrapperPass::ID = 0;
  408. INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
  409. false, false)
  410. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  411. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  412. INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
  413. INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
  414. false, false)
  415. Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
  416. char &llvm::LCSSAID = LCSSAWrapperPass::ID;
  417. /// Transform \p F into loop-closed SSA form.
  418. bool LCSSAWrapperPass::runOnFunction(Function &F) {
  419. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  420. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  421. auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
  422. SE = SEWP ? &SEWP->getSE() : nullptr;
  423. return formLCSSAOnAllLoops(LI, *DT, SE);
  424. }
  425. PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
  426. auto &LI = AM.getResult<LoopAnalysis>(F);
  427. auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  428. auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
  429. if (!formLCSSAOnAllLoops(&LI, DT, SE))
  430. return PreservedAnalyses::all();
  431. PreservedAnalyses PA;
  432. PA.preserveSet<CFGAnalyses>();
  433. PA.preserve<ScalarEvolutionAnalysis>();
  434. // BPI maps terminators to probabilities, since we don't modify the CFG, no
  435. // updates are needed to preserve it.
  436. PA.preserve<BranchProbabilityAnalysis>();
  437. PA.preserve<MemorySSAAnalysis>();
  438. return PA;
  439. }