FixIrreducible.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. //===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // An irreducible SCC is one which has multiple "header" blocks, i.e., blocks
  10. // with control-flow edges incident from outside the SCC. This pass converts an
  11. // irreducible SCC into a natural loop by applying the following transformation:
  12. //
  13. // 1. Collect the set of headers H of the SCC.
  14. // 2. Collect the set of predecessors P of these headers. These may be inside as
  15. // well as outside the SCC.
  16. // 3. Create block N and redirect every edge from set P to set H through N.
  17. //
  18. // This converts the SCC into a natural loop with N as the header: N is the only
  19. // block with edges incident from outside the SCC, and all backedges in the SCC
  20. // are incident on N, i.e., for every backedge, the head now dominates the tail.
  21. //
  22. // INPUT CFG: The blocks A and B form an irreducible loop with two headers.
  23. //
  24. //                        Entry
  25. //                       /     \
  26. //                      v       v
  27. //                      A ----> B
  28. //                      ^      /|
  29. //                       `----' |
  30. //                              v
  31. //                             Exit
  32. //
  33. // OUTPUT CFG: Edges incident on A and B are now redirected through a
  34. // new block N, forming a natural loop consisting of N, A and B.
  35. //
  36. //                      Entry
  37. //                        |
  38. //                        v
  39. //                  .---> N <---.
  40. //                 /     / \     \
  41. //                |     /   \     |
  42. //                \    v     v    /
  43. //                 `-- A     B --'
  44. //                           |
  45. //                           v
  46. //                          Exit
  47. //
  48. // The transformation is applied to every maximal SCC that is not already
  49. // recognized as a loop. The pass operates on all maximal SCCs found in the
  50. // function body outside of any loop, as well as those found inside each loop,
  51. // including inside any newly created loops. This ensures that any SCC hidden
  52. // inside a maximal SCC is also transformed.
  53. //
  54. // The actual transformation is handled by function CreateControlFlowHub, which
  55. // takes a set of incoming blocks (the predecessors) and outgoing blocks (the
  56. // headers). The function also moves every PHINode in an outgoing block to the
  57. // hub. Since the hub dominates all the outgoing blocks, each such PHINode
  58. // continues to dominate its uses. Since every header in an SCC has at least two
  59. // predecessors, every value used in the header (or later) but defined in a
  60. // predecessor (or earlier) is represented by a PHINode in a header. Hence the
  61. // above handling of PHINodes is sufficient and no further processing is
  62. // required to restore SSA.
  63. //
  64. // Limitation: The pass cannot handle switch statements and indirect
  65. // branches. Both must be lowered to plain branches first.
  66. //
  67. //===----------------------------------------------------------------------===//
  68. #include "llvm/Transforms/Utils/FixIrreducible.h"
  69. #include "llvm/ADT/SCCIterator.h"
  70. #include "llvm/Analysis/DomTreeUpdater.h"
  71. #include "llvm/Analysis/LoopIterator.h"
  72. #include "llvm/InitializePasses.h"
  73. #include "llvm/Pass.h"
  74. #include "llvm/Transforms/Utils.h"
  75. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  76. #define DEBUG_TYPE "fix-irreducible"
  77. using namespace llvm;
  78. namespace {
  79. struct FixIrreducible : public FunctionPass {
  80. static char ID;
  81. FixIrreducible() : FunctionPass(ID) {
  82. initializeFixIrreduciblePass(*PassRegistry::getPassRegistry());
  83. }
  84. void getAnalysisUsage(AnalysisUsage &AU) const override {
  85. AU.addRequiredID(LowerSwitchID);
  86. AU.addRequired<DominatorTreeWrapperPass>();
  87. AU.addRequired<LoopInfoWrapperPass>();
  88. AU.addPreservedID(LowerSwitchID);
  89. AU.addPreserved<DominatorTreeWrapperPass>();
  90. AU.addPreserved<LoopInfoWrapperPass>();
  91. }
  92. bool runOnFunction(Function &F) override;
  93. };
  94. } // namespace
// Storage for the pass identifier declared in the FixIrreducible struct.
char FixIrreducible::ID = 0;

// Factory: returns a newly allocated FixIrreducible pass instance. The caller
// receives a raw pointer to the heap-allocated object.
FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }

// Pass registration under the command-line name "fix-irreducible". The
// dependency list names the same three passes that getAnalysisUsage() marks
// as required: LowerSwitch, the dominator tree and loop info.
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
                      "Convert irreducible control-flow into natural loops",
                      false /* Only looks at CFG */, false /* Analysis Pass */)
INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
                    "Convert irreducible control-flow into natural loops",
                    false /* Only looks at CFG */, false /* Analysis Pass */)
  106. // When a new loop is created, existing children of the parent loop may now be
  107. // fully inside the new loop. Reconnect these as children of the new loop.
  108. static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
  109. SetVector<BasicBlock *> &Blocks,
  110. SetVector<BasicBlock *> &Headers) {
  111. auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector()
  112. : LI.getTopLevelLoopsVector();
  113. // The new loop cannot be its own child, and any candidate is a
  114. // child iff its header is owned by the new loop. Move all the
  115. // children to a new vector.
  116. auto FirstChild = std::partition(
  117. CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
  118. return L == NewLoop || !Blocks.contains(L->getHeader());
  119. });
  120. SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
  121. CandidateLoops.erase(FirstChild, CandidateLoops.end());
  122. for (Loop *Child : ChildLoops) {
  123. LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
  124. << "\n");
  125. // TODO: A child loop whose header is also a header in the current
  126. // SCC gets destroyed since its backedges are removed. That may
  127. // not be necessary if we can retain such backedges.
  128. if (Headers.count(Child->getHeader())) {
  129. for (auto *BB : Child->blocks()) {
  130. if (LI.getLoopFor(BB) != Child)
  131. continue;
  132. LI.changeLoopFor(BB, NewLoop);
  133. LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName()
  134. << "\n");
  135. }
  136. std::vector<Loop *> GrandChildLoops;
  137. std::swap(GrandChildLoops, Child->getSubLoopsVector());
  138. for (auto *GrandChildLoop : GrandChildLoops) {
  139. GrandChildLoop->setParentLoop(nullptr);
  140. NewLoop->addChildLoop(GrandChildLoop);
  141. }
  142. LI.destroy(Child);
  143. LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n");
  144. continue;
  145. }
  146. Child->setParentLoop(nullptr);
  147. NewLoop->addChildLoop(Child);
  148. LLVM_DEBUG(dbgs() << "added child loop to new loop\n");
  149. }
  150. }
  151. // Given a set of blocks and headers in an irreducible SCC, convert it into a
  152. // natural loop. Also insert this new loop at its appropriate place in the
  153. // hierarchy of loops.
  154. static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
  155. Loop *ParentLoop,
  156. SetVector<BasicBlock *> &Blocks,
  157. SetVector<BasicBlock *> &Headers) {
  158. #ifndef NDEBUG
  159. // All headers are part of the SCC
  160. for (auto *H : Headers) {
  161. assert(Blocks.count(H));
  162. }
  163. #endif
  164. SetVector<BasicBlock *> Predecessors;
  165. for (auto *H : Headers) {
  166. for (auto *P : predecessors(H)) {
  167. Predecessors.insert(P);
  168. }
  169. }
  170. LLVM_DEBUG(
  171. dbgs() << "Found predecessors:";
  172. for (auto P : Predecessors) {
  173. dbgs() << " " << P->getName();
  174. }
  175. dbgs() << "\n");
  176. // Redirect all the backedges through a "hub" consisting of a series
  177. // of guard blocks that manage the flow of control from the
  178. // predecessors to the headers.
  179. SmallVector<BasicBlock *, 8> GuardBlocks;
  180. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  181. CreateControlFlowHub(&DTU, GuardBlocks, Predecessors, Headers, "irr");
  182. #if defined(EXPENSIVE_CHECKS)
  183. assert(DT.verify(DominatorTree::VerificationLevel::Full));
  184. #else
  185. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  186. #endif
  187. // Create a new loop from the now-transformed cycle
  188. auto NewLoop = LI.AllocateLoop();
  189. if (ParentLoop) {
  190. ParentLoop->addChildLoop(NewLoop);
  191. } else {
  192. LI.addTopLevelLoop(NewLoop);
  193. }
  194. // Add the guard blocks to the new loop. The first guard block is
  195. // the head of all the backedges, and it is the first to be inserted
  196. // in the loop. This ensures that it is recognized as the
  197. // header. Since the new loop is already in LoopInfo, the new blocks
  198. // are also propagated up the chain of parent loops.
  199. for (auto *G : GuardBlocks) {
  200. LLVM_DEBUG(dbgs() << "added guard block: " << G->getName() << "\n");
  201. NewLoop->addBasicBlockToLoop(G, LI);
  202. }
  203. // Add the SCC blocks to the new loop.
  204. for (auto *BB : Blocks) {
  205. NewLoop->addBlockEntry(BB);
  206. if (LI.getLoopFor(BB) == ParentLoop) {
  207. LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName()
  208. << "\n");
  209. LI.changeLoopFor(BB, NewLoop);
  210. } else {
  211. LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n");
  212. }
  213. }
  214. LLVM_DEBUG(dbgs() << "header for new loop: "
  215. << NewLoop->getHeader()->getName() << "\n");
  216. reconnectChildLoops(LI, ParentLoop, NewLoop, Blocks, Headers);
  217. NewLoop->verifyLoop();
  218. if (ParentLoop) {
  219. ParentLoop->verifyLoop();
  220. }
  221. #if defined(EXPENSIVE_CHECKS)
  222. LI.verify(DT);
  223. #endif // EXPENSIVE_CHECKS
  224. }
namespace llvm {
// Enable the graph traits required for traversing a Loop body.
// The specialization simply inherits everything from LoopBodyTraits, which is
// what allows a Loop to be handed to scc_begin() in makeReducible() below.
template <> struct GraphTraits<Loop> : LoopBodyTraits {};
} // namespace llvm
  229. // Overloaded wrappers to go with the function template below.
  230. static BasicBlock *unwrapBlock(BasicBlock *B) { return B; }
  231. static BasicBlock *unwrapBlock(LoopBodyTraits::NodeRef &N) { return N.second; }
  232. static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Function *F,
  233. SetVector<BasicBlock *> &Blocks,
  234. SetVector<BasicBlock *> &Headers) {
  235. createNaturalLoopInternal(LI, DT, nullptr, Blocks, Headers);
  236. }
  237. static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Loop &L,
  238. SetVector<BasicBlock *> &Blocks,
  239. SetVector<BasicBlock *> &Headers) {
  240. createNaturalLoopInternal(LI, DT, &L, Blocks, Headers);
  241. }
  242. // Convert irreducible SCCs; Graph G may be a Function* or a Loop&.
  243. template <class Graph>
  244. static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
  245. bool Changed = false;
  246. for (auto Scc = scc_begin(G); !Scc.isAtEnd(); ++Scc) {
  247. if (Scc->size() < 2)
  248. continue;
  249. SetVector<BasicBlock *> Blocks;
  250. LLVM_DEBUG(dbgs() << "Found SCC:");
  251. for (auto N : *Scc) {
  252. auto BB = unwrapBlock(N);
  253. LLVM_DEBUG(dbgs() << " " << BB->getName());
  254. Blocks.insert(BB);
  255. }
  256. LLVM_DEBUG(dbgs() << "\n");
  257. // Minor optimization: The SCC blocks are usually discovered in an order
  258. // that is the opposite of the order in which these blocks appear as branch
  259. // targets. This results in a lot of condition inversions in the control
  260. // flow out of the new ControlFlowHub, which can be mitigated if the orders
  261. // match. So we discover the headers using the reverse of the block order.
  262. SetVector<BasicBlock *> Headers;
  263. LLVM_DEBUG(dbgs() << "Found headers:");
  264. for (auto *BB : reverse(Blocks)) {
  265. for (const auto P : predecessors(BB)) {
  266. // Skip unreachable predecessors.
  267. if (!DT.isReachableFromEntry(P))
  268. continue;
  269. if (!Blocks.count(P)) {
  270. LLVM_DEBUG(dbgs() << " " << BB->getName());
  271. Headers.insert(BB);
  272. break;
  273. }
  274. }
  275. }
  276. LLVM_DEBUG(dbgs() << "\n");
  277. if (Headers.size() == 1) {
  278. assert(LI.isLoopHeader(Headers.front()));
  279. LLVM_DEBUG(dbgs() << "Natural loop with a single header: skipped\n");
  280. continue;
  281. }
  282. createNaturalLoop(LI, DT, G, Blocks, Headers);
  283. Changed = true;
  284. }
  285. return Changed;
  286. }
  287. static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
  288. LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
  289. << F.getName() << "\n");
  290. bool Changed = false;
  291. SmallVector<Loop *, 8> WorkList;
  292. LLVM_DEBUG(dbgs() << "visiting top-level\n");
  293. Changed |= makeReducible(LI, DT, &F);
  294. // Any SCCs reduced are now already in the list of top-level loops, so simply
  295. // add them all to the worklist.
  296. append_range(WorkList, LI);
  297. while (!WorkList.empty()) {
  298. auto L = WorkList.pop_back_val();
  299. LLVM_DEBUG(dbgs() << "visiting loop with header "
  300. << L->getHeader()->getName() << "\n");
  301. Changed |= makeReducible(LI, DT, *L);
  302. // Any SCCs reduced are now already in the list of child loops, so simply
  303. // add them all to the worklist.
  304. WorkList.append(L->begin(), L->end());
  305. }
  306. return Changed;
  307. }
  308. bool FixIrreducible::runOnFunction(Function &F) {
  309. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  310. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  311. return FixIrreducibleImpl(F, LI, DT);
  312. }
  313. PreservedAnalyses FixIrreduciblePass::run(Function &F,
  314. FunctionAnalysisManager &AM) {
  315. auto &LI = AM.getResult<LoopAnalysis>(F);
  316. auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  317. if (!FixIrreducibleImpl(F, LI, DT))
  318. return PreservedAnalyses::all();
  319. PreservedAnalyses PA;
  320. PA.preserve<LoopAnalysis>();
  321. PA.preserve<DominatorTreeAnalysis>();
  322. return PA;
  323. }