StructurizeCFG.cpp 34 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141
  1. //===- StructurizeCFG.cpp -------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/Transforms/Scalar/StructurizeCFG.h"
  9. #include "llvm/ADT/DenseMap.h"
  10. #include "llvm/ADT/MapVector.h"
  11. #include "llvm/ADT/SCCIterator.h"
  12. #include "llvm/ADT/STLExtras.h"
  13. #include "llvm/ADT/SmallPtrSet.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/Analysis/InstructionSimplify.h"
  16. #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  17. #include "llvm/Analysis/RegionInfo.h"
  18. #include "llvm/Analysis/RegionIterator.h"
  19. #include "llvm/Analysis/RegionPass.h"
  20. #include "llvm/IR/Argument.h"
  21. #include "llvm/IR/BasicBlock.h"
  22. #include "llvm/IR/CFG.h"
  23. #include "llvm/IR/Constant.h"
  24. #include "llvm/IR/Constants.h"
  25. #include "llvm/IR/Dominators.h"
  26. #include "llvm/IR/Function.h"
  27. #include "llvm/IR/InstrTypes.h"
  28. #include "llvm/IR/Instruction.h"
  29. #include "llvm/IR/Instructions.h"
  30. #include "llvm/IR/Metadata.h"
  31. #include "llvm/IR/PassManager.h"
  32. #include "llvm/IR/PatternMatch.h"
  33. #include "llvm/IR/Type.h"
  34. #include "llvm/IR/Use.h"
  35. #include "llvm/IR/User.h"
  36. #include "llvm/IR/Value.h"
  37. #include "llvm/IR/ValueHandle.h"
  38. #include "llvm/InitializePasses.h"
  39. #include "llvm/Pass.h"
  40. #include "llvm/Support/Casting.h"
  41. #include "llvm/Support/CommandLine.h"
  42. #include "llvm/Support/Debug.h"
  43. #include "llvm/Support/ErrorHandling.h"
  44. #include "llvm/Support/raw_ostream.h"
  45. #include "llvm/Transforms/Scalar.h"
  46. #include "llvm/Transforms/Utils.h"
  47. #include "llvm/Transforms/Utils/Local.h"
  48. #include "llvm/Transforms/Utils/SSAUpdater.h"
  49. #include <algorithm>
  50. #include <cassert>
  51. #include <utility>
  52. using namespace llvm;
  53. using namespace llvm::PatternMatch;
  54. #define DEBUG_TYPE "structurizecfg"
  55. // The name for newly created blocks.
  56. const char FlowBlockName[] = "Flow";
  57. namespace {
  58. static cl::opt<bool> ForceSkipUniformRegions(
  59. "structurizecfg-skip-uniform-regions",
  60. cl::Hidden,
  61. cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
  62. cl::init(false));
  63. static cl::opt<bool>
  64. RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
  65. cl::desc("Allow relaxed uniform region checks"),
  66. cl::init(true));
  67. // Definition of the complex types used in this pass.
  68. using BBValuePair = std::pair<BasicBlock *, Value *>;
  69. using RNVector = SmallVector<RegionNode *, 8>;
  70. using BBVector = SmallVector<BasicBlock *, 8>;
  71. using BranchVector = SmallVector<BranchInst *, 8>;
  72. using BBValueVector = SmallVector<BBValuePair, 2>;
  73. using BBSet = SmallPtrSet<BasicBlock *, 8>;
  74. using PhiMap = MapVector<PHINode *, BBValueVector>;
  75. using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
  76. using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
  77. using BBPredicates = DenseMap<BasicBlock *, Value *>;
  78. using PredMap = DenseMap<BasicBlock *, BBPredicates>;
  79. using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
  80. // A traits type that is intended to be used in graph algorithms. The graph
  81. // traits starts at an entry node, and traverses the RegionNodes that are in
  82. // the Nodes set.
  83. struct SubGraphTraits {
  84. using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
  85. using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
  86. // This wraps a set of Nodes into the iterator, so we know which edges to
  87. // filter out.
  88. class WrappedSuccIterator
  89. : public iterator_adaptor_base<
  90. WrappedSuccIterator, BaseSuccIterator,
  91. typename std::iterator_traits<BaseSuccIterator>::iterator_category,
  92. NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
  93. SmallDenseSet<RegionNode *> *Nodes;
  94. public:
  95. WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
  96. : iterator_adaptor_base(It), Nodes(Nodes) {}
  97. NodeRef operator*() const { return {*I, Nodes}; }
  98. };
  99. static bool filterAll(const NodeRef &N) { return true; }
  100. static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
  101. using ChildIteratorType =
  102. filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
  103. static NodeRef getEntryNode(Region *R) {
  104. return {GraphTraits<Region *>::getEntryNode(R), nullptr};
  105. }
  106. static NodeRef getEntryNode(NodeRef N) { return N; }
  107. static iterator_range<ChildIteratorType> children(const NodeRef &N) {
  108. auto *filter = N.second ? &filterSet : &filterAll;
  109. return make_filter_range(
  110. make_range<WrappedSuccIterator>(
  111. {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
  112. {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
  113. filter);
  114. }
  115. static ChildIteratorType child_begin(const NodeRef &N) {
  116. return children(N).begin();
  117. }
  118. static ChildIteratorType child_end(const NodeRef &N) {
  119. return children(N).end();
  120. }
  121. };
  122. /// Finds the nearest common dominator of a set of BasicBlocks.
  123. ///
  124. /// For every BB you add to the set, you can specify whether we "remember" the
  125. /// block. When you get the common dominator, you can also ask whether it's one
  126. /// of the blocks we remembered.
  127. class NearestCommonDominator {
  128. DominatorTree *DT;
  129. BasicBlock *Result = nullptr;
  130. bool ResultIsRemembered = false;
  131. /// Add BB to the resulting dominator.
  132. void addBlock(BasicBlock *BB, bool Remember) {
  133. if (!Result) {
  134. Result = BB;
  135. ResultIsRemembered = Remember;
  136. return;
  137. }
  138. BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
  139. if (NewResult != Result)
  140. ResultIsRemembered = false;
  141. if (NewResult == BB)
  142. ResultIsRemembered |= Remember;
  143. Result = NewResult;
  144. }
  145. public:
  146. explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
  147. void addBlock(BasicBlock *BB) {
  148. addBlock(BB, /* Remember = */ false);
  149. }
  150. void addAndRememberBlock(BasicBlock *BB) {
  151. addBlock(BB, /* Remember = */ true);
  152. }
  153. /// Get the nearest common dominator of all the BBs added via addBlock() and
  154. /// addAndRememberBlock().
  155. BasicBlock *result() { return Result; }
  156. /// Is the BB returned by getResult() one of the blocks we added to the set
  157. /// with addAndRememberBlock()?
  158. bool resultIsRememberedBlock() { return ResultIsRemembered; }
  159. };
  160. /// Transforms the control flow graph on one single entry/exit region
  161. /// at a time.
  162. ///
  163. /// After the transform all "If"/"Then"/"Else" style control flow looks like
  164. /// this:
  165. ///
  166. /// \verbatim
  167. /// 1
  168. /// ||
  169. /// | |
  170. /// 2 |
  171. /// | /
  172. /// |/
  173. /// 3
  174. /// || Where:
  175. /// | | 1 = "If" block, calculates the condition
  176. /// 4 | 2 = "Then" subregion, runs if the condition is true
  177. /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
  178. /// |/ 4 = "Else" optional subregion, runs if the condition is false
  179. /// 5 5 = "End" block, also rejoins the control flow
  180. /// \endverbatim
  181. ///
  182. /// Control flow is expressed as a branch where the true exit goes into the
  183. /// "Then"/"Else" region, while the false exit skips the region
  184. /// The condition for the optional "Else" region is expressed as a PHI node.
  185. /// The incoming values of the PHI node are true for the "If" edge and false
  186. /// for the "Then" edge.
  187. ///
  188. /// Additionally to that even complicated loops look like this:
  189. ///
  190. /// \verbatim
  191. /// 1
  192. /// ||
  193. /// | |
  194. /// 2 ^ Where:
  195. /// | / 1 = "Entry" block
  196. /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
  197. /// 3 3 = "Flow" block, with back edge to entry block
  198. /// |
  199. /// \endverbatim
  200. ///
  201. /// The back edge of the "Flow" block is always on the false side of the branch
  202. /// while the true side continues the general flow. So the loop condition
  203. /// consist of a network of PHI nodes where the true incoming values expresses
  204. /// breaks and the false values expresses continue states.
  205. class StructurizeCFG {
  206. Type *Boolean;
  207. ConstantInt *BoolTrue;
  208. ConstantInt *BoolFalse;
  209. UndefValue *BoolUndef;
  210. Function *Func;
  211. Region *ParentRegion;
  212. LegacyDivergenceAnalysis *DA = nullptr;
  213. DominatorTree *DT;
  214. SmallVector<RegionNode *, 8> Order;
  215. BBSet Visited;
  216. SmallVector<WeakVH, 8> AffectedPhis;
  217. BBPhiMap DeletedPhis;
  218. BB2BBVecMap AddedPhis;
  219. PredMap Predicates;
  220. BranchVector Conditions;
  221. BB2BBMap Loops;
  222. PredMap LoopPreds;
  223. BranchVector LoopConds;
  224. RegionNode *PrevNode;
  225. void orderNodes();
  226. void analyzeLoops(RegionNode *N);
  227. Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
  228. void gatherPredicates(RegionNode *N);
  229. void collectInfos();
  230. void insertConditions(bool Loops);
  231. void simplifyConditions();
  232. void delPhiValues(BasicBlock *From, BasicBlock *To);
  233. void addPhiValues(BasicBlock *From, BasicBlock *To);
  234. void setPhiValues();
  235. void simplifyAffectedPhis();
  236. void killTerminator(BasicBlock *BB);
  237. void changeExit(RegionNode *Node, BasicBlock *NewExit,
  238. bool IncludeDominator);
  239. BasicBlock *getNextFlow(BasicBlock *Dominator);
  240. BasicBlock *needPrefix(bool NeedEmpty);
  241. BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
  242. void setPrevNode(BasicBlock *BB);
  243. bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
  244. bool isPredictableTrue(RegionNode *Node);
  245. void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
  246. void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
  247. void createFlow();
  248. void rebuildSSA();
  249. public:
  250. void init(Region *R);
  251. bool run(Region *R, DominatorTree *DT);
  252. bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
  253. };
  254. class StructurizeCFGLegacyPass : public RegionPass {
  255. bool SkipUniformRegions;
  256. public:
  257. static char ID;
  258. explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
  259. : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
  260. if (ForceSkipUniformRegions.getNumOccurrences())
  261. SkipUniformRegions = ForceSkipUniformRegions.getValue();
  262. initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
  263. }
  264. bool runOnRegion(Region *R, RGPassManager &RGM) override {
  265. StructurizeCFG SCFG;
  266. SCFG.init(R);
  267. if (SkipUniformRegions) {
  268. LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
  269. if (SCFG.makeUniformRegion(R, DA))
  270. return false;
  271. }
  272. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  273. return SCFG.run(R, DT);
  274. }
  275. StringRef getPassName() const override { return "Structurize control flow"; }
  276. void getAnalysisUsage(AnalysisUsage &AU) const override {
  277. if (SkipUniformRegions)
  278. AU.addRequired<LegacyDivergenceAnalysis>();
  279. AU.addRequiredID(LowerSwitchID);
  280. AU.addRequired<DominatorTreeWrapperPass>();
  281. AU.addPreserved<DominatorTreeWrapperPass>();
  282. RegionPass::getAnalysisUsage(AU);
  283. }
  284. };
  285. } // end anonymous namespace
  286. char StructurizeCFGLegacyPass::ID = 0;
  287. INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
  288. "Structurize the CFG", false, false)
  289. INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
  290. INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
  291. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  292. INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
  293. INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
  294. "Structurize the CFG", false, false)
  295. /// Build up the general order of nodes, by performing a topological sort of the
  296. /// parent region's nodes, while ensuring that there is no outer cycle node
  297. /// between any two inner cycle nodes.
  298. void StructurizeCFG::orderNodes() {
  299. Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
  300. GraphTraits<Region *>::nodes_end(ParentRegion)));
  301. if (Order.empty())
  302. return;
  303. SmallDenseSet<RegionNode *> Nodes;
  304. auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
  305. // A list of range indices of SCCs in Order, to be processed.
  306. SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
  307. unsigned I = 0, E = Order.size();
  308. while (true) {
  309. // Run through all the SCCs in the subgraph starting with Entry.
  310. for (auto SCCI =
  311. scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
  312. EntryNode);
  313. !SCCI.isAtEnd(); ++SCCI) {
  314. auto &SCC = *SCCI;
  315. // An SCC up to the size of 2, can be reduced to an entry (the last node),
  316. // and a possible additional node. Therefore, it is already in order, and
  317. // there is no need to add it to the work-list.
  318. unsigned Size = SCC.size();
  319. if (Size > 2)
  320. WorkList.emplace_back(I, I + Size);
  321. // Add the SCC nodes to the Order array.
  322. for (auto &N : SCC) {
  323. assert(I < E && "SCC size mismatch!");
  324. Order[I++] = N.first;
  325. }
  326. }
  327. assert(I == E && "SCC size mismatch!");
  328. // If there are no more SCCs to order, then we are done.
  329. if (WorkList.empty())
  330. break;
  331. std::tie(I, E) = WorkList.pop_back_val();
  332. // Collect the set of nodes in the SCC's subgraph. These are only the
  333. // possible child nodes; we do not add the entry (last node) otherwise we
  334. // will have the same exact SCC all over again.
  335. Nodes.clear();
  336. Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
  337. // Update the entry node.
  338. EntryNode.first = Order[E - 1];
  339. EntryNode.second = &Nodes;
  340. }
  341. }
  342. /// Determine the end of the loops
  343. void StructurizeCFG::analyzeLoops(RegionNode *N) {
  344. if (N->isSubRegion()) {
  345. // Test for exit as back edge
  346. BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
  347. if (Visited.count(Exit))
  348. Loops[Exit] = N->getEntry();
  349. } else {
  350. // Test for successors as back edge
  351. BasicBlock *BB = N->getNodeAs<BasicBlock>();
  352. BranchInst *Term = cast<BranchInst>(BB->getTerminator());
  353. for (BasicBlock *Succ : Term->successors())
  354. if (Visited.count(Succ))
  355. Loops[Succ] = BB;
  356. }
  357. }
  358. /// Build the condition for one edge
  359. Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
  360. bool Invert) {
  361. Value *Cond = Invert ? BoolFalse : BoolTrue;
  362. if (Term->isConditional()) {
  363. Cond = Term->getCondition();
  364. if (Idx != (unsigned)Invert)
  365. Cond = invertCondition(Cond);
  366. }
  367. return Cond;
  368. }
  369. /// Analyze the predecessors of each block and build up predicates
  370. void StructurizeCFG::gatherPredicates(RegionNode *N) {
  371. RegionInfo *RI = ParentRegion->getRegionInfo();
  372. BasicBlock *BB = N->getEntry();
  373. BBPredicates &Pred = Predicates[BB];
  374. BBPredicates &LPred = LoopPreds[BB];
  375. for (BasicBlock *P : predecessors(BB)) {
  376. // Ignore it if it's a branch from outside into our region entry
  377. if (!ParentRegion->contains(P))
  378. continue;
  379. Region *R = RI->getRegionFor(P);
  380. if (R == ParentRegion) {
  381. // It's a top level block in our region
  382. BranchInst *Term = cast<BranchInst>(P->getTerminator());
  383. for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
  384. BasicBlock *Succ = Term->getSuccessor(i);
  385. if (Succ != BB)
  386. continue;
  387. if (Visited.count(P)) {
  388. // Normal forward edge
  389. if (Term->isConditional()) {
  390. // Try to treat it like an ELSE block
  391. BasicBlock *Other = Term->getSuccessor(!i);
  392. if (Visited.count(Other) && !Loops.count(Other) &&
  393. !Pred.count(Other) && !Pred.count(P)) {
  394. Pred[Other] = BoolFalse;
  395. Pred[P] = BoolTrue;
  396. continue;
  397. }
  398. }
  399. Pred[P] = buildCondition(Term, i, false);
  400. } else {
  401. // Back edge
  402. LPred[P] = buildCondition(Term, i, true);
  403. }
  404. }
  405. } else {
  406. // It's an exit from a sub region
  407. while (R->getParent() != ParentRegion)
  408. R = R->getParent();
  409. // Edge from inside a subregion to its entry, ignore it
  410. if (*R == *N)
  411. continue;
  412. BasicBlock *Entry = R->getEntry();
  413. if (Visited.count(Entry))
  414. Pred[Entry] = BoolTrue;
  415. else
  416. LPred[Entry] = BoolFalse;
  417. }
  418. }
  419. }
  420. /// Collect various loop and predicate infos
  421. void StructurizeCFG::collectInfos() {
  422. // Reset predicate
  423. Predicates.clear();
  424. // and loop infos
  425. Loops.clear();
  426. LoopPreds.clear();
  427. // Reset the visited nodes
  428. Visited.clear();
  429. for (RegionNode *RN : reverse(Order)) {
  430. LLVM_DEBUG(dbgs() << "Visiting: "
  431. << (RN->isSubRegion() ? "SubRegion with entry: " : "")
  432. << RN->getEntry()->getName() << "\n");
  433. // Analyze all the conditions leading to a node
  434. gatherPredicates(RN);
  435. // Remember that we've seen this node
  436. Visited.insert(RN->getEntry());
  437. // Find the last back edges
  438. analyzeLoops(RN);
  439. }
  440. }
  441. /// Insert the missing branch conditions
  442. void StructurizeCFG::insertConditions(bool Loops) {
  443. BranchVector &Conds = Loops ? LoopConds : Conditions;
  444. Value *Default = Loops ? BoolTrue : BoolFalse;
  445. SSAUpdater PhiInserter;
  446. for (BranchInst *Term : Conds) {
  447. assert(Term->isConditional());
  448. BasicBlock *Parent = Term->getParent();
  449. BasicBlock *SuccTrue = Term->getSuccessor(0);
  450. BasicBlock *SuccFalse = Term->getSuccessor(1);
  451. PhiInserter.Initialize(Boolean, "");
  452. PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
  453. PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
  454. BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
  455. NearestCommonDominator Dominator(DT);
  456. Dominator.addBlock(Parent);
  457. Value *ParentValue = nullptr;
  458. for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
  459. BasicBlock *BB = BBAndPred.first;
  460. Value *Pred = BBAndPred.second;
  461. if (BB == Parent) {
  462. ParentValue = Pred;
  463. break;
  464. }
  465. PhiInserter.AddAvailableValue(BB, Pred);
  466. Dominator.addAndRememberBlock(BB);
  467. }
  468. if (ParentValue) {
  469. Term->setCondition(ParentValue);
  470. } else {
  471. if (!Dominator.resultIsRememberedBlock())
  472. PhiInserter.AddAvailableValue(Dominator.result(), Default);
  473. Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
  474. }
  475. }
  476. }
  477. /// Simplify any inverted conditions that were built by buildConditions.
  478. void StructurizeCFG::simplifyConditions() {
  479. SmallVector<Instruction *> InstToErase;
  480. for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
  481. auto &Preds = I.second;
  482. for (auto &J : Preds) {
  483. auto &Cond = J.second;
  484. Instruction *Inverted;
  485. if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
  486. !Cond->use_empty()) {
  487. if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
  488. InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
  489. Cond->replaceAllUsesWith(InvertedCmp);
  490. InstToErase.push_back(cast<Instruction>(Cond));
  491. }
  492. }
  493. }
  494. }
  495. for (auto *I : InstToErase)
  496. I->eraseFromParent();
  497. }
  498. /// Remove all PHI values coming from "From" into "To" and remember
  499. /// them in DeletedPhis
  500. void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
  501. PhiMap &Map = DeletedPhis[To];
  502. for (PHINode &Phi : To->phis()) {
  503. bool Recorded = false;
  504. while (Phi.getBasicBlockIndex(From) != -1) {
  505. Value *Deleted = Phi.removeIncomingValue(From, false);
  506. Map[&Phi].push_back(std::make_pair(From, Deleted));
  507. if (!Recorded) {
  508. AffectedPhis.push_back(&Phi);
  509. Recorded = true;
  510. }
  511. }
  512. }
  513. }
  514. /// Add a dummy PHI value as soon as we knew the new predecessor
  515. void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
  516. for (PHINode &Phi : To->phis()) {
  517. Value *Undef = UndefValue::get(Phi.getType());
  518. Phi.addIncoming(Undef, From);
  519. }
  520. AddedPhis[To].push_back(From);
  521. }
  522. /// Add the real PHI value as soon as everything is set up
  523. void StructurizeCFG::setPhiValues() {
  524. SmallVector<PHINode *, 8> InsertedPhis;
  525. SSAUpdater Updater(&InsertedPhis);
  526. for (const auto &AddedPhi : AddedPhis) {
  527. BasicBlock *To = AddedPhi.first;
  528. const BBVector &From = AddedPhi.second;
  529. if (!DeletedPhis.count(To))
  530. continue;
  531. PhiMap &Map = DeletedPhis[To];
  532. for (const auto &PI : Map) {
  533. PHINode *Phi = PI.first;
  534. Value *Undef = UndefValue::get(Phi->getType());
  535. Updater.Initialize(Phi->getType(), "");
  536. Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
  537. Updater.AddAvailableValue(To, Undef);
  538. NearestCommonDominator Dominator(DT);
  539. Dominator.addBlock(To);
  540. for (const auto &VI : PI.second) {
  541. Updater.AddAvailableValue(VI.first, VI.second);
  542. Dominator.addAndRememberBlock(VI.first);
  543. }
  544. if (!Dominator.resultIsRememberedBlock())
  545. Updater.AddAvailableValue(Dominator.result(), Undef);
  546. for (BasicBlock *FI : From)
  547. Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
  548. AffectedPhis.push_back(Phi);
  549. }
  550. DeletedPhis.erase(To);
  551. }
  552. assert(DeletedPhis.empty());
  553. AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
  554. }
  555. void StructurizeCFG::simplifyAffectedPhis() {
  556. bool Changed;
  557. do {
  558. Changed = false;
  559. SimplifyQuery Q(Func->getParent()->getDataLayout());
  560. Q.DT = DT;
  561. for (WeakVH VH : AffectedPhis) {
  562. if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
  563. if (auto NewValue = SimplifyInstruction(Phi, Q)) {
  564. Phi->replaceAllUsesWith(NewValue);
  565. Phi->eraseFromParent();
  566. Changed = true;
  567. }
  568. }
  569. }
  570. } while (Changed);
  571. }
  572. /// Remove phi values from all successors and then remove the terminator.
  573. void StructurizeCFG::killTerminator(BasicBlock *BB) {
  574. Instruction *Term = BB->getTerminator();
  575. if (!Term)
  576. return;
  577. for (BasicBlock *Succ : successors(BB))
  578. delPhiValues(BB, Succ);
  579. if (DA)
  580. DA->removeValue(Term);
  581. Term->eraseFromParent();
  582. }
  583. /// Let node exit(s) point to NewExit
  584. void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
  585. bool IncludeDominator) {
  586. if (Node->isSubRegion()) {
  587. Region *SubRegion = Node->getNodeAs<Region>();
  588. BasicBlock *OldExit = SubRegion->getExit();
  589. BasicBlock *Dominator = nullptr;
  590. // Find all the edges from the sub region to the exit.
  591. // We use make_early_inc_range here because we modify BB's terminator.
  592. for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
  593. if (!SubRegion->contains(BB))
  594. continue;
  595. // Modify the edges to point to the new exit
  596. delPhiValues(BB, OldExit);
  597. BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
  598. addPhiValues(BB, NewExit);
  599. // Find the new dominator (if requested)
  600. if (IncludeDominator) {
  601. if (!Dominator)
  602. Dominator = BB;
  603. else
  604. Dominator = DT->findNearestCommonDominator(Dominator, BB);
  605. }
  606. }
  607. // Change the dominator (if requested)
  608. if (Dominator)
  609. DT->changeImmediateDominator(NewExit, Dominator);
  610. // Update the region info
  611. SubRegion->replaceExit(NewExit);
  612. } else {
  613. BasicBlock *BB = Node->getNodeAs<BasicBlock>();
  614. killTerminator(BB);
  615. BranchInst::Create(NewExit, BB);
  616. addPhiValues(BB, NewExit);
  617. if (IncludeDominator)
  618. DT->changeImmediateDominator(NewExit, BB);
  619. }
  620. }
  621. /// Create a new flow node and update dominator tree and region info
  622. BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
  623. LLVMContext &Context = Func->getContext();
  624. BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
  625. Order.back()->getEntry();
  626. BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
  627. Func, Insert);
  628. DT->addNewBlock(Flow, Dominator);
  629. ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
  630. return Flow;
  631. }
  632. /// Create a new or reuse the previous node as flow node
  633. BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
  634. BasicBlock *Entry = PrevNode->getEntry();
  635. if (!PrevNode->isSubRegion()) {
  636. killTerminator(Entry);
  637. if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
  638. return Entry;
  639. }
  640. // create a new flow node
  641. BasicBlock *Flow = getNextFlow(Entry);
  642. // and wire it up
  643. changeExit(PrevNode, Flow, true);
  644. PrevNode = ParentRegion->getBBNode(Flow);
  645. return Flow;
  646. }
  647. /// Returns the region exit if possible, otherwise just a new flow node
  648. BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
  649. bool ExitUseAllowed) {
  650. if (!Order.empty() || !ExitUseAllowed)
  651. return getNextFlow(Flow);
  652. BasicBlock *Exit = ParentRegion->getExit();
  653. DT->changeImmediateDominator(Exit, Flow);
  654. addPhiValues(Flow, Exit);
  655. return Exit;
  656. }
  657. /// Set the previous node
  658. void StructurizeCFG::setPrevNode(BasicBlock *BB) {
  659. PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
  660. : nullptr;
  661. }
  662. /// Does BB dominate all the predicates of Node?
  663. bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
  664. BBPredicates &Preds = Predicates[Node->getEntry()];
  665. return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
  666. return DT->dominates(BB, Pred.first);
  667. });
  668. }
  669. /// Can we predict that this node will always be called?
  670. bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
  671. BBPredicates &Preds = Predicates[Node->getEntry()];
  672. bool Dominated = false;
  673. // Regionentry is always true
  674. if (!PrevNode)
  675. return true;
  676. for (std::pair<BasicBlock*, Value*> Pred : Preds) {
  677. BasicBlock *BB = Pred.first;
  678. Value *V = Pred.second;
  679. if (V != BoolTrue)
  680. return false;
  681. if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
  682. Dominated = true;
  683. }
  684. // TODO: The dominator check is too strict
  685. return Dominated;
  686. }
  687. /// Take one node from the order vector and wire it up
  688. void StructurizeCFG::wireFlow(bool ExitUseAllowed,
  689. BasicBlock *LoopEnd) {
  690. RegionNode *Node = Order.pop_back_val();
  691. Visited.insert(Node->getEntry());
  692. if (isPredictableTrue(Node)) {
  693. // Just a linear flow
  694. if (PrevNode) {
  695. changeExit(PrevNode, Node->getEntry(), true);
  696. }
  697. PrevNode = Node;
  698. } else {
  699. // Insert extra prefix node (or reuse last one)
  700. BasicBlock *Flow = needPrefix(false);
  701. // Insert extra postfix node (or use exit instead)
  702. BasicBlock *Entry = Node->getEntry();
  703. BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
  704. // let it point to entry and next block
  705. Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
  706. addPhiValues(Flow, Entry);
  707. DT->changeImmediateDominator(Entry, Flow);
  708. PrevNode = Node;
  709. while (!Order.empty() && !Visited.count(LoopEnd) &&
  710. dominatesPredicates(Entry, Order.back())) {
  711. handleLoops(false, LoopEnd);
  712. }
  713. changeExit(PrevNode, Next, false);
  714. setPrevNode(Next);
  715. }
  716. }
  717. void StructurizeCFG::handleLoops(bool ExitUseAllowed,
  718. BasicBlock *LoopEnd) {
  719. RegionNode *Node = Order.back();
  720. BasicBlock *LoopStart = Node->getEntry();
  721. if (!Loops.count(LoopStart)) {
  722. wireFlow(ExitUseAllowed, LoopEnd);
  723. return;
  724. }
  725. if (!isPredictableTrue(Node))
  726. LoopStart = needPrefix(true);
  727. LoopEnd = Loops[Node->getEntry()];
  728. wireFlow(false, LoopEnd);
  729. while (!Visited.count(LoopEnd)) {
  730. handleLoops(false, LoopEnd);
  731. }
  732. // If the start of the loop is the entry block, we can't branch to it so
  733. // insert a new dummy entry block.
  734. Function *LoopFunc = LoopStart->getParent();
  735. if (LoopStart == &LoopFunc->getEntryBlock()) {
  736. LoopStart->setName("entry.orig");
  737. BasicBlock *NewEntry =
  738. BasicBlock::Create(LoopStart->getContext(),
  739. "entry",
  740. LoopFunc,
  741. LoopStart);
  742. BranchInst::Create(LoopStart, NewEntry);
  743. DT->setNewRoot(NewEntry);
  744. }
  745. // Create an extra loop end node
  746. LoopEnd = needPrefix(false);
  747. BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
  748. LoopConds.push_back(BranchInst::Create(Next, LoopStart,
  749. BoolUndef, LoopEnd));
  750. addPhiValues(LoopEnd, LoopStart);
  751. setPrevNode(Next);
  752. }
  753. /// After this function control flow looks like it should be, but
  754. /// branches and PHI nodes only have undefined conditions.
  755. void StructurizeCFG::createFlow() {
  756. BasicBlock *Exit = ParentRegion->getExit();
  757. bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
  758. AffectedPhis.clear();
  759. DeletedPhis.clear();
  760. AddedPhis.clear();
  761. Conditions.clear();
  762. LoopConds.clear();
  763. PrevNode = nullptr;
  764. Visited.clear();
  765. while (!Order.empty()) {
  766. handleLoops(EntryDominatesExit, nullptr);
  767. }
  768. if (PrevNode)
  769. changeExit(PrevNode, Exit, EntryDominatesExit);
  770. else
  771. assert(EntryDominatesExit);
  772. }
  773. /// Handle a rare case where the disintegrated nodes instructions
  774. /// no longer dominate all their uses. Not sure if this is really necessary
  775. void StructurizeCFG::rebuildSSA() {
  776. SSAUpdater Updater;
  777. for (BasicBlock *BB : ParentRegion->blocks())
  778. for (Instruction &I : *BB) {
  779. bool Initialized = false;
  780. // We may modify the use list as we iterate over it, so we use
  781. // make_early_inc_range.
  782. for (Use &U : llvm::make_early_inc_range(I.uses())) {
  783. Instruction *User = cast<Instruction>(U.getUser());
  784. if (User->getParent() == BB) {
  785. continue;
  786. } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
  787. if (UserPN->getIncomingBlock(U) == BB)
  788. continue;
  789. }
  790. if (DT->dominates(&I, User))
  791. continue;
  792. if (!Initialized) {
  793. Value *Undef = UndefValue::get(I.getType());
  794. Updater.Initialize(I.getType(), "");
  795. Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
  796. Updater.AddAvailableValue(BB, &I);
  797. Initialized = true;
  798. }
  799. Updater.RewriteUseAfterInsertions(U);
  800. }
  801. }
  802. }
  803. static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
  804. const LegacyDivergenceAnalysis &DA) {
  805. // Bool for if all sub-regions are uniform.
  806. bool SubRegionsAreUniform = true;
  807. // Count of how many direct children are conditional.
  808. unsigned ConditionalDirectChildren = 0;
  809. for (auto E : R->elements()) {
  810. if (!E->isSubRegion()) {
  811. auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
  812. if (!Br || !Br->isConditional())
  813. continue;
  814. if (!DA.isUniform(Br))
  815. return false;
  816. // One of our direct children is conditional.
  817. ConditionalDirectChildren++;
  818. LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
  819. << " has uniform terminator\n");
  820. } else {
  821. // Explicitly refuse to treat regions as uniform if they have non-uniform
  822. // subregions. We cannot rely on DivergenceAnalysis for branches in
  823. // subregions because those branches may have been removed and re-created,
  824. // so we look for our metadata instead.
  825. //
  826. // Warning: It would be nice to treat regions as uniform based only on
  827. // their direct child basic blocks' terminators, regardless of whether
  828. // subregions are uniform or not. However, this requires a very careful
  829. // look at SIAnnotateControlFlow to make sure nothing breaks there.
  830. for (auto BB : E->getNodeAs<Region>()->blocks()) {
  831. auto Br = dyn_cast<BranchInst>(BB->getTerminator());
  832. if (!Br || !Br->isConditional())
  833. continue;
  834. if (!Br->getMetadata(UniformMDKindID)) {
  835. // Early exit if we cannot have relaxed uniform regions.
  836. if (!RelaxedUniformRegions)
  837. return false;
  838. SubRegionsAreUniform = false;
  839. break;
  840. }
  841. }
  842. }
  843. }
  844. // Our region is uniform if:
  845. // 1. All conditional branches that are direct children are uniform (checked
  846. // above).
  847. // 2. And either:
  848. // a. All sub-regions are uniform.
  849. // b. There is one or less conditional branches among the direct children.
  850. return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
  851. }
  852. void StructurizeCFG::init(Region *R) {
  853. LLVMContext &Context = R->getEntry()->getContext();
  854. Boolean = Type::getInt1Ty(Context);
  855. BoolTrue = ConstantInt::getTrue(Context);
  856. BoolFalse = ConstantInt::getFalse(Context);
  857. BoolUndef = UndefValue::get(Boolean);
  858. this->DA = nullptr;
  859. }
  860. bool StructurizeCFG::makeUniformRegion(Region *R,
  861. LegacyDivergenceAnalysis *DA) {
  862. if (R->isTopLevelRegion())
  863. return false;
  864. this->DA = DA;
  865. // TODO: We could probably be smarter here with how we handle sub-regions.
  866. // We currently rely on the fact that metadata is set by earlier invocations
  867. // of the pass on sub-regions, and that this metadata doesn't get lost --
  868. // but we shouldn't rely on metadata for correctness!
  869. unsigned UniformMDKindID =
  870. R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
  871. if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
  872. LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
  873. << '\n');
  874. // Mark all direct child block terminators as having been treated as
  875. // uniform. To account for a possible future in which non-uniform
  876. // sub-regions are treated more cleverly, indirect children are not
  877. // marked as uniform.
  878. MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
  879. for (RegionNode *E : R->elements()) {
  880. if (E->isSubRegion())
  881. continue;
  882. if (Instruction *Term = E->getEntry()->getTerminator())
  883. Term->setMetadata(UniformMDKindID, MD);
  884. }
  885. return true;
  886. }
  887. return false;
  888. }
  889. /// Run the transformation for each region found
  890. bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
  891. if (R->isTopLevelRegion())
  892. return false;
  893. this->DT = DT;
  894. Func = R->getEntry()->getParent();
  895. ParentRegion = R;
  896. orderNodes();
  897. collectInfos();
  898. createFlow();
  899. insertConditions(false);
  900. insertConditions(true);
  901. simplifyConditions();
  902. setPhiValues();
  903. simplifyAffectedPhis();
  904. rebuildSSA();
  905. // Cleanup
  906. Order.clear();
  907. Visited.clear();
  908. DeletedPhis.clear();
  909. AddedPhis.clear();
  910. Predicates.clear();
  911. Conditions.clear();
  912. Loops.clear();
  913. LoopPreds.clear();
  914. LoopConds.clear();
  915. return true;
  916. }
  917. Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
  918. return new StructurizeCFGLegacyPass(SkipUniformRegions);
  919. }
  920. static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
  921. Regions.push_back(&R);
  922. for (const auto &E : R)
  923. addRegionIntoQueue(*E, Regions);
  924. }
  925. PreservedAnalyses StructurizeCFGPass::run(Function &F,
  926. FunctionAnalysisManager &AM) {
  927. bool Changed = false;
  928. DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  929. auto &RI = AM.getResult<RegionInfoAnalysis>(F);
  930. std::vector<Region *> Regions;
  931. addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
  932. while (!Regions.empty()) {
  933. Region *R = Regions.back();
  934. StructurizeCFG SCFG;
  935. SCFG.init(R);
  936. Changed |= SCFG.run(R, DT);
  937. Regions.pop_back();
  938. }
  939. if (!Changed)
  940. return PreservedAnalyses::all();
  941. PreservedAnalyses PA;
  942. PA.preserve<DominatorTreeAnalysis>();
  943. return PA;
  944. }