DFAJumpThreading.cpp 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359
  1. //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Transform each threading path to effectively jump thread the DFA. For
  10. // example, the CFG below could be transformed as follows, where the cloned
  11. // blocks unconditionally branch to the next correct case based on what is
  12. // identified in the analysis.
  13. //
  14. // sw.bb sw.bb
  15. // / | \ / | \
  16. // case1 case2 case3 case1 case2 case3
  17. // \ | / | | |
  18. // determinator det.2 det.3 det.1
  19. // br sw.bb / | \
  20. // sw.bb.2 sw.bb.3 sw.bb.1
  21. // br case2 br case3 br case1§
  22. //
  23. // Definitions and Terminology:
  24. //
  25. // * Threading path:
  26. // a list of basic blocks, the exit state, and the block that determines
  27. // the next state, for which the following notation will be used:
  28. // < path of BBs that form a cycle > [ state, determinator ]
  29. //
  30. // * Predictable switch:
  31. // The switch variable is always a known constant so that all conditional
  32. // jumps based on switch variable can be converted to unconditional jump.
  33. //
  34. // * Determinator:
  35. // The basic block that determines the next state of the DFA.
  36. //
  37. // Representing the optimization in C-like pseudocode: the code pattern on the
  38. // left could functionally be transformed to the right pattern if the switch
  39. // condition is predictable.
  40. //
  41. // X = A goto A
  42. // for (...) A:
  43. // switch (X) ...
  44. // case A goto B
  45. // X = B B:
  46. // case B ...
  47. // X = C goto C
  48. //
  49. // The pass first checks that switch variable X is decided by the control flow
  50. // path taken in the loop; for example, in case B, the next value of X is
  51. // decided to be C. It then enumerates through all paths in the loop and labels
  52. // the basic blocks where the next state is decided.
  53. //
  54. // Using this information it creates new paths that unconditionally branch to
  55. // the next case. This involves cloning code, so it only gets triggered if the
  56. // amount of code duplicated is below a threshold.
  57. //
  58. //===----------------------------------------------------------------------===//
  59. #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
  60. #include "llvm/ADT/APInt.h"
  61. #include "llvm/ADT/DenseMap.h"
  62. #include "llvm/ADT/SmallSet.h"
  63. #include "llvm/ADT/Statistic.h"
  64. #include "llvm/Analysis/AssumptionCache.h"
  65. #include "llvm/Analysis/CodeMetrics.h"
  66. #include "llvm/Analysis/DomTreeUpdater.h"
  67. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  68. #include "llvm/Analysis/TargetTransformInfo.h"
  69. #include "llvm/IR/CFG.h"
  70. #include "llvm/IR/Constants.h"
  71. #include "llvm/IR/IntrinsicInst.h"
  72. #include "llvm/InitializePasses.h"
  73. #include "llvm/Pass.h"
  74. #include "llvm/Support/CommandLine.h"
  75. #include "llvm/Support/Debug.h"
  76. #include "llvm/Transforms/Scalar.h"
  77. #include "llvm/Transforms/Utils/Cloning.h"
  78. #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
  79. #include "llvm/Transforms/Utils/ValueMapper.h"
  80. #include <algorithm>
  81. #include <deque>
  82. #ifdef EXPENSIVE_CHECKS
  83. #include "llvm/IR/Verifier.h"
  84. #endif
  85. using namespace llvm;
  86. #define DEBUG_TYPE "dfa-jump-threading"
  87. STATISTIC(NumTransforms, "Number of transformations done");
  88. STATISTIC(NumCloned, "Number of blocks cloned");
  89. STATISTIC(NumPaths, "Number of individual paths threaded");
  90. static cl::opt<bool>
  91. ClViewCfgBefore("dfa-jump-view-cfg-before",
  92. cl::desc("View the CFG before DFA Jump Threading"),
  93. cl::Hidden, cl::init(false));
  94. static cl::opt<unsigned> MaxPathLength(
  95. "dfa-max-path-length",
  96. cl::desc("Max number of blocks searched to find a threading path"),
  97. cl::Hidden, cl::init(20));
  98. static cl::opt<unsigned> MaxNumPaths(
  99. "dfa-max-num-paths",
  100. cl::desc("Max number of paths enumerated around a switch"),
  101. cl::Hidden, cl::init(200));
  102. static cl::opt<unsigned>
  103. CostThreshold("dfa-cost-threshold",
  104. cl::desc("Maximum cost accepted for the transformation"),
  105. cl::Hidden, cl::init(50));
  106. namespace {
  107. class SelectInstToUnfold {
  108. SelectInst *SI;
  109. PHINode *SIUse;
  110. public:
  111. SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
  112. SelectInst *getInst() { return SI; }
  113. PHINode *getUse() { return SIUse; }
  114. explicit operator bool() const { return SI && SIUse; }
  115. };
  116. void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
  117. std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  118. std::vector<BasicBlock *> *NewBBs);
  119. class DFAJumpThreading {
  120. public:
  121. DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
  122. TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
  123. : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
  124. bool run(Function &F);
  125. private:
  126. void
  127. unfoldSelectInstrs(DominatorTree *DT,
  128. const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
  129. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  130. SmallVector<SelectInstToUnfold, 4> Stack;
  131. for (SelectInstToUnfold SIToUnfold : SelectInsts)
  132. Stack.push_back(SIToUnfold);
  133. while (!Stack.empty()) {
  134. SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
  135. std::vector<SelectInstToUnfold> NewSIsToUnfold;
  136. std::vector<BasicBlock *> NewBBs;
  137. unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
  138. // Put newly discovered select instructions into the work list.
  139. for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
  140. Stack.push_back(NewSIToUnfold);
  141. }
  142. }
  143. AssumptionCache *AC;
  144. DominatorTree *DT;
  145. TargetTransformInfo *TTI;
  146. OptimizationRemarkEmitter *ORE;
  147. };
  148. class DFAJumpThreadingLegacyPass : public FunctionPass {
  149. public:
  150. static char ID; // Pass identification
  151. DFAJumpThreadingLegacyPass() : FunctionPass(ID) {}
  152. void getAnalysisUsage(AnalysisUsage &AU) const override {
  153. AU.addRequired<AssumptionCacheTracker>();
  154. AU.addRequired<DominatorTreeWrapperPass>();
  155. AU.addPreserved<DominatorTreeWrapperPass>();
  156. AU.addRequired<TargetTransformInfoWrapperPass>();
  157. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  158. }
  159. bool runOnFunction(Function &F) override {
  160. if (skipFunction(F))
  161. return false;
  162. AssumptionCache *AC =
  163. &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  164. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  165. TargetTransformInfo *TTI =
  166. &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  167. OptimizationRemarkEmitter *ORE =
  168. &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  169. return DFAJumpThreading(AC, DT, TTI, ORE).run(F);
  170. }
  171. };
  172. } // end anonymous namespace
  173. char DFAJumpThreadingLegacyPass::ID = 0;
  174. INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
  175. "DFA Jump Threading", false, false)
  176. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  177. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  178. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  179. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  180. INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
  181. "DFA Jump Threading", false, false)
  182. // Public interface to the DFA Jump Threading pass
  183. FunctionPass *llvm::createDFAJumpThreadingPass() {
  184. return new DFAJumpThreadingLegacyPass();
  185. }
  186. namespace {
  187. /// Create a new basic block and sink \p SIToSink into it.
  188. void createBasicBlockAndSinkSelectInst(
  189. DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,
  190. BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,
  191. BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  192. std::vector<BasicBlock *> *NewBBs) {
  193. assert(SIToSink->hasOneUse());
  194. assert(NewBlock);
  195. assert(NewBranch);
  196. *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,
  197. EndBlock->getParent(), EndBlock);
  198. NewBBs->push_back(*NewBlock);
  199. *NewBranch = BranchInst::Create(EndBlock, *NewBlock);
  200. SIToSink->moveBefore(*NewBranch);
  201. NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));
  202. DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});
  203. }
  204. /// Unfold the select instruction held in \p SIToUnfold by replacing it with
  205. /// control flow.
  206. ///
  207. /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
  208. /// created basic blocks into \p NewBBs.
  209. ///
  210. /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
  211. void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
  212. std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  213. std::vector<BasicBlock *> *NewBBs) {
  214. SelectInst *SI = SIToUnfold.getInst();
  215. PHINode *SIUse = SIToUnfold.getUse();
  216. BasicBlock *StartBlock = SI->getParent();
  217. BasicBlock *EndBlock = SIUse->getParent();
  218. BranchInst *StartBlockTerm =
  219. dyn_cast<BranchInst>(StartBlock->getTerminator());
  220. assert(StartBlockTerm && StartBlockTerm->isUnconditional());
  221. assert(SI->hasOneUse());
  222. // These are the new basic blocks for the conditional branch.
  223. // At least one will become an actual new basic block.
  224. BasicBlock *TrueBlock = nullptr;
  225. BasicBlock *FalseBlock = nullptr;
  226. BranchInst *TrueBranch = nullptr;
  227. BranchInst *FalseBranch = nullptr;
  228. // Sink select instructions to be able to unfold them later.
  229. if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {
  230. createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
  231. "si.unfold.true", &TrueBlock, &TrueBranch,
  232. NewSIsToUnfold, NewBBs);
  233. }
  234. if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {
  235. createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
  236. "si.unfold.false", &FalseBlock,
  237. &FalseBranch, NewSIsToUnfold, NewBBs);
  238. }
  239. // If there was nothing to sink, then arbitrarily choose the 'false' side
  240. // for a new input value to the PHI.
  241. if (!TrueBlock && !FalseBlock) {
  242. FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",
  243. EndBlock->getParent(), EndBlock);
  244. NewBBs->push_back(FalseBlock);
  245. BranchInst::Create(EndBlock, FalseBlock);
  246. DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});
  247. }
  248. // Insert the real conditional branch based on the original condition.
  249. // If we did not create a new block for one of the 'true' or 'false' paths
  250. // of the condition, it means that side of the branch goes to the end block
  251. // directly and the path originates from the start block from the point of
  252. // view of the new PHI.
  253. BasicBlock *TT = EndBlock;
  254. BasicBlock *FT = EndBlock;
  255. if (TrueBlock && FalseBlock) {
  256. // A diamond.
  257. TT = TrueBlock;
  258. FT = FalseBlock;
  259. // Update the phi node of SI.
  260. SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
  261. SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
  262. SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
  263. // Update any other PHI nodes in EndBlock.
  264. for (PHINode &Phi : EndBlock->phis()) {
  265. if (&Phi != SIUse) {
  266. Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock);
  267. Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock);
  268. }
  269. }
  270. } else {
  271. BasicBlock *NewBlock = nullptr;
  272. Value *SIOp1 = SI->getTrueValue();
  273. Value *SIOp2 = SI->getFalseValue();
  274. // A triangle pointing right.
  275. if (!TrueBlock) {
  276. NewBlock = FalseBlock;
  277. FT = FalseBlock;
  278. }
  279. // A triangle pointing left.
  280. else {
  281. NewBlock = TrueBlock;
  282. TT = TrueBlock;
  283. std::swap(SIOp1, SIOp2);
  284. }
  285. // Update the phi node of SI.
  286. for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
  287. if (SIUse->getIncomingBlock(Idx) == StartBlock)
  288. SIUse->setIncomingValue(Idx, SIOp1);
  289. }
  290. SIUse->addIncoming(SIOp2, NewBlock);
  291. // Update any other PHI nodes in EndBlock.
  292. for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
  293. ++II) {
  294. if (Phi != SIUse)
  295. Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);
  296. }
  297. }
  298. StartBlockTerm->eraseFromParent();
  299. BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);
  300. DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
  301. {DominatorTree::Insert, StartBlock, FT}});
  302. // The select is now dead.
  303. SI->eraseFromParent();
  304. }
  305. struct ClonedBlock {
  306. BasicBlock *BB;
  307. uint64_t State; ///< \p State corresponds to the next value of a switch stmnt.
  308. };
  309. typedef std::deque<BasicBlock *> PathType;
  310. typedef std::vector<PathType> PathsType;
  311. typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
  312. typedef std::vector<ClonedBlock> CloneList;
  313. // This data structure keeps track of all blocks that have been cloned. If two
  314. // different ThreadingPaths clone the same block for a certain state it should
  315. // be reused, and it can be looked up in this map.
  316. typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
  317. // This map keeps track of all the new definitions for an instruction. This
  318. // information is needed when restoring SSA form after cloning blocks.
  319. typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
  320. inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
  321. OS << "< ";
  322. for (const BasicBlock *BB : Path) {
  323. std::string BBName;
  324. if (BB->hasName())
  325. raw_string_ostream(BBName) << BB->getName();
  326. else
  327. raw_string_ostream(BBName) << BB;
  328. OS << BBName << " ";
  329. }
  330. OS << ">";
  331. return OS;
  332. }
  333. /// ThreadingPath is a path in the control flow of a loop that can be threaded
  334. /// by cloning necessary basic blocks and replacing conditional branches with
  335. /// unconditional ones. A threading path includes a list of basic blocks, the
  336. /// exit state, and the block that determines the next state.
  337. struct ThreadingPath {
  338. /// Exit value is DFA's exit state for the given path.
  339. uint64_t getExitValue() const { return ExitVal; }
  340. void setExitValue(const ConstantInt *V) {
  341. ExitVal = V->getZExtValue();
  342. IsExitValSet = true;
  343. }
  344. bool isExitValueSet() const { return IsExitValSet; }
  345. /// Determinator is the basic block that determines the next state of the DFA.
  346. const BasicBlock *getDeterminatorBB() const { return DBB; }
  347. void setDeterminator(const BasicBlock *BB) { DBB = BB; }
  348. /// Path is a list of basic blocks.
  349. const PathType &getPath() const { return Path; }
  350. void setPath(const PathType &NewPath) { Path = NewPath; }
  351. void print(raw_ostream &OS) const {
  352. OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
  353. }
  354. private:
  355. PathType Path;
  356. uint64_t ExitVal;
  357. const BasicBlock *DBB = nullptr;
  358. bool IsExitValSet = false;
  359. };
  360. #ifndef NDEBUG
  361. inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
  362. TPath.print(OS);
  363. return OS;
  364. }
  365. #endif
  366. struct MainSwitch {
  367. MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
  368. if (isCandidate(SI)) {
  369. Instr = SI;
  370. } else {
  371. ORE->emit([&]() {
  372. return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
  373. << "Switch instruction is not predictable.";
  374. });
  375. }
  376. }
  377. virtual ~MainSwitch() = default;
  378. SwitchInst *getInstr() const { return Instr; }
  379. const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
  380. return SelectInsts;
  381. }
  382. private:
  383. /// Do a use-def chain traversal starting from the switch condition to see if
  384. /// \p SI is a potential condidate.
  385. ///
  386. /// Also, collect select instructions to unfold.
  387. bool isCandidate(const SwitchInst *SI) {
  388. std::deque<Value *> Q;
  389. SmallSet<Value *, 16> SeenValues;
  390. SelectInsts.clear();
  391. Value *SICond = SI->getCondition();
  392. LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
  393. if (!isa<PHINode>(SICond))
  394. return false;
  395. addToQueue(SICond, Q, SeenValues);
  396. while (!Q.empty()) {
  397. Value *Current = Q.front();
  398. Q.pop_front();
  399. if (auto *Phi = dyn_cast<PHINode>(Current)) {
  400. for (Value *Incoming : Phi->incoming_values()) {
  401. addToQueue(Incoming, Q, SeenValues);
  402. }
  403. LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
  404. } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
  405. if (!isValidSelectInst(SelI))
  406. return false;
  407. addToQueue(SelI->getTrueValue(), Q, SeenValues);
  408. addToQueue(SelI->getFalseValue(), Q, SeenValues);
  409. LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
  410. if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
  411. SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
  412. } else if (isa<Constant>(Current)) {
  413. LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
  414. continue;
  415. } else {
  416. LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
  417. // Allow unpredictable values. The hope is that those will be the
  418. // initial switch values that can be ignored (they will hit the
  419. // unthreaded switch) but this assumption will get checked later after
  420. // paths have been enumerated (in function getStateDefMap).
  421. continue;
  422. }
  423. }
  424. return true;
  425. }
  426. void addToQueue(Value *Val, std::deque<Value *> &Q,
  427. SmallSet<Value *, 16> &SeenValues) {
  428. if (SeenValues.contains(Val))
  429. return;
  430. Q.push_back(Val);
  431. SeenValues.insert(Val);
  432. }
  433. bool isValidSelectInst(SelectInst *SI) {
  434. if (!SI->hasOneUse())
  435. return false;
  436. Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
  437. // The use of the select inst should be either a phi or another select.
  438. if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
  439. return false;
  440. BasicBlock *SIBB = SI->getParent();
  441. // Currently, we can only expand select instructions in basic blocks with
  442. // one successor.
  443. BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
  444. if (!SITerm || !SITerm->isUnconditional())
  445. return false;
  446. if (isa<PHINode>(SIUse) &&
  447. SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
  448. return false;
  449. // If select will not be sunk during unfolding, and it is in the same basic
  450. // block as another state defining select, then cannot unfold both.
  451. for (SelectInstToUnfold SIToUnfold : SelectInsts) {
  452. SelectInst *PrevSI = SIToUnfold.getInst();
  453. if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
  454. PrevSI->getParent() == SI->getParent())
  455. return false;
  456. }
  457. return true;
  458. }
  459. SwitchInst *Instr = nullptr;
  460. SmallVector<SelectInstToUnfold, 4> SelectInsts;
  461. };
  462. struct AllSwitchPaths {
  463. AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
  464. : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
  465. ORE(ORE) {}
  466. std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
  467. unsigned getNumThreadingPaths() { return TPaths.size(); }
  468. SwitchInst *getSwitchInst() { return Switch; }
  469. BasicBlock *getSwitchBlock() { return SwitchBlock; }
  470. void run() {
  471. VisitedBlocks Visited;
  472. PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
  473. StateDefMap StateDef = getStateDefMap(LoopPaths);
  474. if (StateDef.empty()) {
  475. ORE->emit([&]() {
  476. return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
  477. Switch)
  478. << "Switch instruction is not predictable.";
  479. });
  480. return;
  481. }
  482. for (PathType Path : LoopPaths) {
  483. ThreadingPath TPath;
  484. const BasicBlock *PrevBB = Path.back();
  485. for (const BasicBlock *BB : Path) {
  486. if (StateDef.count(BB) != 0) {
  487. const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
  488. assert(Phi && "Expected a state-defining instr to be a phi node.");
  489. const Value *V = Phi->getIncomingValueForBlock(PrevBB);
  490. if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {
  491. TPath.setExitValue(C);
  492. TPath.setDeterminator(BB);
  493. TPath.setPath(Path);
  494. }
  495. }
  496. // Switch block is the determinator, this is the final exit value.
  497. if (TPath.isExitValueSet() && BB == Path.front())
  498. break;
  499. PrevBB = BB;
  500. }
  501. if (TPath.isExitValueSet() && isSupported(TPath))
  502. TPaths.push_back(TPath);
  503. }
  504. }
  505. private:
  506. // Value: an instruction that defines a switch state;
  507. // Key: the parent basic block of that instruction.
  508. typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
  509. PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
  510. unsigned PathDepth) const {
  511. PathsType Res;
  512. // Stop exploring paths after visiting MaxPathLength blocks
  513. if (PathDepth > MaxPathLength) {
  514. ORE->emit([&]() {
  515. return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
  516. Switch)
  517. << "Exploration stopped after visiting MaxPathLength="
  518. << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
  519. });
  520. return Res;
  521. }
  522. Visited.insert(BB);
  523. // Some blocks have multiple edges to the same successor, and this set
  524. // is used to prevent a duplicate path from being generated
  525. SmallSet<BasicBlock *, 4> Successors;
  526. for (BasicBlock *Succ : successors(BB)) {
  527. if (!Successors.insert(Succ).second)
  528. continue;
  529. // Found a cycle through the SwitchBlock
  530. if (Succ == SwitchBlock) {
  531. Res.push_back({BB});
  532. continue;
  533. }
  534. // We have encountered a cycle, do not get caught in it
  535. if (Visited.contains(Succ))
  536. continue;
  537. PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
  538. for (PathType Path : SuccPaths) {
  539. PathType NewPath(Path);
  540. NewPath.push_front(BB);
  541. Res.push_back(NewPath);
  542. if (Res.size() >= MaxNumPaths) {
  543. return Res;
  544. }
  545. }
  546. }
  547. // This block could now be visited again from a different predecessor. Note
  548. // that this will result in exponential runtime. Subpaths could possibly be
  549. // cached but it takes a lot of memory to store them.
  550. Visited.erase(BB);
  551. return Res;
  552. }
  553. /// Walk the use-def chain and collect all the state-defining instructions.
  554. ///
  555. /// Return an empty map if unpredictable values encountered inside the basic
  556. /// blocks of \p LoopPaths.
  557. StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
  558. StateDefMap Res;
  559. // Basic blocks belonging to any of the loops around the switch statement.
  560. SmallPtrSet<BasicBlock *, 16> LoopBBs;
  561. for (const PathType &Path : LoopPaths) {
  562. for (BasicBlock *BB : Path)
  563. LoopBBs.insert(BB);
  564. }
  565. Value *FirstDef = Switch->getOperand(0);
  566. assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
  567. SmallVector<PHINode *, 8> Stack;
  568. Stack.push_back(dyn_cast<PHINode>(FirstDef));
  569. SmallSet<Value *, 16> SeenValues;
  570. while (!Stack.empty()) {
  571. PHINode *CurPhi = Stack.pop_back_val();
  572. Res[CurPhi->getParent()] = CurPhi;
  573. SeenValues.insert(CurPhi);
  574. for (BasicBlock *IncomingBB : CurPhi->blocks()) {
  575. Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);
  576. bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0;
  577. if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
  578. SeenValues.contains(Incoming) || IsOutsideLoops) {
  579. continue;
  580. }
  581. // Any unpredictable value inside the loops means we must bail out.
  582. if (!isa<PHINode>(Incoming))
  583. return StateDefMap();
  584. Stack.push_back(cast<PHINode>(Incoming));
  585. }
  586. }
  587. return Res;
  588. }
  589. /// The determinator BB should precede the switch-defining BB.
  590. ///
  591. /// Otherwise, it is possible that the state defined in the determinator block
  592. /// defines the state for the next iteration of the loop, rather than for the
  593. /// current one.
  594. ///
  595. /// Currently supported paths:
  596. /// \code
  597. /// < switch bb1 determ def > [ 42, determ ]
  598. /// < switch_and_def bb1 determ > [ 42, determ ]
  599. /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
  600. /// \endcode
  601. ///
  602. /// Unsupported paths:
  603. /// \code
  604. /// < switch bb1 def determ > [ 43, determ ]
  605. /// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
  606. /// \endcode
  607. bool isSupported(const ThreadingPath &TPath) {
  608. Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition());
  609. assert(SwitchCondI);
  610. if (!SwitchCondI)
  611. return false;
  612. const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
  613. const BasicBlock *SwitchCondUseBB = Switch->getParent();
  614. const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
  615. assert(
  616. SwitchCondUseBB == TPath.getPath().front() &&
  617. "The first BB in a threading path should have the switch instruction");
  618. if (SwitchCondUseBB != TPath.getPath().front())
  619. return false;
  620. // Make DeterminatorBB the first element in Path.
  621. PathType Path = TPath.getPath();
  622. auto ItDet = llvm::find(Path, DeterminatorBB);
  623. std::rotate(Path.begin(), ItDet, Path.end());
  624. bool IsDetBBSeen = false;
  625. bool IsDefBBSeen = false;
  626. bool IsUseBBSeen = false;
  627. for (BasicBlock *BB : Path) {
  628. if (BB == DeterminatorBB)
  629. IsDetBBSeen = true;
  630. if (BB == SwitchCondDefBB)
  631. IsDefBBSeen = true;
  632. if (BB == SwitchCondUseBB)
  633. IsUseBBSeen = true;
  634. if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
  635. return false;
  636. }
  637. return true;
  638. }
  639. SwitchInst *Switch;
  640. BasicBlock *SwitchBlock;
  641. OptimizationRemarkEmitter *ORE;
  642. std::vector<ThreadingPath> TPaths;
  643. };
  644. struct TransformDFA {
  645. TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
  646. AssumptionCache *AC, TargetTransformInfo *TTI,
  647. OptimizationRemarkEmitter *ORE,
  648. SmallPtrSet<const Value *, 32> EphValues)
  649. : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
  650. EphValues(EphValues) {}
  651. void run() {
  652. if (isLegalAndProfitableToTransform()) {
  653. createAllExitPaths();
  654. NumTransforms++;
  655. }
  656. }
  657. private:
  658. /// This function performs both a legality check and profitability check at
  659. /// the same time since it is convenient to do so. It iterates through all
  660. /// blocks that will be cloned, and keeps track of the duplication cost. It
  661. /// also returns false if it is illegal to clone some required block.
  662. bool isLegalAndProfitableToTransform() {
  663. CodeMetrics Metrics;
  664. SwitchInst *Switch = SwitchPaths->getSwitchInst();
  665. // Note that DuplicateBlockMap is not being used as intended here. It is
  666. // just being used to ensure (BB, State) pairs are only counted once.
  667. DuplicateBlockMap DuplicateMap;
  668. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  669. PathType PathBBs = TPath.getPath();
  670. uint64_t NextState = TPath.getExitValue();
  671. const BasicBlock *Determinator = TPath.getDeterminatorBB();
  672. // Update Metrics for the Switch block, this is always cloned
  673. BasicBlock *BB = SwitchPaths->getSwitchBlock();
  674. BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
  675. if (!VisitedBB) {
  676. Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
  677. DuplicateMap[BB].push_back({BB, NextState});
  678. }
  679. // If the Switch block is the Determinator, then we can continue since
  680. // this is the only block that is cloned and we already counted for it.
  681. if (PathBBs.front() == Determinator)
  682. continue;
  683. // Otherwise update Metrics for all blocks that will be cloned. If any
  684. // block is already cloned and would be reused, don't double count it.
  685. auto DetIt = llvm::find(PathBBs, Determinator);
  686. for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
  687. BB = *BBIt;
  688. VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
  689. if (VisitedBB)
  690. continue;
  691. Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
  692. DuplicateMap[BB].push_back({BB, NextState});
  693. }
  694. if (Metrics.notDuplicatable) {
  695. LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
  696. << "non-duplicatable instructions.\n");
  697. ORE->emit([&]() {
  698. return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
  699. Switch)
  700. << "Contains non-duplicatable instructions.";
  701. });
  702. return false;
  703. }
  704. if (Metrics.convergent) {
  705. LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
  706. << "convergent instructions.\n");
  707. ORE->emit([&]() {
  708. return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
  709. << "Contains convergent instructions.";
  710. });
  711. return false;
  712. }
  713. if (!Metrics.NumInsts.isValid()) {
  714. LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
  715. << "instructions with invalid cost.\n");
  716. ORE->emit([&]() {
  717. return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
  718. << "Contains instructions with invalid cost.";
  719. });
  720. return false;
  721. }
  722. }
  723. InstructionCost DuplicationCost = 0;
  724. unsigned JumpTableSize = 0;
  725. TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
  726. nullptr);
  727. if (JumpTableSize == 0) {
  728. // Factor in the number of conditional branches reduced from jump
  729. // threading. Assume that lowering the switch block is implemented by
  730. // using binary search, hence the LogBase2().
  731. unsigned CondBranches =
  732. APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
  733. DuplicationCost = Metrics.NumInsts / CondBranches;
  734. } else {
  735. // Compared with jump tables, the DFA optimizer removes an indirect branch
  736. // on each loop iteration, thus making branch prediction more precise. The
  737. // more branch targets there are, the more likely it is for the branch
  738. // predictor to make a mistake, and the more benefit there is in the DFA
  739. // optimizer. Thus, the more branch targets there are, the lower is the
  740. // cost of the DFA opt.
  741. DuplicationCost = Metrics.NumInsts / JumpTableSize;
  742. }
  743. LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
  744. << SwitchPaths->getSwitchBlock()->getName()
  745. << " is: " << DuplicationCost << "\n\n");
  746. if (DuplicationCost > CostThreshold) {
  747. LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
  748. << "cost threshold.\n");
  749. ORE->emit([&]() {
  750. return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
  751. << "Duplication cost exceeds the cost threshold (cost="
  752. << ore::NV("Cost", DuplicationCost)
  753. << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
  754. });
  755. return false;
  756. }
  757. ORE->emit([&]() {
  758. return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
  759. << "Switch statement jump-threaded.";
  760. });
  761. return true;
  762. }
  763. /// Transform each threading path to effectively jump thread the DFA.
  764. void createAllExitPaths() {
  765. DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
  766. // Move the switch block to the end of the path, since it will be duplicated
  767. BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
  768. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  769. LLVM_DEBUG(dbgs() << TPath << "\n");
  770. PathType NewPath(TPath.getPath());
  771. NewPath.push_back(SwitchBlock);
  772. TPath.setPath(NewPath);
  773. }
  774. // Transform the ThreadingPaths and keep track of the cloned values
  775. DuplicateBlockMap DuplicateMap;
  776. DefMap NewDefs;
  777. SmallSet<BasicBlock *, 16> BlocksToClean;
  778. for (BasicBlock *BB : successors(SwitchBlock))
  779. BlocksToClean.insert(BB);
  780. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  781. createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
  782. NumPaths++;
  783. }
  784. // After all paths are cloned, now update the last successor of the cloned
  785. // path so it skips over the switch statement
  786. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
  787. updateLastSuccessor(TPath, DuplicateMap, &DTU);
  788. // For each instruction that was cloned and used outside, update its uses
  789. updateSSA(NewDefs);
  790. // Clean PHI Nodes for the newly created blocks
  791. for (BasicBlock *BB : BlocksToClean)
  792. cleanPhiNodes(BB);
  793. }
  794. /// For a specific ThreadingPath \p Path, create an exit path starting from
  795. /// the determinator block.
  796. ///
  797. /// To remember the correct destination, we have to duplicate blocks
  798. /// corresponding to each state. Also update the terminating instruction of
  799. /// the predecessors, and phis in the successor blocks.
  800. void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
  801. DuplicateBlockMap &DuplicateMap,
  802. SmallSet<BasicBlock *, 16> &BlocksToClean,
  803. DomTreeUpdater *DTU) {
  804. uint64_t NextState = Path.getExitValue();
  805. const BasicBlock *Determinator = Path.getDeterminatorBB();
  806. PathType PathBBs = Path.getPath();
  807. // Don't select the placeholder block in front
  808. if (PathBBs.front() == Determinator)
  809. PathBBs.pop_front();
  810. auto DetIt = llvm::find(PathBBs, Determinator);
  811. auto Prev = std::prev(DetIt);
  812. BasicBlock *PrevBB = *Prev;
  813. for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
  814. BasicBlock *BB = *BBIt;
  815. BlocksToClean.insert(BB);
  816. // We already cloned BB for this NextState, now just update the branch
  817. // and continue.
  818. BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
  819. if (NextBB) {
  820. updatePredecessor(PrevBB, BB, NextBB, DTU);
  821. PrevBB = NextBB;
  822. continue;
  823. }
  824. // Clone the BB and update the successor of Prev to jump to the new block
  825. BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
  826. BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
  827. DuplicateMap[BB].push_back({NewBB, NextState});
  828. BlocksToClean.insert(NewBB);
  829. PrevBB = NewBB;
  830. }
  831. }
  832. /// Restore SSA form after cloning blocks.
  833. ///
  834. /// Each cloned block creates new defs for a variable, and the uses need to be
  835. /// updated to reflect this. The uses may be replaced with a cloned value, or
  836. /// some derived phi instruction. Note that all uses of a value defined in the
  837. /// same block were already remapped when cloning the block.
  838. void updateSSA(DefMap &NewDefs) {
  839. SSAUpdaterBulk SSAUpdate;
  840. SmallVector<Use *, 16> UsesToRename;
  841. for (auto KV : NewDefs) {
  842. Instruction *I = KV.first;
  843. BasicBlock *BB = I->getParent();
  844. std::vector<Instruction *> Cloned = KV.second;
  845. // Scan all uses of this instruction to see if it is used outside of its
  846. // block, and if so, record them in UsesToRename.
  847. for (Use &U : I->uses()) {
  848. Instruction *User = cast<Instruction>(U.getUser());
  849. if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
  850. if (UserPN->getIncomingBlock(U) == BB)
  851. continue;
  852. } else if (User->getParent() == BB) {
  853. continue;
  854. }
  855. UsesToRename.push_back(&U);
  856. }
  857. // If there are no uses outside the block, we're done with this
  858. // instruction.
  859. if (UsesToRename.empty())
  860. continue;
  861. LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
  862. << "\n");
  863. // We found a use of I outside of BB. Rename all uses of I that are
  864. // outside its block to be uses of the appropriate PHI node etc. See
  865. // ValuesInBlocks with the values we know.
  866. unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
  867. SSAUpdate.AddAvailableValue(VarNum, BB, I);
  868. for (Instruction *New : Cloned)
  869. SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
  870. while (!UsesToRename.empty())
  871. SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
  872. LLVM_DEBUG(dbgs() << "\n");
  873. }
  874. // SSAUpdater handles phi placement and renaming uses with the appropriate
  875. // value.
  876. SSAUpdate.RewriteAllUses(DT);
  877. }
  878. /// Clones a basic block, and adds it to the CFG.
  879. ///
  880. /// This function also includes updating phi nodes in the successors of the
  881. /// BB, and remapping uses that were defined locally in the cloned BB.
  882. BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
  883. uint64_t NextState,
  884. DuplicateBlockMap &DuplicateMap,
  885. DefMap &NewDefs,
  886. DomTreeUpdater *DTU) {
  887. ValueToValueMapTy VMap;
  888. BasicBlock *NewBB = CloneBasicBlock(
  889. BB, VMap, ".jt" + std::to_string(NextState), BB->getParent());
  890. NewBB->moveAfter(BB);
  891. NumCloned++;
  892. for (Instruction &I : *NewBB) {
  893. // Do not remap operands of PHINode in case a definition in BB is an
  894. // incoming value to a phi in the same block. This incoming value will
  895. // be renamed later while restoring SSA.
  896. if (isa<PHINode>(&I))
  897. continue;
  898. RemapInstruction(&I, VMap,
  899. RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
  900. if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
  901. AC->registerAssumption(II);
  902. }
  903. updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
  904. updatePredecessor(PrevBB, BB, NewBB, DTU);
  905. updateDefMap(NewDefs, VMap);
  906. // Add all successors to the DominatorTree
  907. SmallPtrSet<BasicBlock *, 4> SuccSet;
  908. for (auto *SuccBB : successors(NewBB)) {
  909. if (SuccSet.insert(SuccBB).second)
  910. DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
  911. }
  912. SuccSet.clear();
  913. return NewBB;
  914. }
  915. /// Update the phi nodes in BB's successors.
  916. ///
  917. /// This means creating a new incoming value from NewBB with the new
  918. /// instruction wherever there is an incoming value from BB.
  919. void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
  920. uint64_t NextState, ValueToValueMapTy &VMap,
  921. DuplicateBlockMap &DuplicateMap) {
  922. std::vector<BasicBlock *> BlocksToUpdate;
  923. // If BB is the last block in the path, we can simply update the one case
  924. // successor that will be reached.
  925. if (BB == SwitchPaths->getSwitchBlock()) {
  926. SwitchInst *Switch = SwitchPaths->getSwitchInst();
  927. BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
  928. BlocksToUpdate.push_back(NextCase);
  929. BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
  930. if (ClonedSucc)
  931. BlocksToUpdate.push_back(ClonedSucc);
  932. }
  933. // Otherwise update phis in all successors.
  934. else {
  935. for (BasicBlock *Succ : successors(BB)) {
  936. BlocksToUpdate.push_back(Succ);
  937. // Check if a successor has already been cloned for the particular exit
  938. // value. In this case if a successor was already cloned, the phi nodes
  939. // in the cloned block should be updated directly.
  940. BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
  941. if (ClonedSucc)
  942. BlocksToUpdate.push_back(ClonedSucc);
  943. }
  944. }
  945. // If there is a phi with an incoming value from BB, create a new incoming
  946. // value for the new predecessor ClonedBB. The value will either be the same
  947. // value from BB or a cloned value.
  948. for (BasicBlock *Succ : BlocksToUpdate) {
  949. for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
  950. ++II) {
  951. Value *Incoming = Phi->getIncomingValueForBlock(BB);
  952. if (Incoming) {
  953. if (isa<Constant>(Incoming)) {
  954. Phi->addIncoming(Incoming, ClonedBB);
  955. continue;
  956. }
  957. Value *ClonedVal = VMap[Incoming];
  958. if (ClonedVal)
  959. Phi->addIncoming(ClonedVal, ClonedBB);
  960. else
  961. Phi->addIncoming(Incoming, ClonedBB);
  962. }
  963. }
  964. }
  965. }
  966. /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
  967. /// other successors are kept as well.
  968. void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
  969. BasicBlock *NewBB, DomTreeUpdater *DTU) {
  970. // When a path is reused, there is a chance that predecessors were already
  971. // updated before. Check if the predecessor needs to be updated first.
  972. if (!isPredecessor(OldBB, PrevBB))
  973. return;
  974. Instruction *PrevTerm = PrevBB->getTerminator();
  975. for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
  976. if (PrevTerm->getSuccessor(Idx) == OldBB) {
  977. OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
  978. PrevTerm->setSuccessor(Idx, NewBB);
  979. }
  980. }
  981. DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
  982. {DominatorTree::Insert, PrevBB, NewBB}});
  983. }
  984. /// Add new value mappings to the DefMap to keep track of all new definitions
  985. /// for a particular instruction. These will be used while updating SSA form.
  986. void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
  987. SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
  988. NewDefsVector.reserve(VMap.size());
  989. for (auto Entry : VMap) {
  990. Instruction *Inst =
  991. dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
  992. if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
  993. isa<SwitchInst>(Inst)) {
  994. continue;
  995. }
  996. Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
  997. if (!Cloned)
  998. continue;
  999. NewDefsVector.push_back({Inst, Cloned});
  1000. }
  1001. // Sort the defs to get deterministic insertion order into NewDefs.
  1002. sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
  1003. if (LHS.first == RHS.first)
  1004. return LHS.second->comesBefore(RHS.second);
  1005. return LHS.first->comesBefore(RHS.first);
  1006. });
  1007. for (const auto &KV : NewDefsVector)
  1008. NewDefs[KV.first].push_back(KV.second);
  1009. }
  1010. /// Update the last branch of a particular cloned path to point to the correct
  1011. /// case successor.
  1012. ///
  1013. /// Note that this is an optional step and would have been done in later
  1014. /// optimizations, but it makes the CFG significantly easier to work with.
  1015. void updateLastSuccessor(ThreadingPath &TPath,
  1016. DuplicateBlockMap &DuplicateMap,
  1017. DomTreeUpdater *DTU) {
  1018. uint64_t NextState = TPath.getExitValue();
  1019. BasicBlock *BB = TPath.getPath().back();
  1020. BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
  1021. // Note multiple paths can end at the same block so check that it is not
  1022. // updated yet
  1023. if (!isa<SwitchInst>(LastBlock->getTerminator()))
  1024. return;
  1025. SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
  1026. BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
  1027. std::vector<DominatorTree::UpdateType> DTUpdates;
  1028. SmallPtrSet<BasicBlock *, 4> SuccSet;
  1029. for (BasicBlock *Succ : successors(LastBlock)) {
  1030. if (Succ != NextCase && SuccSet.insert(Succ).second)
  1031. DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
  1032. }
  1033. Switch->eraseFromParent();
  1034. BranchInst::Create(NextCase, LastBlock);
  1035. DTU->applyUpdates(DTUpdates);
  1036. }
  1037. /// After cloning blocks, some of the phi nodes have extra incoming values
  1038. /// that are no longer used. This function removes them.
  1039. void cleanPhiNodes(BasicBlock *BB) {
  1040. // If BB is no longer reachable, remove any remaining phi nodes
  1041. if (pred_empty(BB)) {
  1042. std::vector<PHINode *> PhiToRemove;
  1043. for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
  1044. PhiToRemove.push_back(Phi);
  1045. }
  1046. for (PHINode *PN : PhiToRemove) {
  1047. PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
  1048. PN->eraseFromParent();
  1049. }
  1050. return;
  1051. }
  1052. // Remove any incoming values that come from an invalid predecessor
  1053. for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
  1054. std::vector<BasicBlock *> BlocksToRemove;
  1055. for (BasicBlock *IncomingBB : Phi->blocks()) {
  1056. if (!isPredecessor(BB, IncomingBB))
  1057. BlocksToRemove.push_back(IncomingBB);
  1058. }
  1059. for (BasicBlock *BB : BlocksToRemove)
  1060. Phi->removeIncomingValue(BB);
  1061. }
  1062. }
  1063. /// Checks if BB was already cloned for a particular next state value. If it
  1064. /// was then it returns this cloned block, and otherwise null.
  1065. BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState,
  1066. DuplicateBlockMap &DuplicateMap) {
  1067. CloneList ClonedBBs = DuplicateMap[BB];
  1068. // Find an entry in the CloneList with this NextState. If it exists then
  1069. // return the corresponding BB
  1070. auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
  1071. return C.State == NextState;
  1072. });
  1073. return It != ClonedBBs.end() ? (*It).BB : nullptr;
  1074. }
  1075. /// Helper to get the successor corresponding to a particular case value for
  1076. /// a switch statement.
  1077. BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) {
  1078. BasicBlock *NextCase = nullptr;
  1079. for (auto Case : Switch->cases()) {
  1080. if (Case.getCaseValue()->getZExtValue() == NextState) {
  1081. NextCase = Case.getCaseSuccessor();
  1082. break;
  1083. }
  1084. }
  1085. if (!NextCase)
  1086. NextCase = Switch->getDefaultDest();
  1087. return NextCase;
  1088. }
  1089. /// Returns true if IncomingBB is a predecessor of BB.
  1090. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
  1091. return llvm::is_contained(predecessors(BB), IncomingBB);
  1092. }
  1093. AllSwitchPaths *SwitchPaths;
  1094. DominatorTree *DT;
  1095. AssumptionCache *AC;
  1096. TargetTransformInfo *TTI;
  1097. OptimizationRemarkEmitter *ORE;
  1098. SmallPtrSet<const Value *, 32> EphValues;
  1099. std::vector<ThreadingPath> TPaths;
  1100. };
  1101. bool DFAJumpThreading::run(Function &F) {
  1102. LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
  1103. if (F.hasOptSize()) {
  1104. LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
  1105. return false;
  1106. }
  1107. if (ClViewCfgBefore)
  1108. F.viewCFG();
  1109. SmallVector<AllSwitchPaths, 2> ThreadableLoops;
  1110. bool MadeChanges = false;
  1111. for (BasicBlock &BB : F) {
  1112. auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
  1113. if (!SI)
  1114. continue;
  1115. LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
  1116. << " is a candidate\n");
  1117. MainSwitch Switch(SI, ORE);
  1118. if (!Switch.getInstr())
  1119. continue;
  1120. LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
  1121. << "candidate for jump threading\n");
  1122. LLVM_DEBUG(SI->dump());
  1123. unfoldSelectInstrs(DT, Switch.getSelectInsts());
  1124. if (!Switch.getSelectInsts().empty())
  1125. MadeChanges = true;
  1126. AllSwitchPaths SwitchPaths(&Switch, ORE);
  1127. SwitchPaths.run();
  1128. if (SwitchPaths.getNumThreadingPaths() > 0) {
  1129. ThreadableLoops.push_back(SwitchPaths);
  1130. // For the time being limit this optimization to occurring once in a
  1131. // function since it can change the CFG significantly. This is not a
  1132. // strict requirement but it can cause buggy behavior if there is an
  1133. // overlap of blocks in different opportunities. There is a lot of room to
  1134. // experiment with catching more opportunities here.
  1135. break;
  1136. }
  1137. }
  1138. SmallPtrSet<const Value *, 32> EphValues;
  1139. if (ThreadableLoops.size() > 0)
  1140. CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
  1141. for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
  1142. TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
  1143. Transform.run();
  1144. MadeChanges = true;
  1145. }
  1146. #ifdef EXPENSIVE_CHECKS
  1147. assert(DT->verify(DominatorTree::VerificationLevel::Full));
  1148. verifyFunction(F, &dbgs());
  1149. #endif
  1150. return MadeChanges;
  1151. }
  1152. } // end anonymous namespace
  1153. /// Integrate with the new Pass Manager
  1154. PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
  1155. FunctionAnalysisManager &AM) {
  1156. AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  1157. DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  1158. TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  1159. OptimizationRemarkEmitter ORE(&F);
  1160. if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
  1161. return PreservedAnalyses::all();
  1162. PreservedAnalyses PA;
  1163. PA.preserve<DominatorTreeAnalysis>();
  1164. return PA;
  1165. }