DFAJumpThreading.cpp 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344
  1. //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Transform each threading path to effectively jump thread the DFA. For
  10. // example, the CFG below could be transformed as follows, where the cloned
  11. // blocks unconditionally branch to the next correct case based on what is
  12. // identified in the analysis.
  13. //
  14. // sw.bb sw.bb
  15. // / | \ / | \
  16. // case1 case2 case3 case1 case2 case3
  17. // \ | / | | |
  18. // determinator det.2 det.3 det.1
  19. // br sw.bb / | \
  20. // sw.bb.2 sw.bb.3 sw.bb.1
  21. // br case2 br case3 br case1§
  22. //
  23. // Definitions and Terminology:
  24. //
  25. // * Threading path:
  26. // a list of basic blocks, the exit state, and the block that determines
  27. // the next state, for which the following notation will be used:
  28. // < path of BBs that form a cycle > [ state, determinator ]
  29. //
  30. // * Predictable switch:
  31. // The switch variable is always a known constant so that all conditional
  32. // jumps based on switch variable can be converted to unconditional jump.
  33. //
  34. // * Determinator:
  35. // The basic block that determines the next state of the DFA.
  36. //
  37. // Representing the optimization in C-like pseudocode: the code pattern on the
  38. // left could functionally be transformed to the right pattern if the switch
  39. // condition is predictable.
  40. //
  41. // X = A goto A
  42. // for (...) A:
  43. // switch (X) ...
  44. // case A goto B
  45. // X = B B:
  46. // case B ...
  47. // X = C goto C
  48. //
  49. // The pass first checks that switch variable X is decided by the control flow
  50. // path taken in the loop; for example, in case B, the next value of X is
  51. // decided to be C. It then enumerates through all paths in the loop and labels
  52. // the basic blocks where the next state is decided.
  53. //
  54. // Using this information it creates new paths that unconditionally branch to
  55. // the next case. This involves cloning code, so it only gets triggered if the
  56. // amount of code duplicated is below a threshold.
  57. //
  58. //===----------------------------------------------------------------------===//
  59. #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
  60. #include "llvm/ADT/APInt.h"
  61. #include "llvm/ADT/DenseMap.h"
  62. #include "llvm/ADT/DepthFirstIterator.h"
  63. #include "llvm/ADT/SmallSet.h"
  64. #include "llvm/ADT/Statistic.h"
  65. #include "llvm/Analysis/AssumptionCache.h"
  66. #include "llvm/Analysis/CodeMetrics.h"
  67. #include "llvm/Analysis/LoopIterator.h"
  68. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  69. #include "llvm/Analysis/TargetTransformInfo.h"
  70. #include "llvm/IR/CFG.h"
  71. #include "llvm/IR/Constants.h"
  72. #include "llvm/IR/IntrinsicInst.h"
  73. #include "llvm/IR/Verifier.h"
  74. #include "llvm/InitializePasses.h"
  75. #include "llvm/Pass.h"
  76. #include "llvm/Support/CommandLine.h"
  77. #include "llvm/Support/Debug.h"
  78. #include "llvm/Transforms/Scalar.h"
  79. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  80. #include "llvm/Transforms/Utils/Cloning.h"
  81. #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
  82. #include "llvm/Transforms/Utils/ValueMapper.h"
  83. #include <algorithm>
  84. #include <deque>
  85. using namespace llvm;
  86. #define DEBUG_TYPE "dfa-jump-threading"
  87. STATISTIC(NumTransforms, "Number of transformations done");
  88. STATISTIC(NumCloned, "Number of blocks cloned");
  89. STATISTIC(NumPaths, "Number of individual paths threaded");
  90. static cl::opt<bool>
  91. ClViewCfgBefore("dfa-jump-view-cfg-before",
  92. cl::desc("View the CFG before DFA Jump Threading"),
  93. cl::Hidden, cl::init(false));
  94. static cl::opt<unsigned> MaxPathLength(
  95. "dfa-max-path-length",
  96. cl::desc("Max number of blocks searched to find a threading path"),
  97. cl::Hidden, cl::init(20));
  98. static cl::opt<unsigned>
  99. CostThreshold("dfa-cost-threshold",
  100. cl::desc("Maximum cost accepted for the transformation"),
  101. cl::Hidden, cl::init(50));
  102. namespace {
  103. class SelectInstToUnfold {
  104. SelectInst *SI;
  105. PHINode *SIUse;
  106. public:
  107. SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
  108. SelectInst *getInst() { return SI; }
  109. PHINode *getUse() { return SIUse; }
  110. explicit operator bool() const { return SI && SIUse; }
  111. };
  112. void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
  113. std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  114. std::vector<BasicBlock *> *NewBBs);
  115. class DFAJumpThreading {
  116. public:
  117. DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
  118. TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
  119. : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
  120. bool run(Function &F);
  121. private:
  122. void
  123. unfoldSelectInstrs(DominatorTree *DT,
  124. const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
  125. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  126. SmallVector<SelectInstToUnfold, 4> Stack;
  127. for (SelectInstToUnfold SIToUnfold : SelectInsts)
  128. Stack.push_back(SIToUnfold);
  129. while (!Stack.empty()) {
  130. SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
  131. std::vector<SelectInstToUnfold> NewSIsToUnfold;
  132. std::vector<BasicBlock *> NewBBs;
  133. unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
  134. // Put newly discovered select instructions into the work list.
  135. for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
  136. Stack.push_back(NewSIToUnfold);
  137. }
  138. }
  139. AssumptionCache *AC;
  140. DominatorTree *DT;
  141. TargetTransformInfo *TTI;
  142. OptimizationRemarkEmitter *ORE;
  143. };
  144. class DFAJumpThreadingLegacyPass : public FunctionPass {
  145. public:
  146. static char ID; // Pass identification
  147. DFAJumpThreadingLegacyPass() : FunctionPass(ID) {}
  148. void getAnalysisUsage(AnalysisUsage &AU) const override {
  149. AU.addRequired<AssumptionCacheTracker>();
  150. AU.addRequired<DominatorTreeWrapperPass>();
  151. AU.addPreserved<DominatorTreeWrapperPass>();
  152. AU.addRequired<TargetTransformInfoWrapperPass>();
  153. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  154. }
  155. bool runOnFunction(Function &F) override {
  156. if (skipFunction(F))
  157. return false;
  158. AssumptionCache *AC =
  159. &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  160. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  161. TargetTransformInfo *TTI =
  162. &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  163. OptimizationRemarkEmitter *ORE =
  164. &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  165. return DFAJumpThreading(AC, DT, TTI, ORE).run(F);
  166. }
  167. };
  168. } // end anonymous namespace
  169. char DFAJumpThreadingLegacyPass::ID = 0;
  170. INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
  171. "DFA Jump Threading", false, false)
  172. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  173. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  174. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  175. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  176. INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
  177. "DFA Jump Threading", false, false)
  178. // Public interface to the DFA Jump Threading pass
  179. FunctionPass *llvm::createDFAJumpThreadingPass() {
  180. return new DFAJumpThreadingLegacyPass();
  181. }
  182. namespace {
  183. /// Create a new basic block and sink \p SIToSink into it.
  184. void createBasicBlockAndSinkSelectInst(
  185. DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,
  186. BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,
  187. BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  188. std::vector<BasicBlock *> *NewBBs) {
  189. assert(SIToSink->hasOneUse());
  190. assert(NewBlock);
  191. assert(NewBranch);
  192. *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,
  193. EndBlock->getParent(), EndBlock);
  194. NewBBs->push_back(*NewBlock);
  195. *NewBranch = BranchInst::Create(EndBlock, *NewBlock);
  196. SIToSink->moveBefore(*NewBranch);
  197. NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));
  198. DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});
  199. }
  200. /// Unfold the select instruction held in \p SIToUnfold by replacing it with
  201. /// control flow.
  202. ///
  203. /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
  204. /// created basic blocks into \p NewBBs.
  205. ///
  206. /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
  207. void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
  208. std::vector<SelectInstToUnfold> *NewSIsToUnfold,
  209. std::vector<BasicBlock *> *NewBBs) {
  210. SelectInst *SI = SIToUnfold.getInst();
  211. PHINode *SIUse = SIToUnfold.getUse();
  212. BasicBlock *StartBlock = SI->getParent();
  213. BasicBlock *EndBlock = SIUse->getParent();
  214. BranchInst *StartBlockTerm =
  215. dyn_cast<BranchInst>(StartBlock->getTerminator());
  216. assert(StartBlockTerm && StartBlockTerm->isUnconditional());
  217. assert(SI->hasOneUse());
  218. // These are the new basic blocks for the conditional branch.
  219. // At least one will become an actual new basic block.
  220. BasicBlock *TrueBlock = nullptr;
  221. BasicBlock *FalseBlock = nullptr;
  222. BranchInst *TrueBranch = nullptr;
  223. BranchInst *FalseBranch = nullptr;
  224. // Sink select instructions to be able to unfold them later.
  225. if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {
  226. createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
  227. "si.unfold.true", &TrueBlock, &TrueBranch,
  228. NewSIsToUnfold, NewBBs);
  229. }
  230. if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {
  231. createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
  232. "si.unfold.false", &FalseBlock,
  233. &FalseBranch, NewSIsToUnfold, NewBBs);
  234. }
  235. // If there was nothing to sink, then arbitrarily choose the 'false' side
  236. // for a new input value to the PHI.
  237. if (!TrueBlock && !FalseBlock) {
  238. FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",
  239. EndBlock->getParent(), EndBlock);
  240. NewBBs->push_back(FalseBlock);
  241. BranchInst::Create(EndBlock, FalseBlock);
  242. DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});
  243. }
  244. // Insert the real conditional branch based on the original condition.
  245. // If we did not create a new block for one of the 'true' or 'false' paths
  246. // of the condition, it means that side of the branch goes to the end block
  247. // directly and the path originates from the start block from the point of
  248. // view of the new PHI.
  249. BasicBlock *TT = EndBlock;
  250. BasicBlock *FT = EndBlock;
  251. if (TrueBlock && FalseBlock) {
  252. // A diamond.
  253. TT = TrueBlock;
  254. FT = FalseBlock;
  255. // Update the phi node of SI.
  256. SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
  257. SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
  258. SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
  259. // Update any other PHI nodes in EndBlock.
  260. for (PHINode &Phi : EndBlock->phis()) {
  261. if (&Phi != SIUse) {
  262. Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock);
  263. Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock);
  264. }
  265. }
  266. } else {
  267. BasicBlock *NewBlock = nullptr;
  268. Value *SIOp1 = SI->getTrueValue();
  269. Value *SIOp2 = SI->getFalseValue();
  270. // A triangle pointing right.
  271. if (!TrueBlock) {
  272. NewBlock = FalseBlock;
  273. FT = FalseBlock;
  274. }
  275. // A triangle pointing left.
  276. else {
  277. NewBlock = TrueBlock;
  278. TT = TrueBlock;
  279. std::swap(SIOp1, SIOp2);
  280. }
  281. // Update the phi node of SI.
  282. for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
  283. if (SIUse->getIncomingBlock(Idx) == StartBlock)
  284. SIUse->setIncomingValue(Idx, SIOp1);
  285. }
  286. SIUse->addIncoming(SIOp2, NewBlock);
  287. // Update any other PHI nodes in EndBlock.
  288. for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
  289. ++II) {
  290. if (Phi != SIUse)
  291. Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);
  292. }
  293. }
  294. StartBlockTerm->eraseFromParent();
  295. BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);
  296. DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
  297. {DominatorTree::Insert, StartBlock, FT}});
  298. // The select is now dead.
  299. SI->eraseFromParent();
  300. }
  301. struct ClonedBlock {
  302. BasicBlock *BB;
  303. uint64_t State; ///< \p State corresponds to the next value of a switch stmnt.
  304. };
  305. typedef std::deque<BasicBlock *> PathType;
  306. typedef std::vector<PathType> PathsType;
  307. typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
  308. typedef std::vector<ClonedBlock> CloneList;
  309. // This data structure keeps track of all blocks that have been cloned. If two
  310. // different ThreadingPaths clone the same block for a certain state it should
  311. // be reused, and it can be looked up in this map.
  312. typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
  313. // This map keeps track of all the new definitions for an instruction. This
  314. // information is needed when restoring SSA form after cloning blocks.
  315. typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
  316. inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
  317. OS << "< ";
  318. for (const BasicBlock *BB : Path) {
  319. std::string BBName;
  320. if (BB->hasName())
  321. raw_string_ostream(BBName) << BB->getName();
  322. else
  323. raw_string_ostream(BBName) << BB;
  324. OS << BBName << " ";
  325. }
  326. OS << ">";
  327. return OS;
  328. }
  329. /// ThreadingPath is a path in the control flow of a loop that can be threaded
  330. /// by cloning necessary basic blocks and replacing conditional branches with
  331. /// unconditional ones. A threading path includes a list of basic blocks, the
  332. /// exit state, and the block that determines the next state.
  333. struct ThreadingPath {
  334. /// Exit value is DFA's exit state for the given path.
  335. uint64_t getExitValue() const { return ExitVal; }
  336. void setExitValue(const ConstantInt *V) {
  337. ExitVal = V->getZExtValue();
  338. IsExitValSet = true;
  339. }
  340. bool isExitValueSet() const { return IsExitValSet; }
  341. /// Determinator is the basic block that determines the next state of the DFA.
  342. const BasicBlock *getDeterminatorBB() const { return DBB; }
  343. void setDeterminator(const BasicBlock *BB) { DBB = BB; }
  344. /// Path is a list of basic blocks.
  345. const PathType &getPath() const { return Path; }
  346. void setPath(const PathType &NewPath) { Path = NewPath; }
  347. void print(raw_ostream &OS) const {
  348. OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
  349. }
  350. private:
  351. PathType Path;
  352. uint64_t ExitVal;
  353. const BasicBlock *DBB = nullptr;
  354. bool IsExitValSet = false;
  355. };
  356. #ifndef NDEBUG
  357. inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
  358. TPath.print(OS);
  359. return OS;
  360. }
  361. #endif
  362. struct MainSwitch {
  363. MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
  364. if (isPredictable(SI)) {
  365. Instr = SI;
  366. } else {
  367. ORE->emit([&]() {
  368. return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
  369. << "Switch instruction is not predictable.";
  370. });
  371. }
  372. }
  373. virtual ~MainSwitch() = default;
  374. SwitchInst *getInstr() const { return Instr; }
  375. const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
  376. return SelectInsts;
  377. }
  378. private:
  379. /// Do a use-def chain traversal. Make sure the value of the switch variable
  380. /// is always a known constant. This means that all conditional jumps based on
  381. /// switch variable can be converted to unconditional jumps.
  382. bool isPredictable(const SwitchInst *SI) {
  383. std::deque<Instruction *> Q;
  384. SmallSet<Value *, 16> SeenValues;
  385. SelectInsts.clear();
  386. Value *FirstDef = SI->getOperand(0);
  387. auto *Inst = dyn_cast<Instruction>(FirstDef);
  388. // If this is a function argument or another non-instruction, then give up.
  389. // We are interested in loop local variables.
  390. if (!Inst)
  391. return false;
  392. // Require the first definition to be a PHINode
  393. if (!isa<PHINode>(Inst))
  394. return false;
  395. LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n");
  396. Q.push_back(Inst);
  397. SeenValues.insert(FirstDef);
  398. while (!Q.empty()) {
  399. Instruction *Current = Q.front();
  400. Q.pop_front();
  401. if (auto *Phi = dyn_cast<PHINode>(Current)) {
  402. for (Value *Incoming : Phi->incoming_values()) {
  403. if (!isPredictableValue(Incoming, SeenValues))
  404. return false;
  405. addInstToQueue(Incoming, Q, SeenValues);
  406. }
  407. LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n");
  408. } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
  409. if (!isValidSelectInst(SelI))
  410. return false;
  411. if (!isPredictableValue(SelI->getTrueValue(), SeenValues) ||
  412. !isPredictableValue(SelI->getFalseValue(), SeenValues)) {
  413. return false;
  414. }
  415. addInstToQueue(SelI->getTrueValue(), Q, SeenValues);
  416. addInstToQueue(SelI->getFalseValue(), Q, SeenValues);
  417. LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n");
  418. if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
  419. SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
  420. } else {
  421. // If it is neither a phi nor a select, then we give up.
  422. return false;
  423. }
  424. }
  425. return true;
  426. }
  427. bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
  428. if (SeenValues.contains(InpVal))
  429. return true;
  430. if (isa<ConstantInt>(InpVal))
  431. return true;
  432. // If this is a function argument or another non-instruction, then give up.
  433. if (!isa<Instruction>(InpVal))
  434. return false;
  435. return true;
  436. }
  437. void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
  438. SmallSet<Value *, 16> &SeenValues) {
  439. if (SeenValues.contains(Val))
  440. return;
  441. if (Instruction *I = dyn_cast<Instruction>(Val))
  442. Q.push_back(I);
  443. SeenValues.insert(Val);
  444. }
  445. bool isValidSelectInst(SelectInst *SI) {
  446. if (!SI->hasOneUse())
  447. return false;
  448. Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
  449. // The use of the select inst should be either a phi or another select.
  450. if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
  451. return false;
  452. BasicBlock *SIBB = SI->getParent();
  453. // Currently, we can only expand select instructions in basic blocks with
  454. // one successor.
  455. BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
  456. if (!SITerm || !SITerm->isUnconditional())
  457. return false;
  458. if (isa<PHINode>(SIUse) &&
  459. SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
  460. return false;
  461. // If select will not be sunk during unfolding, and it is in the same basic
  462. // block as another state defining select, then cannot unfold both.
  463. for (SelectInstToUnfold SIToUnfold : SelectInsts) {
  464. SelectInst *PrevSI = SIToUnfold.getInst();
  465. if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
  466. PrevSI->getParent() == SI->getParent())
  467. return false;
  468. }
  469. return true;
  470. }
  471. SwitchInst *Instr = nullptr;
  472. SmallVector<SelectInstToUnfold, 4> SelectInsts;
  473. };
  474. struct AllSwitchPaths {
  475. AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
  476. : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
  477. ORE(ORE) {}
  478. std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
  479. unsigned getNumThreadingPaths() { return TPaths.size(); }
  480. SwitchInst *getSwitchInst() { return Switch; }
  481. BasicBlock *getSwitchBlock() { return SwitchBlock; }
  482. void run() {
  483. VisitedBlocks Visited;
  484. PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
  485. StateDefMap StateDef = getStateDefMap();
  486. for (PathType Path : LoopPaths) {
  487. ThreadingPath TPath;
  488. const BasicBlock *PrevBB = Path.back();
  489. for (const BasicBlock *BB : Path) {
  490. if (StateDef.count(BB) != 0) {
  491. const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
  492. assert(Phi && "Expected a state-defining instr to be a phi node.");
  493. const Value *V = Phi->getIncomingValueForBlock(PrevBB);
  494. if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {
  495. TPath.setExitValue(C);
  496. TPath.setDeterminator(BB);
  497. TPath.setPath(Path);
  498. }
  499. }
  500. // Switch block is the determinator, this is the final exit value.
  501. if (TPath.isExitValueSet() && BB == Path.front())
  502. break;
  503. PrevBB = BB;
  504. }
  505. if (TPath.isExitValueSet() && isSupported(TPath))
  506. TPaths.push_back(TPath);
  507. }
  508. }
  509. private:
  510. // Value: an instruction that defines a switch state;
  511. // Key: the parent basic block of that instruction.
  512. typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
  513. PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
  514. unsigned PathDepth) const {
  515. PathsType Res;
  516. // Stop exploring paths after visiting MaxPathLength blocks
  517. if (PathDepth > MaxPathLength) {
  518. ORE->emit([&]() {
  519. return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
  520. Switch)
  521. << "Exploration stopped after visiting MaxPathLength="
  522. << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
  523. });
  524. return Res;
  525. }
  526. Visited.insert(BB);
  527. // Some blocks have multiple edges to the same successor, and this set
  528. // is used to prevent a duplicate path from being generated
  529. SmallSet<BasicBlock *, 4> Successors;
  530. for (BasicBlock *Succ : successors(BB)) {
  531. if (!Successors.insert(Succ).second)
  532. continue;
  533. // Found a cycle through the SwitchBlock
  534. if (Succ == SwitchBlock) {
  535. Res.push_back({BB});
  536. continue;
  537. }
  538. // We have encountered a cycle, do not get caught in it
  539. if (Visited.contains(Succ))
  540. continue;
  541. PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
  542. for (PathType Path : SuccPaths) {
  543. PathType NewPath(Path);
  544. NewPath.push_front(BB);
  545. Res.push_back(NewPath);
  546. }
  547. }
  548. // This block could now be visited again from a different predecessor. Note
  549. // that this will result in exponential runtime. Subpaths could possibly be
  550. // cached but it takes a lot of memory to store them.
  551. Visited.erase(BB);
  552. return Res;
  553. }
  554. /// Walk the use-def chain and collect all the state-defining instructions.
  555. StateDefMap getStateDefMap() const {
  556. StateDefMap Res;
  557. Value *FirstDef = Switch->getOperand(0);
  558. assert(isa<PHINode>(FirstDef) && "After select unfolding, all state "
  559. "definitions are expected to be phi "
  560. "nodes.");
  561. SmallVector<PHINode *, 8> Stack;
  562. Stack.push_back(dyn_cast<PHINode>(FirstDef));
  563. SmallSet<Value *, 16> SeenValues;
  564. while (!Stack.empty()) {
  565. PHINode *CurPhi = Stack.pop_back_val();
  566. Res[CurPhi->getParent()] = CurPhi;
  567. SeenValues.insert(CurPhi);
  568. for (Value *Incoming : CurPhi->incoming_values()) {
  569. if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
  570. SeenValues.contains(Incoming)) {
  571. continue;
  572. }
  573. assert(isa<PHINode>(Incoming) && "After select unfolding, all state "
  574. "definitions are expected to be phi "
  575. "nodes.");
  576. Stack.push_back(cast<PHINode>(Incoming));
  577. }
  578. }
  579. return Res;
  580. }
  581. /// The determinator BB should precede the switch-defining BB.
  582. ///
  583. /// Otherwise, it is possible that the state defined in the determinator block
  584. /// defines the state for the next iteration of the loop, rather than for the
  585. /// current one.
  586. ///
  587. /// Currently supported paths:
  588. /// \code
  589. /// < switch bb1 determ def > [ 42, determ ]
  590. /// < switch_and_def bb1 determ > [ 42, determ ]
  591. /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
  592. /// \endcode
  593. ///
  594. /// Unsupported paths:
  595. /// \code
  596. /// < switch bb1 def determ > [ 43, determ ]
  597. /// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
  598. /// \endcode
  599. bool isSupported(const ThreadingPath &TPath) {
  600. Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition());
  601. assert(SwitchCondI);
  602. if (!SwitchCondI)
  603. return false;
  604. const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
  605. const BasicBlock *SwitchCondUseBB = Switch->getParent();
  606. const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
  607. assert(
  608. SwitchCondUseBB == TPath.getPath().front() &&
  609. "The first BB in a threading path should have the switch instruction");
  610. if (SwitchCondUseBB != TPath.getPath().front())
  611. return false;
  612. // Make DeterminatorBB the first element in Path.
  613. PathType Path = TPath.getPath();
  614. auto ItDet = std::find(Path.begin(), Path.end(), DeterminatorBB);
  615. std::rotate(Path.begin(), ItDet, Path.end());
  616. bool IsDetBBSeen = false;
  617. bool IsDefBBSeen = false;
  618. bool IsUseBBSeen = false;
  619. for (BasicBlock *BB : Path) {
  620. if (BB == DeterminatorBB)
  621. IsDetBBSeen = true;
  622. if (BB == SwitchCondDefBB)
  623. IsDefBBSeen = true;
  624. if (BB == SwitchCondUseBB)
  625. IsUseBBSeen = true;
  626. if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
  627. return false;
  628. }
  629. return true;
  630. }
  631. SwitchInst *Switch;
  632. BasicBlock *SwitchBlock;
  633. OptimizationRemarkEmitter *ORE;
  634. std::vector<ThreadingPath> TPaths;
  635. };
  636. struct TransformDFA {
  637. TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
  638. AssumptionCache *AC, TargetTransformInfo *TTI,
  639. OptimizationRemarkEmitter *ORE,
  640. SmallPtrSet<const Value *, 32> EphValues)
  641. : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
  642. EphValues(EphValues) {}
  643. void run() {
  644. if (isLegalAndProfitableToTransform()) {
  645. createAllExitPaths();
  646. NumTransforms++;
  647. }
  648. }
  649. private:
  650. /// This function performs both a legality check and profitability check at
  651. /// the same time since it is convenient to do so. It iterates through all
  652. /// blocks that will be cloned, and keeps track of the duplication cost. It
  653. /// also returns false if it is illegal to clone some required block.
  654. bool isLegalAndProfitableToTransform() {
  655. CodeMetrics Metrics;
  656. SwitchInst *Switch = SwitchPaths->getSwitchInst();
  657. // Note that DuplicateBlockMap is not being used as intended here. It is
  658. // just being used to ensure (BB, State) pairs are only counted once.
  659. DuplicateBlockMap DuplicateMap;
  660. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  661. PathType PathBBs = TPath.getPath();
  662. uint64_t NextState = TPath.getExitValue();
  663. const BasicBlock *Determinator = TPath.getDeterminatorBB();
  664. // Update Metrics for the Switch block, this is always cloned
  665. BasicBlock *BB = SwitchPaths->getSwitchBlock();
  666. BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
  667. if (!VisitedBB) {
  668. Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
  669. DuplicateMap[BB].push_back({BB, NextState});
  670. }
  671. // If the Switch block is the Determinator, then we can continue since
  672. // this is the only block that is cloned and we already counted for it.
  673. if (PathBBs.front() == Determinator)
  674. continue;
  675. // Otherwise update Metrics for all blocks that will be cloned. If any
  676. // block is already cloned and would be reused, don't double count it.
  677. auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
  678. for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
  679. BB = *BBIt;
  680. VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
  681. if (VisitedBB)
  682. continue;
  683. Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
  684. DuplicateMap[BB].push_back({BB, NextState});
  685. }
  686. if (Metrics.notDuplicatable) {
  687. LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
  688. << "non-duplicatable instructions.\n");
  689. ORE->emit([&]() {
  690. return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
  691. Switch)
  692. << "Contains non-duplicatable instructions.";
  693. });
  694. return false;
  695. }
  696. if (Metrics.convergent) {
  697. LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
  698. << "convergent instructions.\n");
  699. ORE->emit([&]() {
  700. return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
  701. << "Contains convergent instructions.";
  702. });
  703. return false;
  704. }
  705. }
  706. unsigned DuplicationCost = 0;
  707. unsigned JumpTableSize = 0;
  708. TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
  709. nullptr);
  710. if (JumpTableSize == 0) {
  711. // Factor in the number of conditional branches reduced from jump
  712. // threading. Assume that lowering the switch block is implemented by
  713. // using binary search, hence the LogBase2().
  714. unsigned CondBranches =
  715. APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
  716. DuplicationCost = Metrics.NumInsts / CondBranches;
  717. } else {
  718. // Compared with jump tables, the DFA optimizer removes an indirect branch
  719. // on each loop iteration, thus making branch prediction more precise. The
  720. // more branch targets there are, the more likely it is for the branch
  721. // predictor to make a mistake, and the more benefit there is in the DFA
  722. // optimizer. Thus, the more branch targets there are, the lower is the
  723. // cost of the DFA opt.
  724. DuplicationCost = Metrics.NumInsts / JumpTableSize;
  725. }
  726. LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
  727. << SwitchPaths->getSwitchBlock()->getName()
  728. << " is: " << DuplicationCost << "\n\n");
  729. if (DuplicationCost > CostThreshold) {
  730. LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
  731. << "cost threshold.\n");
  732. ORE->emit([&]() {
  733. return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
  734. << "Duplication cost exceeds the cost threshold (cost="
  735. << ore::NV("Cost", DuplicationCost)
  736. << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
  737. });
  738. return false;
  739. }
  740. ORE->emit([&]() {
  741. return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
  742. << "Switch statement jump-threaded.";
  743. });
  744. return true;
  745. }
  746. /// Transform each threading path to effectively jump thread the DFA.
  747. void createAllExitPaths() {
  748. DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
  749. // Move the switch block to the end of the path, since it will be duplicated
  750. BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
  751. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  752. LLVM_DEBUG(dbgs() << TPath << "\n");
  753. PathType NewPath(TPath.getPath());
  754. NewPath.push_back(SwitchBlock);
  755. TPath.setPath(NewPath);
  756. }
  757. // Transform the ThreadingPaths and keep track of the cloned values
  758. DuplicateBlockMap DuplicateMap;
  759. DefMap NewDefs;
  760. SmallSet<BasicBlock *, 16> BlocksToClean;
  761. for (BasicBlock *BB : successors(SwitchBlock))
  762. BlocksToClean.insert(BB);
  763. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
  764. createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
  765. NumPaths++;
  766. }
  767. // After all paths are cloned, now update the last successor of the cloned
  768. // path so it skips over the switch statement
  769. for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
  770. updateLastSuccessor(TPath, DuplicateMap, &DTU);
  771. // For each instruction that was cloned and used outside, update its uses
  772. updateSSA(NewDefs);
  773. // Clean PHI Nodes for the newly created blocks
  774. for (BasicBlock *BB : BlocksToClean)
  775. cleanPhiNodes(BB);
  776. }
  777. /// For a specific ThreadingPath \p Path, create an exit path starting from
  778. /// the determinator block.
  779. ///
  780. /// To remember the correct destination, we have to duplicate blocks
  781. /// corresponding to each state. Also update the terminating instruction of
  782. /// the predecessors, and phis in the successor blocks.
  783. void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
  784. DuplicateBlockMap &DuplicateMap,
  785. SmallSet<BasicBlock *, 16> &BlocksToClean,
  786. DomTreeUpdater *DTU) {
  787. uint64_t NextState = Path.getExitValue();
  788. const BasicBlock *Determinator = Path.getDeterminatorBB();
  789. PathType PathBBs = Path.getPath();
  790. // Don't select the placeholder block in front
  791. if (PathBBs.front() == Determinator)
  792. PathBBs.pop_front();
  793. auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
  794. auto Prev = std::prev(DetIt);
  795. BasicBlock *PrevBB = *Prev;
  796. for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
  797. BasicBlock *BB = *BBIt;
  798. BlocksToClean.insert(BB);
  799. // We already cloned BB for this NextState, now just update the branch
  800. // and continue.
  801. BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
  802. if (NextBB) {
  803. updatePredecessor(PrevBB, BB, NextBB, DTU);
  804. PrevBB = NextBB;
  805. continue;
  806. }
  807. // Clone the BB and update the successor of Prev to jump to the new block
  808. BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
  809. BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
  810. DuplicateMap[BB].push_back({NewBB, NextState});
  811. BlocksToClean.insert(NewBB);
  812. PrevBB = NewBB;
  813. }
  814. }
  815. /// Restore SSA form after cloning blocks.
  816. ///
  817. /// Each cloned block creates new defs for a variable, and the uses need to be
  818. /// updated to reflect this. The uses may be replaced with a cloned value, or
  819. /// some derived phi instruction. Note that all uses of a value defined in the
  820. /// same block were already remapped when cloning the block.
  821. void updateSSA(DefMap &NewDefs) {
  822. SSAUpdaterBulk SSAUpdate;
  823. SmallVector<Use *, 16> UsesToRename;
  824. for (auto KV : NewDefs) {
  825. Instruction *I = KV.first;
  826. BasicBlock *BB = I->getParent();
  827. std::vector<Instruction *> Cloned = KV.second;
  828. // Scan all uses of this instruction to see if it is used outside of its
  829. // block, and if so, record them in UsesToRename.
  830. for (Use &U : I->uses()) {
  831. Instruction *User = cast<Instruction>(U.getUser());
  832. if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
  833. if (UserPN->getIncomingBlock(U) == BB)
  834. continue;
  835. } else if (User->getParent() == BB) {
  836. continue;
  837. }
  838. UsesToRename.push_back(&U);
  839. }
  840. // If there are no uses outside the block, we're done with this
  841. // instruction.
  842. if (UsesToRename.empty())
  843. continue;
  844. LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
  845. << "\n");
  846. // We found a use of I outside of BB. Rename all uses of I that are
  847. // outside its block to be uses of the appropriate PHI node etc. See
  848. // ValuesInBlocks with the values we know.
  849. unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
  850. SSAUpdate.AddAvailableValue(VarNum, BB, I);
  851. for (Instruction *New : Cloned)
  852. SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
  853. while (!UsesToRename.empty())
  854. SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
  855. LLVM_DEBUG(dbgs() << "\n");
  856. }
  857. // SSAUpdater handles phi placement and renaming uses with the appropriate
  858. // value.
  859. SSAUpdate.RewriteAllUses(DT);
  860. }
  861. /// Clones a basic block, and adds it to the CFG.
  862. ///
  863. /// This function also includes updating phi nodes in the successors of the
  864. /// BB, and remapping uses that were defined locally in the cloned BB.
  865. BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
  866. uint64_t NextState,
  867. DuplicateBlockMap &DuplicateMap,
  868. DefMap &NewDefs,
  869. DomTreeUpdater *DTU) {
  870. ValueToValueMapTy VMap;
  871. BasicBlock *NewBB = CloneBasicBlock(
  872. BB, VMap, ".jt" + std::to_string(NextState), BB->getParent());
  873. NewBB->moveAfter(BB);
  874. NumCloned++;
  875. for (Instruction &I : *NewBB) {
  876. // Do not remap operands of PHINode in case a definition in BB is an
  877. // incoming value to a phi in the same block. This incoming value will
  878. // be renamed later while restoring SSA.
  879. if (isa<PHINode>(&I))
  880. continue;
  881. RemapInstruction(&I, VMap,
  882. RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
  883. if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
  884. AC->registerAssumption(II);
  885. }
  886. updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
  887. updatePredecessor(PrevBB, BB, NewBB, DTU);
  888. updateDefMap(NewDefs, VMap);
  889. // Add all successors to the DominatorTree
  890. SmallPtrSet<BasicBlock *, 4> SuccSet;
  891. for (auto *SuccBB : successors(NewBB)) {
  892. if (SuccSet.insert(SuccBB).second)
  893. DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
  894. }
  895. SuccSet.clear();
  896. return NewBB;
  897. }
  898. /// Update the phi nodes in BB's successors.
  899. ///
  900. /// This means creating a new incoming value from NewBB with the new
  901. /// instruction wherever there is an incoming value from BB.
  902. void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
  903. uint64_t NextState, ValueToValueMapTy &VMap,
  904. DuplicateBlockMap &DuplicateMap) {
  905. std::vector<BasicBlock *> BlocksToUpdate;
  906. // If BB is the last block in the path, we can simply update the one case
  907. // successor that will be reached.
  908. if (BB == SwitchPaths->getSwitchBlock()) {
  909. SwitchInst *Switch = SwitchPaths->getSwitchInst();
  910. BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
  911. BlocksToUpdate.push_back(NextCase);
  912. BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
  913. if (ClonedSucc)
  914. BlocksToUpdate.push_back(ClonedSucc);
  915. }
  916. // Otherwise update phis in all successors.
  917. else {
  918. for (BasicBlock *Succ : successors(BB)) {
  919. BlocksToUpdate.push_back(Succ);
  920. // Check if a successor has already been cloned for the particular exit
  921. // value. In this case if a successor was already cloned, the phi nodes
  922. // in the cloned block should be updated directly.
  923. BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
  924. if (ClonedSucc)
  925. BlocksToUpdate.push_back(ClonedSucc);
  926. }
  927. }
  928. // If there is a phi with an incoming value from BB, create a new incoming
  929. // value for the new predecessor ClonedBB. The value will either be the same
  930. // value from BB or a cloned value.
  931. for (BasicBlock *Succ : BlocksToUpdate) {
  932. for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
  933. ++II) {
  934. Value *Incoming = Phi->getIncomingValueForBlock(BB);
  935. if (Incoming) {
  936. if (isa<Constant>(Incoming)) {
  937. Phi->addIncoming(Incoming, ClonedBB);
  938. continue;
  939. }
  940. Value *ClonedVal = VMap[Incoming];
  941. if (ClonedVal)
  942. Phi->addIncoming(ClonedVal, ClonedBB);
  943. else
  944. Phi->addIncoming(Incoming, ClonedBB);
  945. }
  946. }
  947. }
  948. }
  949. /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
  950. /// other successors are kept as well.
  951. void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
  952. BasicBlock *NewBB, DomTreeUpdater *DTU) {
  953. // When a path is reused, there is a chance that predecessors were already
  954. // updated before. Check if the predecessor needs to be updated first.
  955. if (!isPredecessor(OldBB, PrevBB))
  956. return;
  957. Instruction *PrevTerm = PrevBB->getTerminator();
  958. for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
  959. if (PrevTerm->getSuccessor(Idx) == OldBB) {
  960. OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
  961. PrevTerm->setSuccessor(Idx, NewBB);
  962. }
  963. }
  964. DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
  965. {DominatorTree::Insert, PrevBB, NewBB}});
  966. }
  967. /// Add new value mappings to the DefMap to keep track of all new definitions
  968. /// for a particular instruction. These will be used while updating SSA form.
  969. void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
  970. SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
  971. NewDefsVector.reserve(VMap.size());
  972. for (auto Entry : VMap) {
  973. Instruction *Inst =
  974. dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
  975. if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
  976. isa<SwitchInst>(Inst)) {
  977. continue;
  978. }
  979. Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
  980. if (!Cloned)
  981. continue;
  982. NewDefsVector.push_back({Inst, Cloned});
  983. }
  984. // Sort the defs to get deterministic insertion order into NewDefs.
  985. sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
  986. if (LHS.first == RHS.first)
  987. return LHS.second->comesBefore(RHS.second);
  988. return LHS.first->comesBefore(RHS.first);
  989. });
  990. for (const auto &KV : NewDefsVector)
  991. NewDefs[KV.first].push_back(KV.second);
  992. }
  993. /// Update the last branch of a particular cloned path to point to the correct
  994. /// case successor.
  995. ///
  996. /// Note that this is an optional step and would have been done in later
  997. /// optimizations, but it makes the CFG significantly easier to work with.
  998. void updateLastSuccessor(ThreadingPath &TPath,
  999. DuplicateBlockMap &DuplicateMap,
  1000. DomTreeUpdater *DTU) {
  1001. uint64_t NextState = TPath.getExitValue();
  1002. BasicBlock *BB = TPath.getPath().back();
  1003. BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
  1004. // Note multiple paths can end at the same block so check that it is not
  1005. // updated yet
  1006. if (!isa<SwitchInst>(LastBlock->getTerminator()))
  1007. return;
  1008. SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
  1009. BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
  1010. std::vector<DominatorTree::UpdateType> DTUpdates;
  1011. SmallPtrSet<BasicBlock *, 4> SuccSet;
  1012. for (BasicBlock *Succ : successors(LastBlock)) {
  1013. if (Succ != NextCase && SuccSet.insert(Succ).second)
  1014. DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
  1015. }
  1016. Switch->eraseFromParent();
  1017. BranchInst::Create(NextCase, LastBlock);
  1018. DTU->applyUpdates(DTUpdates);
  1019. }
  1020. /// After cloning blocks, some of the phi nodes have extra incoming values
  1021. /// that are no longer used. This function removes them.
  1022. void cleanPhiNodes(BasicBlock *BB) {
  1023. // If BB is no longer reachable, remove any remaining phi nodes
  1024. if (pred_empty(BB)) {
  1025. std::vector<PHINode *> PhiToRemove;
  1026. for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
  1027. PhiToRemove.push_back(Phi);
  1028. }
  1029. for (PHINode *PN : PhiToRemove) {
  1030. PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
  1031. PN->eraseFromParent();
  1032. }
  1033. return;
  1034. }
  1035. // Remove any incoming values that come from an invalid predecessor
  1036. for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
  1037. std::vector<BasicBlock *> BlocksToRemove;
  1038. for (BasicBlock *IncomingBB : Phi->blocks()) {
  1039. if (!isPredecessor(BB, IncomingBB))
  1040. BlocksToRemove.push_back(IncomingBB);
  1041. }
  1042. for (BasicBlock *BB : BlocksToRemove)
  1043. Phi->removeIncomingValue(BB);
  1044. }
  1045. }
  1046. /// Checks if BB was already cloned for a particular next state value. If it
  1047. /// was then it returns this cloned block, and otherwise null.
  1048. BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState,
  1049. DuplicateBlockMap &DuplicateMap) {
  1050. CloneList ClonedBBs = DuplicateMap[BB];
  1051. // Find an entry in the CloneList with this NextState. If it exists then
  1052. // return the corresponding BB
  1053. auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
  1054. return C.State == NextState;
  1055. });
  1056. return It != ClonedBBs.end() ? (*It).BB : nullptr;
  1057. }
  1058. /// Helper to get the successor corresponding to a particular case value for
  1059. /// a switch statement.
  1060. BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) {
  1061. BasicBlock *NextCase = nullptr;
  1062. for (auto Case : Switch->cases()) {
  1063. if (Case.getCaseValue()->getZExtValue() == NextState) {
  1064. NextCase = Case.getCaseSuccessor();
  1065. break;
  1066. }
  1067. }
  1068. if (!NextCase)
  1069. NextCase = Switch->getDefaultDest();
  1070. return NextCase;
  1071. }
  1072. /// Returns true if IncomingBB is a predecessor of BB.
  1073. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
  1074. return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB);
  1075. }
  1076. AllSwitchPaths *SwitchPaths;
  1077. DominatorTree *DT;
  1078. AssumptionCache *AC;
  1079. TargetTransformInfo *TTI;
  1080. OptimizationRemarkEmitter *ORE;
  1081. SmallPtrSet<const Value *, 32> EphValues;
  1082. std::vector<ThreadingPath> TPaths;
  1083. };
  1084. bool DFAJumpThreading::run(Function &F) {
  1085. LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
  1086. if (F.hasOptSize()) {
  1087. LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
  1088. return false;
  1089. }
  1090. if (ClViewCfgBefore)
  1091. F.viewCFG();
  1092. SmallVector<AllSwitchPaths, 2> ThreadableLoops;
  1093. bool MadeChanges = false;
  1094. for (BasicBlock &BB : F) {
  1095. auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
  1096. if (!SI)
  1097. continue;
  1098. LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
  1099. << " is predictable\n");
  1100. MainSwitch Switch(SI, ORE);
  1101. if (!Switch.getInstr())
  1102. continue;
  1103. LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
  1104. << "candidate for jump threading\n");
  1105. LLVM_DEBUG(SI->dump());
  1106. unfoldSelectInstrs(DT, Switch.getSelectInsts());
  1107. if (!Switch.getSelectInsts().empty())
  1108. MadeChanges = true;
  1109. AllSwitchPaths SwitchPaths(&Switch, ORE);
  1110. SwitchPaths.run();
  1111. if (SwitchPaths.getNumThreadingPaths() > 0) {
  1112. ThreadableLoops.push_back(SwitchPaths);
  1113. // For the time being limit this optimization to occurring once in a
  1114. // function since it can change the CFG significantly. This is not a
  1115. // strict requirement but it can cause buggy behavior if there is an
  1116. // overlap of blocks in different opportunities. There is a lot of room to
  1117. // experiment with catching more opportunities here.
  1118. break;
  1119. }
  1120. }
  1121. SmallPtrSet<const Value *, 32> EphValues;
  1122. if (ThreadableLoops.size() > 0)
  1123. CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
  1124. for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
  1125. TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
  1126. Transform.run();
  1127. MadeChanges = true;
  1128. }
  1129. #ifdef EXPENSIVE_CHECKS
  1130. assert(DT->verify(DominatorTree::VerificationLevel::Full));
  1131. verifyFunction(F, &dbgs());
  1132. #endif
  1133. return MadeChanges;
  1134. }
  1135. } // end anonymous namespace
  1136. /// Integrate with the new Pass Manager
  1137. PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
  1138. FunctionAnalysisManager &AM) {
  1139. AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  1140. DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  1141. TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  1142. OptimizationRemarkEmitter ORE(&F);
  1143. if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
  1144. return PreservedAnalyses::all();
  1145. PreservedAnalyses PA;
  1146. PA.preserve<DominatorTreeAnalysis>();
  1147. return PA;
  1148. }