//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Jump Threading pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;
using namespace jumpthreading;

#define DEBUG_TYPE "jump-threading"

STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");

static cl::opt<unsigned>
    BBDuplicateThreshold("jump-threading-threshold",
                         cl::desc("Max block size to duplicate for jump threading"),
                         cl::init(6), cl::Hidden);

static cl::opt<unsigned> ImplicationSearchThreshold(
    "jump-threading-implication-search-threshold",
    cl::desc("The number of predecessors to search for a stronger "
             "condition to use to thread over a weaker condition"),
    cl::init(3), cl::Hidden);

static cl::opt<unsigned> PhiDuplicateThreshold(
    "jump-threading-phi-threshold",
    cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
    cl::Hidden);

static cl::opt<bool> PrintLVIAfterJumpThreading(
    "print-lvi-after-jump-threading",
    cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
    cl::Hidden);

static cl::opt<bool> ThreadAcrossLoopHeaders(
    "jump-threading-across-loop-headers",
    cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
    cl::init(false), cl::Hidden);

namespace {

/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
/// successors, we forward the edge from the predecessor to the successor by
/// duplicating the contents of this block.
///
/// An example of when this can occur is code like this:
///
///   if () { ...
///     X = 4;
///   }
///   if (X < 3) {
///
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
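///
/// As a hedged IR-level sketch of that example (hypothetical block and value
/// names, not taken from this file): a predecessor known to produce X = 4
/// can skip the "X < 3" test entirely:
///
///   pred:                                     ; X is 4 on this path
///     br label %test
///   test:
///     %x = phi i32 [ 4, %pred ], [ %x0, %other ]
///     %cmp = icmp slt i32 %x, 3
///     br i1 %cmp, label %then, label %else
///
/// Threading the %pred edge rewires it to "br label %else", duplicating the
/// body of %test for the remaining predecessors as needed.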
class JumpThreading : public FunctionPass {
  JumpThreadingPass Impl;

public:
  static char ID; // Pass identification

  JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
    initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<LazyValueInfoWrapperPass>();
    AU.addPreserved<LazyValueInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  void releaseMemory() override { Impl.releaseMemory(); }
};

} // end anonymous namespace

char JumpThreading::ID = 0;

INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
                      "Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
                    "Jump Threading", false, false)

// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
  return new JumpThreading(Threshold);
}

JumpThreadingPass::JumpThreadingPass(int T) {
  DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}

// Update branch probability information according to conditional
// branch probability. This is usually made possible for cloned branches
// in inline instances by the context specific profile in the caller.
// For instance,
//
//  [Block PredBB]
//  [Branch PredBr]
//  if (t) {
//     Block A;
//  } else {
//     Block B;
//  }
//
//  [Block BB]
//  cond = PN([true, %A], [..., %B]); // PHI node
//  [Branch CondBr]
//  if (cond) {
//    ...  // P(cond == true) = 1%
//  }
//
//  Here we know that when block A is taken, cond must be true, which means
//      P(cond == true | A) = 1
//
//  Given that P(cond == true) = P(cond == true | A) * P(A) +
//                               P(cond == true | B) * P(B)
//  we get:
//     P(cond == true) = P(A) + P(cond == true | B) * P(B)
//
//  which gives us:
//     P(A) <= P(cond == true), and since P(A) = P(t == true),
//     P(t == true) <= P(cond == true)
//
//  In other words, if we know P(cond == true) is unlikely, we know
//  that P(t == true) is also unlikely.
//
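//  As a worked illustration (numbers assumed, not from this file): if CondBr
//  carries branch_weights 1:99, then P(cond == true) = 1%, so the
//  predecessor's branch on t can be annotated with the same 1% probability
//  (encoded via BranchProbability numerators below), because
//  P(t == true) <= P(cond == true) = 1%.
//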
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return;

  uint64_t TrueWeight, FalseWeight;
  if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
    return;
  if (TrueWeight + FalseWeight == 0)
    // Zero branch_weights do not give a hint for computing branch
    // probabilities; technically they would make the denominator
    // (TrueWeight + FalseWeight) of the division below zero.
    return;
  // Returns the outgoing edge of the dominating predecessor block
  // that leads to the PhiNode's incoming block:
  auto GetPredOutEdge =
      [](BasicBlock *IncomingBB,
         BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
    auto *PredBB = IncomingBB;
    auto *SuccBB = PhiBB;
    SmallPtrSet<BasicBlock *, 16> Visited;
    while (true) {
      BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
      if (PredBr && PredBr->isConditional())
        return {PredBB, SuccBB};
      Visited.insert(PredBB);
      auto *SinglePredBB = PredBB->getSinglePredecessor();
      if (!SinglePredBB)
        return {nullptr, nullptr};

      // Stop searching when SinglePredBB has been visited. It means we see
      // an unreachable loop.
      if (Visited.count(SinglePredBB))
        return {nullptr, nullptr};

      SuccBB = PredBB;
      PredBB = SinglePredBB;
    }
  };

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *PhiOpnd = PN->getIncomingValue(i);
    ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
    if (!CI || !CI->getType()->isIntegerTy(1))
      continue;

    BranchProbability BP =
        (CI->isOne() ? BranchProbability::getBranchProbability(
                           TrueWeight, TrueWeight + FalseWeight)
                     : BranchProbability::getBranchProbability(
                           FalseWeight, TrueWeight + FalseWeight));

    auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
    if (!PredOutEdge.first)
      return;

    BasicBlock *PredBB = PredOutEdge.first;
    BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (!PredBr)
      return;

    uint64_t PredTrueWeight, PredFalseWeight;
    // FIXME: We currently only set the profile data when it is missing.
    // With PGO, this can be used to refine even existing profile data with
    // context information. This needs to be done after more performance
    // testing.
    if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
      continue;
    // We cannot infer anything useful when BP >= 50%, because BP is only an
    // upper bound on the probability.
    if (BP >= BranchProbability(50, 100))
      continue;
    SmallVector<uint32_t, 2> Weights;
    if (PredBr->getSuccessor(0) == PredOutEdge.second) {
      Weights.push_back(BP.getNumerator());
      Weights.push_back(BP.getCompl().getNumerator());
    } else {
      Weights.push_back(BP.getCompl().getNumerator());
      Weights.push_back(BP.getNumerator());
    }
    PredBr->setMetadata(LLVMContext::MD_prof,
                        MDBuilder(PredBr->getParent()->getContext())
                            .createBranchWeights(Weights));
  }
}
/// runOnFunction - Toplevel algorithm.
bool JumpThreading::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  // Jump threading makes no sense for targets with divergent control flow.
  if (TTI->hasBranchDivergence())
    return false;
  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;
  if (F.hasProfileData()) {
    LoopInfo LI{*DT};
    BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
                              std::move(BFI), std::move(BPI));
  if (PrintLVIAfterJumpThreading) {
    dbgs() << "LVI for function '" << F.getName() << "':\n";
    LVI->printLVI(F, DTU.getDomTree(), dbgs());
  }
  return Changed;
}

PreservedAnalyses JumpThreadingPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  // Jump threading makes no sense for targets with divergent control flow.
  if (TTI.hasBranchDivergence())
    return PreservedAnalyses::all();
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;
  if (F.hasProfileData()) {
    LoopInfo LI{DT};
    BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
                         std::move(BFI), std::move(BPI));

  if (PrintLVIAfterJumpThreading) {
    dbgs() << "LVI for function '" << F.getName() << "':\n";
    LVI.printLVI(F, DTU.getDomTree(), dbgs());
  }

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LazyValueAnalysis>();
  return PA;
}

bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                                TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
                                AliasAnalysis *AA_, DomTreeUpdater *DTU_,
                                bool HasProfileData_,
                                std::unique_ptr<BlockFrequencyInfo> BFI_,
                                std::unique_ptr<BranchProbabilityInfo> BPI_) {
  LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
  TLI = TLI_;
  TTI = TTI_;
  LVI = LVI_;
  AA = AA_;
  DTU = DTU_;
  BFI.reset();
  BPI.reset();
  // When profile data is available, we need to update edge weights after
  // successful jump threading, which requires both BPI and BFI being available.
  HasProfileData = HasProfileData_;
  auto *GuardDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  HasGuards = GuardDecl && !GuardDecl->use_empty();
  if (HasProfileData) {
    BPI = std::move(BPI_);
    BFI = std::move(BFI_);
  }

  // Reduce the number of instructions duplicated when optimizing strictly for
  // size.
  if (BBDuplicateThreshold.getNumOccurrences())
    BBDupThreshold = BBDuplicateThreshold;
  else if (F.hasFnAttribute(Attribute::MinSize))
    BBDupThreshold = 3;
  else
    BBDupThreshold = DefaultBBDupThreshold;
  // JumpThreading must not process blocks unreachable from entry. It's a
  // waste of compute time and can potentially lead to hangs.
  SmallPtrSet<BasicBlock *, 16> Unreachable;
  assert(DTU && "DTU isn't passed into JumpThreading before using it.");
  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
  DominatorTree &DT = DTU->getDomTree();
  for (auto &BB : F)
    if (!DT.isReachableFromEntry(&BB))
      Unreachable.insert(&BB);

  if (!ThreadAcrossLoopHeaders)
    findLoopHeaders(F);

  bool EverChanged = false;
  bool Changed;
  do {
    Changed = false;
    for (auto &BB : F) {
      if (Unreachable.count(&BB))
        continue;
      while (processBlock(&BB)) // Thread all of the branches we can over BB.
        Changed = true;

      // Jump threading may have introduced redundant debug values into BB
      // which should be removed.
      if (Changed)
        RemoveRedundantDbgInstrs(&BB);
      // Stop processing BB if it's the entry or is now deleted. The following
      // routines attempt to eliminate BB, and locating a suitable replacement
      // for the entry is non-trivial.
      if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
        continue;

      if (pred_empty(&BB)) {
        // When processBlock makes BB unreachable it doesn't bother to fix up
        // the instructions in it. We must remove BB to prevent invalid IR.
        LLVM_DEBUG(dbgs() << "  JT: Deleting dead block '" << BB.getName()
                          << "' with terminator: " << *BB.getTerminator()
                          << '\n');
        LoopHeaders.erase(&BB);
        LVI->eraseBlock(&BB);
        DeleteDeadBlock(&BB, DTU);
        Changed = true;
        continue;
      }

      // processBlock doesn't thread BBs with unconditional TIs. However, if BB
      // is "almost empty", we attempt to merge BB with its sole successor.
      auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
      if (BI && BI->isUnconditional()) {
        BasicBlock *Succ = BI->getSuccessor(0);
        if (
            // The terminator must be the only non-phi instruction in BB.
            BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
            // Don't alter Loop headers and latches to ensure another pass can
            // detect and transform nested loops later.
            !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
            TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
          RemoveRedundantDbgInstrs(Succ);
          // BB is valid for cleanup here because we passed in DTU. F remains
          // BB's parent until a DTU->getDomTree() event.
          LVI->eraseBlock(&BB);
          Changed = true;
        }
      }
    }
    EverChanged |= Changed;
  } while (Changed);

  LoopHeaders.clear();
  return EverChanged;
}
// Replace uses of Cond with ToVal when safe to do so. If all uses are
// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
// because we may incorrectly replace uses when guards/assumes are themselves
// uses of `Cond` and we used the guards/assumes to reason about the `Cond`
// value at the end of the block. RAUW unconditionally replaces all uses,
// including the guards/assumes themselves and the uses before the
// guard/assume.
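//
// A hedged illustration (hypothetical IR, not from this file) of why RAUW
// would be wrong here:
//   %c = icmp eq i32 %x, 0
//   call void @llvm.assume(i1 %c)
//   br i1 %c, label %t, label %f
// The assume is what lets LVI conclude %c is true at the end of the block;
// blindly replacing *all* uses of %c would rewrite the assume's operand to
// assume(true), discarding the very fact the replacement relied on.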
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
                                BasicBlock *KnownAtEndOfBB) {
  bool Changed = false;
  assert(Cond->getType() == ToVal->getType());
  // We can unconditionally replace all uses in non-local blocks (i.e. uses
  // strictly dominated by BB), since LVI information is true from the
  // terminator of BB.
  if (Cond->getParent() == KnownAtEndOfBB)
    Changed |= replaceNonLocalUsesWith(Cond, ToVal);
  for (Instruction &I : reverse(*KnownAtEndOfBB)) {
    // Reached the Cond whose uses we are trying to replace, so there are no
    // more uses.
    if (&I == Cond)
      break;
    // We only replace uses in instructions that are guaranteed to reach the end
    // of BB, where we know Cond is ToVal.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
    Changed |= I.replaceUsesOfWith(Cond, ToVal);
  }
  if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
    Cond->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
/// Return the cost of duplicating a piece of this block, from the first
/// non-PHI instruction up to (but not including) StopAt, in order to thread
/// across it. Stop scanning the block once the cost exceeds the threshold.
/// If duplication is impossible, returns ~0U.
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
                                             BasicBlock *BB,
                                             Instruction *StopAt,
                                             unsigned Threshold) {
  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");

  // Do not duplicate the BB if it has a lot of PHI nodes.
  // If a threadable chain is too long then the number of PHI nodes can add up,
  // leading to a substantial increase in compile time when rewriting the SSA.
  unsigned PhiCount = 0;
  Instruction *FirstNonPHI = nullptr;
  for (Instruction &I : *BB) {
    if (!isa<PHINode>(&I)) {
      FirstNonPHI = &I;
      break;
    }
    if (++PhiCount > PhiDuplicateThreshold)
      return ~0U;
  }

  // Ignore PHI nodes, these will be flattened when duplication happens.
  BasicBlock::const_iterator I(FirstNonPHI);

  // FIXME: THREADING will delete values that are just used to compute the
  // branch, so they shouldn't count against the duplication cost.

  unsigned Bonus = 0;
  if (BB->getTerminator() == StopAt) {
    // Threading through a switch statement is particularly profitable. If this
    // block ends in a switch, decrease its cost to make it more likely to
    // happen.
    if (isa<SwitchInst>(StopAt))
      Bonus = 6;

    // The same holds for indirect branches, but slightly more so.
    if (isa<IndirectBrInst>(StopAt))
      Bonus = 8;
  }

  // Bump the threshold up so the early exit from the loop doesn't skip the
  // terminator-based Size adjustment at the end.
  Threshold += Bonus;

  // Sum up the cost of each instruction until we get to the terminator. Don't
  // include the terminator because the copy won't include it.
  unsigned Size = 0;
  for (; &*I != StopAt; ++I) {
    // Stop scanning the block if we've reached the threshold.
    if (Size > Threshold)
      return Size;

    // Bail out if this instruction gives back a token type, it is not possible
    // to duplicate it if it is used outside this BB.
    if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
      return ~0U;

    // Blocks with NoDuplicate are modelled as having infinite cost, so they
    // are never duplicated.
    if (const CallInst *CI = dyn_cast<CallInst>(I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return ~0U;

    if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) ==
        TargetTransformInfo::TCC_Free)
      continue;

    // All other instructions count for at least one unit.
    ++Size;

    // Calls are more expensive. If they are non-intrinsic calls, we model them
    // as having cost of 4. If they are a non-vector intrinsic, we model them
    // as having cost of 2 total, and if they are a vector intrinsic, we model
    // them as having cost 1.
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      if (!isa<IntrinsicInst>(CI))
        Size += 3;
      else if (!CI->getType()->isVectorTy())
        Size += 1;
    }
  }

  return Size > Bonus ? Size - Bonus : 0;
}
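
// A hedged worked example of the cost model above (instruction mix assumed,
// none of it TCC_Free): a block with two adds, one non-intrinsic call, and a
// conditional-branch terminator costs 2 (adds) + 4 (call: 1 base + 3
// surcharge) = 6 units, with no Bonus since the terminator is neither a
// switch nor an indirect branch, and the terminator itself is not counted.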
/// findLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
/// happening, we first have to find the loop headers. Here we approximate this
/// by finding targets of backedges in the CFG.
///
/// Note that there definitely are cases when we want to allow threading of
/// edges across a loop header. For example, threading a jump from outside the
/// loop (the preheader) to an exit block of the loop is definitely profitable.
/// It is also almost always profitable to thread backedges from within the loop
/// to exit blocks, and is often profitable to thread backedges to other blocks
/// within the loop (forming a nested loop). This simple analysis is not rich
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
void JumpThreadingPass::findLoopHeaders(Function &F) {
  SmallVector<std::pair<const BasicBlock*, const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);

  for (const auto &Edge : Edges)
    LoopHeaders.insert(Edge.second);
}
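
// For instance (hedged sketch with hypothetical block names): in a natural
// loop "entry -> header -> latch -> header", FindFunctionBackedges reports
// the latch -> header edge, so %header lands in LoopHeaders and threading
// across it is suppressed unless -jump-threading-across-loop-headers is set.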
/// getKnownConstant - Helper method to determine if we can thread over a
/// terminator with the given value as its condition, and if so what value to
/// use for that. What kind of value this is depends on whether we want an
/// integer or a block address, but an undef is always accepted.
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
  if (!Val)
    return nullptr;

  // Undef is "known" enough.
  if (UndefValue *U = dyn_cast<UndefValue>(Val))
    return U;

  if (Preference == WantBlockAddress)
    return dyn_cast<BlockAddress>(Val->stripPointerCasts());

  return dyn_cast<ConstantInt>(Val);
}
/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
/// in any of our predecessors. If so, return the known list of value and pred
/// BB in the result vector.
///
/// This returns true if there were any known values.
bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
    Value *V, BasicBlock *BB, PredValueInfo &Result,
    ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
    Instruction *CxtI) {
  // This method walks up use-def chains recursively. Because of this, we could
  // get into an infinite loop going around loops in the use-def chain. To
  // prevent this, keep track of what values we've already visited and
  // terminate the search if we loop back to one of them.
  if (!RecursionSet.insert(V).second)
    return false;

  // If V is a constant, then it is known in all predecessors.
  if (Constant *KC = getKnownConstant(V, Preference)) {
    for (BasicBlock *Pred : predecessors(BB))
      Result.emplace_back(KC, Pred);

    return !Result.empty();
  }

  // If V is a non-instruction value, or an instruction in a different block,
  // then it can't be derived from a PHI.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || I->getParent() != BB) {
    // Okay, if this is a live-in value, see if it has a known value on any
    // edge from our predecessors.
    for (BasicBlock *P : predecessors(BB)) {
      using namespace PatternMatch;
      // If the value is known by LazyValueInfo to be a constant in a
      // predecessor, use that information to try to thread this block.
      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
      // If I is a non-local compare-with-a-constant instruction, use the
      // richer 'getPredicateOnEdge' method. This handles value inequalities
      // better, for example if the compare is "X < 4" and "X < 3"
      // is known true but "X < 4" itself is not available.
      CmpInst::Predicate Pred;
      Value *Val;
      Constant *Cst;
      if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
        auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
        if (Res != LazyValueInfo::Unknown)
          PredCst = ConstantInt::getBool(V->getContext(), Res);
      }
      if (Constant *KC = getKnownConstant(PredCst, Preference))
        Result.emplace_back(KC, P);
    }

    return !Result.empty();
  }

  // If I is a PHI node, then we know the incoming values for any constants.
  if (PHINode *PN = dyn_cast<PHINode>(I)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      if (Constant *KC = getKnownConstant(InVal, Preference)) {
        Result.emplace_back(KC, PN->getIncomingBlock(i));
      } else {
        Constant *CI = LVI->getConstantOnEdge(InVal,
                                              PN->getIncomingBlock(i),
                                              BB, CxtI);
        if (Constant *KC = getKnownConstant(CI, Preference))
          Result.emplace_back(KC, PN->getIncomingBlock(i));
      }
    }

    return !Result.empty();
  }
  // Handle Cast instructions.
  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Source = CI->getOperand(0);
    computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
                                        RecursionSet, CxtI);
    if (Result.empty())
      return false;

    // Convert the known values.
    for (auto &R : Result)
      R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());

    return true;
  }

  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
    Value *Source = FI->getOperand(0);
    computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
                                        RecursionSet, CxtI);

    erase_if(Result, [](auto &Pair) {
      return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
    });

    return !Result.empty();
  }
  // Handle some boolean conditions.
  if (I->getType()->getPrimitiveSizeInBits() == 1) {
    using namespace PatternMatch;
    if (Preference != WantInteger)
      return false;
    // X | true -> true
    // X & false -> false
    Value *Op0, *Op1;
    if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
        match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
      PredValueInfoTy LHSVals, RHSVals;

      computeValueKnownInPredecessorsImpl(Op0, BB, LHSVals, WantInteger,
                                          RecursionSet, CxtI);
      computeValueKnownInPredecessorsImpl(Op1, BB, RHSVals, WantInteger,
                                          RecursionSet, CxtI);

      if (LHSVals.empty() && RHSVals.empty())
        return false;

      ConstantInt *InterestingVal;
      if (match(I, m_LogicalOr()))
        InterestingVal = ConstantInt::getTrue(I->getContext());
      else
        InterestingVal = ConstantInt::getFalse(I->getContext());

      SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;

      // Scan for the sentinel. If we find an undef, force it to the
      // interesting value: x|undef -> true and x&undef -> false.
      for (const auto &LHSVal : LHSVals)
        if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
          Result.emplace_back(InterestingVal, LHSVal.second);
          LHSKnownBBs.insert(LHSVal.second);
        }
      for (const auto &RHSVal : RHSVals)
        if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
          // If we already inferred a value for this block on the LHS, don't
          // re-add it.
          if (!LHSKnownBBs.count(RHSVal.second))
            Result.emplace_back(InterestingVal, RHSVal.second);
        }

      return !Result.empty();
    }

    // Handle the NOT form of XOR.
    if (I->getOpcode() == Instruction::Xor &&
        isa<ConstantInt>(I->getOperand(1)) &&
        cast<ConstantInt>(I->getOperand(1))->isOne()) {
      computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
                                          WantInteger, RecursionSet, CxtI);
      if (Result.empty())
        return false;

      // Invert the known values.
      for (auto &R : Result)
        R.first = ConstantExpr::getNot(R.first);

      return true;
    }
    // Try to simplify some other binary operator values.
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
    if (Preference != WantInteger)
      return false;
    if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
      const DataLayout &DL = BO->getModule()->getDataLayout();
      PredValueInfoTy LHSVals;
      computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
                                          WantInteger, RecursionSet, CxtI);

      // Try to use constant folding to simplify the binary operator.
      for (const auto &LHSVal : LHSVals) {
        Constant *V = LHSVal.first;
        Constant *Folded =
            ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);

        if (Constant *KC = getKnownConstant(Folded, WantInteger))
          Result.emplace_back(KC, LHSVal.second);
      }
    }

    return !Result.empty();
  }
  // Handle compare with phi operand, where the PHI is defined in this block.
  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
    if (Preference != WantInteger)
      return false;
    Type *CmpType = Cmp->getType();
    Value *CmpLHS = Cmp->getOperand(0);
    Value *CmpRHS = Cmp->getOperand(1);
    CmpInst::Predicate Pred = Cmp->getPredicate();

    PHINode *PN = dyn_cast<PHINode>(CmpLHS);
    if (!PN)
      PN = dyn_cast<PHINode>(CmpRHS);
    if (PN && PN->getParent() == BB) {
      const DataLayout &DL = PN->getModule()->getDataLayout();
      // We can do this simplification if any comparisons fold to true or false.
      // See if any do.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        BasicBlock *PredBB = PN->getIncomingBlock(i);
        Value *LHS, *RHS;
        if (PN == CmpLHS) {
          LHS = PN->getIncomingValue(i);
          RHS = CmpRHS->DoPHITranslation(BB, PredBB);
        } else {
          LHS = CmpLHS->DoPHITranslation(BB, PredBB);
          RHS = PN->getIncomingValue(i);
        }
        Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
        if (!Res) {
          if (!isa<Constant>(RHS))
            continue;

          // getPredicateOnEdge call will make no sense if LHS is defined in BB.
          auto LHSInst = dyn_cast<Instruction>(LHS);
          if (LHSInst && LHSInst->getParent() == BB)
            continue;

          LazyValueInfo::Tristate ResT =
              LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
                                      BB, CxtI ? CxtI : Cmp);
          if (ResT == LazyValueInfo::Unknown)
            continue;
          Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
        }

        if (Constant *KC = getKnownConstant(Res, WantInteger))
          Result.emplace_back(KC, PredBB);
      }

      return !Result.empty();
    }
    // If comparing a live-in value against a constant, see if we know the
    // live-in value on any predecessors.
    if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
      Constant *CmpConst = cast<Constant>(CmpRHS);

      if (!isa<Instruction>(CmpLHS) ||
          cast<Instruction>(CmpLHS)->getParent() != BB) {
        for (BasicBlock *P : predecessors(BB)) {
          // If the value is known by LazyValueInfo to be a constant in a
          // predecessor, use that information to try to thread this block.
          LazyValueInfo::Tristate Res =
              LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
                                      CxtI ? CxtI : Cmp);
          if (Res == LazyValueInfo::Unknown)
            continue;

          Constant *ResC = ConstantInt::get(CmpType, Res);
          Result.emplace_back(ResC, P);
        }

        return !Result.empty();
      }
      // InstCombine can fold some forms of constant range checks into
      // (icmp (add (x, C1)), C2). See if we have such a thing with
      // x as a live-in.
      {
        using namespace PatternMatch;

        Value *AddLHS;
        ConstantInt *AddConst;
        if (isa<ConstantInt>(CmpConst) &&
            match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
          if (!isa<Instruction>(AddLHS) ||
              cast<Instruction>(AddLHS)->getParent() != BB) {
            for (BasicBlock *P : predecessors(BB)) {
              // If the value is known by LazyValueInfo to be a ConstantRange in
              // a predecessor, use that information to try to thread this
              // block.
              ConstantRange CR = LVI->getConstantRangeOnEdge(
                  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
              // Propagate the range through the addition.
              CR = CR.add(AddConst->getValue());

              // Get the range where the compare returns true.
              ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
                  Pred, cast<ConstantInt>(CmpConst)->getValue());

              Constant *ResC;
              if (CmpRange.contains(CR))
                ResC = ConstantInt::getTrue(CmpType);
              else if (CmpRange.inverse().contains(CR))
                ResC = ConstantInt::getFalse(CmpType);
              else
                continue;

              Result.emplace_back(ResC, P);
            }

            return !Result.empty();
          }
        }
      }

      // Try to find a constant value for the LHS of a comparison,
      // and evaluate it statically if we can.
      PredValueInfoTy LHSVals;
      computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
                                          WantInteger, RecursionSet, CxtI);

      for (const auto &LHSVal : LHSVals) {
        Constant *V = LHSVal.first;
        Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
        if (Constant *KC = getKnownConstant(Folded, WantInteger))
          Result.emplace_back(KC, LHSVal.second);
      }

      return !Result.empty();
    }
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
    // Handle select instructions where at least one operand is a known constant
    // and we can figure out the condition value for any predecessor block.
    Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
    Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
    PredValueInfoTy Conds;
    if ((TrueVal || FalseVal) &&
        computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
                                            WantInteger, RecursionSet, CxtI)) {
      for (auto &C : Conds) {
        Constant *Cond = C.first;

        // Figure out what value to use for the condition.
        bool KnownCond;
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
          // A known boolean.
          KnownCond = CI->isOne();
        } else {
          assert(isa<UndefValue>(Cond) && "Unexpected condition value");
          // Either operand will do, so be sure to pick the one that's a known
          // constant.
          // FIXME: Do this more cleverly if both values are known constants?
          KnownCond = (TrueVal != nullptr);
        }

        // See if the select has a known constant value for this predecessor.
        if (Constant *Val = KnownCond ? TrueVal : FalseVal)
          Result.emplace_back(Val, C.second);
      }

      return !Result.empty();
    }
  }

  // If all else fails, see if LVI can figure out a constant value for us.
  assert(CxtI->getParent() == BB && "CxtI should be in BB");
  Constant *CI = LVI->getConstant(V, CxtI);
  if (Constant *KC = getKnownConstant(CI, Preference)) {
    for (BasicBlock *Pred : predecessors(BB))
      Result.emplace_back(KC, Pred);
  }

  return !Result.empty();
}
/// getBestDestForJumpOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
///
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
  Instruction *BBTerm = BB->getTerminator();
  unsigned MinSucc = 0;
  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
  // Compute the successor with the minimum number of predecessors.
  unsigned MinNumPreds = pred_size(TestBB);
  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
    TestBB = BBTerm->getSuccessor(i);
    unsigned NumPreds = pred_size(TestBB);
    if (NumPreds < MinNumPreds) {
      MinSucc = i;
      MinNumPreds = NumPreds;
    }
  }

  return MinSucc;
}
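
// For example (hedged, successor counts assumed): for a switch whose three
// successors have 4, 2, and 7 predecessors respectively, the scan above
// returns index 1, so the folded branch targets the successor with the
// fewest predecessors.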
static bool hasAddressTakenAndUsed(BasicBlock *BB) {
  if (!BB->hasAddressTaken()) return false;

  // If the block has its address taken, it may be a tree of dead constants
  // hanging off of it. These shouldn't keep the block alive.
  BlockAddress *BA = BlockAddress::get(BB);
  BA->removeDeadConstantUsers();
  return !BA->use_empty();
}
/// processBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreadingPass::processBlock(BasicBlock *BB) {
  // If the block is trivially dead, just return and let the caller nuke it.
  // This simplifies other transformations.
  if (DTU->isBBPendingDeletion(BB) ||
      (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
    return false;

  // If this block has a single predecessor, and if that pred has a single
  // successor, merge the blocks. This encourages recursive jump threading
  // because now the condition in this block can be threaded through
  // predecessors of our predecessor block.
  if (maybeMergeBasicBlockIntoOnlyPred(BB))
    return true;

  if (tryToUnfoldSelectInCurrBB(BB))
    return true;

  // Look if we can propagate guards to predecessors.
  if (HasGuards && processGuards(BB))
    return true;

  // What kind of constant we're looking for.
  ConstantPreference Preference = WantInteger;
  // Look to see if the terminator is a conditional branch, switch or indirect
  // branch; if not, we can't thread it.
  Value *Condition;
  Instruction *Terminator = BB->getTerminator();
  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
    // Can't thread an unconditional jump.
    if (BI->isUnconditional()) return false;
    Condition = BI->getCondition();
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
    Condition = SI->getCondition();
  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
    // Can't thread indirect branch with no successors.
    if (IB->getNumSuccessors() == 0) return false;
    Condition = IB->getAddress()->stripPointerCasts();
    Preference = WantBlockAddress;
  } else {
    return false; // Must be an invoke or callbr.
  }
  // Keep track if we constant folded the condition in this invocation.
  bool ConstantFolded = false;

  // Run constant folding to see if we can reduce the condition to a simple
  // constant.
  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
    Value *SimpleVal =
        ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
    if (SimpleVal) {
      I->replaceAllUsesWith(SimpleVal);
      if (isInstructionTriviallyDead(I, TLI))
        I->eraseFromParent();
      Condition = SimpleVal;
      ConstantFolded = true;
    }
  }

  // If the terminator is branching on an undef or freeze undef, we can pick any
  // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
  auto *FI = dyn_cast<FreezeInst>(Condition);
  if (isa<UndefValue>(Condition) ||
      (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
    unsigned BestSucc = getBestDestForJumpOnUndef(BB);
    std::vector<DominatorTree::UpdateType> Updates;

    // Fold the branch/switch.
    Instruction *BBTerm = BB->getTerminator();
    Updates.reserve(BBTerm->getNumSuccessors());
    for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
      if (i == BestSucc) continue;
      BasicBlock *Succ = BBTerm->getSuccessor(i);
      Succ->removePredecessor(BB, true);
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    LLVM_DEBUG(dbgs() << "  In block '" << BB->getName()
                      << "' folding undef terminator: " << *BBTerm << '\n');
    BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
    ++NumFolds;
    BBTerm->eraseFromParent();
    DTU->applyUpdatesPermissive(Updates);
    if (FI)
      FI->eraseFromParent();
    return true;
  }

  // If the terminator of this block is branching on a constant, simplify the
  // terminator to an unconditional branch. This can occur due to threading in
  // other blocks.
  if (getKnownConstant(Condition, Preference)) {
    LLVM_DEBUG(dbgs() << "  In block '" << BB->getName()
                      << "' folding terminator: " << *BB->getTerminator()
                      << '\n');
    ++NumFolds;
    ConstantFoldTerminator(BB, true, nullptr, DTU);
    if (HasProfileData)
      BPI->eraseBlock(BB);
    return true;
  }

  Instruction *CondInst = dyn_cast<Instruction>(Condition);

  // All the rest of our checks depend on the condition being an instruction.
  if (!CondInst) {
    // FIXME: Unify this with code below.
    if (processThreadableEdges(Condition, BB, Preference, Terminator))
      return true;
    return ConstantFolded;
  }
  // Some of the following optimizations can safely work on the unfrozen cond.
  Value *CondWithoutFreeze = CondInst;
  if (auto *FI = dyn_cast<FreezeInst>(CondInst))
    CondWithoutFreeze = FI->getOperand(0);
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
    // If we're branching on a conditional, LVI might be able to determine
    // its value at the branch instruction. We only handle comparisons
    // against a constant at this time.
    if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
      LazyValueInfo::Tristate Ret =
          LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
                              CondConst, BB->getTerminator(),
                              /*UseBlockValue=*/false);
      if (Ret != LazyValueInfo::Unknown) {
        // We can safely replace *some* uses of the CondInst if it has
        // exactly one value as returned by LVI. RAUW is incorrect in the
        // presence of guards and assumes that have `Cond` as a use. This
        // is because we use the guards/assumes to reason about the `Cond`
        // value at the end of the block, but RAUW unconditionally replaces all
        // uses, including the guards/assumes themselves and the uses before
        // the guard/assume.
        auto *CI = Ret == LazyValueInfo::True ?
          ConstantInt::getTrue(CondCmp->getType()) :
          ConstantInt::getFalse(CondCmp->getType());
        if (replaceFoldableUses(CondCmp, CI, BB))
          return true;
      }

      // We did not manage to simplify this branch, so try to see whether
      // CondCmp depends on a known phi-select pattern.
      if (tryToUnfoldSelect(CondCmp, BB))
        return true;
    }
  }
  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
    if (tryToUnfoldSelect(SI, BB))
      return true;

  // Check for some cases that are worth simplifying. Right now we want to look
  // for loads that are used by a switch or by the condition for the branch. If
  // we see one, check to see if it's partially redundant. If so, insert a PHI
  // which can then be used to thread the values.
  Value *SimplifyValue = CondWithoutFreeze;

  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
    if (isa<Constant>(CondCmp->getOperand(1)))
      SimplifyValue = CondCmp->getOperand(0);

  // TODO: There are other places where load PRE would be profitable, such as
  // more complex comparisons.
  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
    if (simplifyPartiallyRedundantLoad(LoadI))
      return true;

  // Before threading, try to propagate profile data backwards:
  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
      updatePredecessorProfileMetadata(PN, BB);

  // Handle a variety of cases where we are branching on something derived from
  // a PHI node in the current block. If we can prove that any predecessors
  // compute a predictable value based on a PHI node, thread those predecessors.
  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
    return true;

  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
  // the current block, see if we can simplify.
  PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
    return processBranchOnPHI(PN);

  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
  if (CondInst->getOpcode() == Instruction::Xor &&
      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
    return processBranchOnXOR(cast<BinaryOperator>(CondInst));

  // Search for a stronger dominating condition that can be used to simplify a
  // conditional branch leaving BB.
  if (processImpliedCondition(BB))
    return true;

  return false;
}
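
/// processImpliedCondition - Walk up the chain of single predecessors of BB,
/// looking for a dominating conditional branch whose condition implies BB's
/// own branch condition; if one is found, fold BB's branch. Illustrative IR
/// sketch (hypothetical example; %x and the labels are invented for
/// exposition):
///
///   pred:
///     %c1 = icmp ult i32 %x, 10
///     br i1 %c1, label %bb, label %exit
///   bb:                               ; reached only when %x < 10
///     %c2 = icmp ult i32 %x, 20       ; implied true by %c1
///     br i1 %c2, label %t, label %f   ; folded to: br label %t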
bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  // Assuming that the predecessor's branch was taken, if the pred's branch
  // condition (V) implies Cond, Cond can be either true, undef, or poison. In
  // this case, freeze(Cond) is either true or a nondeterministic value.
  // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
  // without affecting other instructions.
  auto *FICond = dyn_cast<FreezeInst>(Cond);
  if (FICond && FICond->hasOneUse())
    Cond = FICond->getOperand(0);
  else
    FICond = nullptr;

  BasicBlock *CurrentBB = BB;
  BasicBlock *CurrentPred = BB->getSinglePredecessor();
  unsigned Iter = 0;

  auto &DL = BB->getModule()->getDataLayout();

  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
    auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
    if (!PBI || !PBI->isConditional())
      return false;
    if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
      return false;

    bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
    std::optional<bool> Implication =
        isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);

    // If the branch condition of BB (Cond) and the branch condition of
    // CurrentPred are freezes of the same value, Cond can be folded to
    // CondIsTrue.
    if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
      if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
          FICond->getOperand(0))
        Implication = CondIsTrue;
    }

    if (Implication) {
      BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
      BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
      RemoveSucc->removePredecessor(BB);
      BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
      UncondBI->setDebugLoc(BI->getDebugLoc());
      ++NumFolds;
      BI->eraseFromParent();
      if (FICond)
        FICond->eraseFromParent();

      DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
      if (HasProfileData)
        BPI->eraseBlock(BB);
      return true;
    }
    CurrentBB = CurrentPred;
    CurrentPred = CurrentBB->getSinglePredecessor();
  }

  return false;
}
/// Return true if Op is an instruction defined in the given block.
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
    if (OpInst->getParent() == BB)
      return true;
  return false;
}

/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
/// redundant load instruction, eliminate it by replacing it with a PHI node.
/// This is an important optimization that encourages jump threading, and needs
/// to be run interlaced with other jump threading tasks.
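///
/// Illustrative sketch (hypothetical IR; %p, %v1, %v.pr and the block names
/// are invented for exposition): a load that is available in one predecessor
/// but not the other is made fully redundant by inserting a reload on the
/// unavailable edge and merging through a PHI:
///
///   pred1:                      ; %p already loaded here
///     %v1 = load i32, ptr %p
///     br label %bb
///   pred2:                      ; reload %v.pr inserted here
///     br label %bb
///   bb:
///     %v = load i32, ptr %p     ; replaced by:
///                               ;   %v = phi i32 [ %v1, %pred1 ],
///                               ;                [ %v.pr, %pred2 ]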
bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
  // Don't hack volatile and ordered loads.
  if (!LoadI->isUnordered()) return false;

  // If the load is defined in a block with exactly one predecessor, it can't be
  // partially redundant.
  BasicBlock *LoadBB = LoadI->getParent();
  if (LoadBB->getSinglePredecessor())
    return false;

  // If the load is defined in an EH pad, it can't be partially redundant,
  // because the edges between the invoke and the EH pad cannot have other
  // instructions between them.
  if (LoadBB->isEHPad())
    return false;

  Value *LoadedPtr = LoadI->getOperand(0);

  // If the loaded operand is defined in the LoadBB and it's not a phi,
  // it can't be available in predecessors.
  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
    return false;

  // Scan a few instructions up from the load, to see if it is obviously live at
  // the entry to its block.
  BasicBlock::iterator BBIt(LoadI);
  bool IsLoadCSE;
  if (Value *AvailableVal = FindAvailableLoadedValue(
          LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
    // If the value of the load is locally available within the block, just use
    // it. This frequently occurs for reg2mem'd allocas.
    if (IsLoadCSE) {
      LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
      combineMetadataForCSE(NLoadI, LoadI, false);
    }

    // If the returned value is the load itself, replace with poison. This can
    // only happen in dead loops.
    if (AvailableVal == LoadI)
      AvailableVal = PoisonValue::get(LoadI->getType());
    if (AvailableVal->getType() != LoadI->getType())
      AvailableVal = CastInst::CreateBitOrPointerCast(
          AvailableVal, LoadI->getType(), "", LoadI);

    LoadI->replaceAllUsesWith(AvailableVal);
    LoadI->eraseFromParent();
    return true;
  }
  // Otherwise, if we scanned the whole block and got to the top of the block,
  // we know the block is locally transparent to the load. If not, something
  // might clobber its value.
  if (BBIt != LoadBB->begin())
    return false;

  // If all of the loads and stores that feed the value have the same AA tags,
  // then we can propagate them onto any newly inserted loads.
  AAMDNodes AATags = LoadI->getAAMetadata();

  SmallPtrSet<BasicBlock*, 8> PredsScanned;

  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
  AvailablePredsTy AvailablePreds;
  BasicBlock *OneUnavailablePred = nullptr;
  SmallVector<LoadInst*, 8> CSELoads;

  // If we got here, the loaded value is transparent through to the start of the
  // block. Check to see if it is available in any of the predecessor blocks.
  for (BasicBlock *PredBB : predecessors(LoadBB)) {
    // If we already scanned this predecessor, skip it.
    if (!PredsScanned.insert(PredBB).second)
      continue;

    BBIt = PredBB->end();
    unsigned NumScanedInst = 0;
    Value *PredAvailable = nullptr;
    // NOTE: We don't CSE loads that are volatile or anything stronger than
    // unordered; that should have been checked when we entered the function.
    assert(LoadI->isUnordered() &&
           "Attempting to CSE volatile or atomic loads");
    // If this is a load on a phi pointer, phi-translate it and search
    // for available load/store to the pointer in predecessors.
    Type *AccessTy = LoadI->getType();
    const auto &DL = LoadI->getModule()->getDataLayout();
    MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
                       LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
                       AATags);
    PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
                                              PredBB, BBIt, DefMaxInstsToScan,
                                              AA, &IsLoadCSE, &NumScanedInst);

    // If PredBB has a single predecessor, continue scanning through the
    // single predecessor.
    BasicBlock *SinglePredBB = PredBB;
    while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
           NumScanedInst < DefMaxInstsToScan) {
      SinglePredBB = SinglePredBB->getSinglePredecessor();
      if (SinglePredBB) {
        BBIt = SinglePredBB->end();
        PredAvailable = findAvailablePtrLoadStore(
            Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
            &NumScanedInst);
      }
    }

    if (!PredAvailable) {
      OneUnavailablePred = PredBB;
      continue;
    }

    if (IsLoadCSE)
      CSELoads.push_back(cast<LoadInst>(PredAvailable));

    // If so, this load is partially redundant. Remember this info so that we
    // can create a PHI node.
    AvailablePreds.emplace_back(PredBB, PredAvailable);
  }

  // If the loaded value isn't available in any predecessor, it isn't partially
  // redundant.
  if (AvailablePreds.empty()) return false;
  // Okay, the loaded value is available in at least one (and maybe all!)
  // predecessors. If the value is unavailable in more than one unique
  // predecessor, we want to insert a merge block for those common predecessors.
  // This ensures that we only have to insert one reload, thus not increasing
  // code size.
  BasicBlock *UnavailablePred = nullptr;

  // If the value is unavailable in one of the predecessors, we will end up
  // inserting a new instruction into them. It is only valid if all the
  // instructions before LoadI are guaranteed to pass execution to its
  // successor, or if LoadI is safe to speculate.
  // TODO: If this logic becomes more complex, and we will perform PRE insertion
  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
  // It requires domination tree analysis, so for this simple case it is
  // overkill.
  if (PredsScanned.size() != AvailablePreds.size() &&
      !isSafeToSpeculativelyExecute(LoadI))
    for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
        return false;

  // If there is exactly one predecessor where the value is unavailable, the
  // already computed 'OneUnavailablePred' block is it. If it ends in an
  // unconditional branch, we know that it isn't a critical edge.
  if (PredsScanned.size() == AvailablePreds.size()+1 &&
      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
    UnavailablePred = OneUnavailablePred;
  } else if (PredsScanned.size() != AvailablePreds.size()) {
    // Otherwise, we had multiple unavailable predecessors or we had a critical
    // edge from the one.
    SmallVector<BasicBlock*, 8> PredsToSplit;
    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;

    for (const auto &AvailablePred : AvailablePreds)
      AvailablePredSet.insert(AvailablePred.first);

    // Add all the unavailable predecessors to the PredsToSplit list.
    for (BasicBlock *P : predecessors(LoadBB)) {
      // If the predecessor is an indirect goto, we can't split the edge.
      if (isa<IndirectBrInst>(P->getTerminator()))
        return false;

      if (!AvailablePredSet.count(P))
        PredsToSplit.push_back(P);
    }

    // Split them out to their own block.
    UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
  }

  // If the value isn't available in all predecessors, then there will be
  // exactly one where it isn't available. Insert a load on that edge and add
  // it to the AvailablePreds list.
  if (UnavailablePred) {
    assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
           "Can't handle critical edge here!");
    LoadInst *NewVal = new LoadInst(
        LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
        LoadI->getName() + ".pr", false, LoadI->getAlign(),
        LoadI->getOrdering(), LoadI->getSyncScopeID(),
        UnavailablePred->getTerminator());
    NewVal->setDebugLoc(LoadI->getDebugLoc());
    if (AATags)
      NewVal->setAAMetadata(AATags);

    AvailablePreds.emplace_back(UnavailablePred, NewVal);
  }

  // Now we know that each predecessor of this block has a value in
  // AvailablePreds, sort them for efficient access as we're walking the preds.
  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());

  // Create a PHI node at the start of the block for the PRE'd load value.
  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
                                &LoadBB->front());
  PN->takeName(LoadI);
  PN->setDebugLoc(LoadI->getDebugLoc());

  // Insert new entries into the PHI for each predecessor. A single block may
  // have multiple entries here.
  for (pred_iterator PI = PB; PI != PE; ++PI) {
    BasicBlock *P = *PI;
    AvailablePredsTy::iterator I =
        llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));

    assert(I != AvailablePreds.end() && I->first == P &&
           "Didn't find entry for predecessor!");

    // If we have an available predecessor but it requires casting, insert the
    // cast in the predecessor and use the cast. Note that we have to update the
    // AvailablePreds vector as we go so that all of the PHI entries for this
    // predecessor use the same bitcast.
    Value *&PredV = I->second;
    if (PredV->getType() != LoadI->getType())
      PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
                                               P->getTerminator());

    PN->addIncoming(PredV, I->first);
  }

  for (LoadInst *PredLoadI : CSELoads) {
    combineMetadataForCSE(PredLoadI, LoadI, true);
  }

  LoadI->replaceAllUsesWith(PN);
  LoadI->eraseFromParent();

  return true;
}
/// findMostPopularDest - The specified list contains multiple possible
/// threadable destinations. Pick the one that occurs the most frequently in
/// the list.
static BasicBlock *
findMostPopularDest(BasicBlock *BB,
                    const SmallVectorImpl<std::pair<BasicBlock *,
                                          BasicBlock *>> &PredToDestList) {
  assert(!PredToDestList.empty());

  // Determine popularity. If there are multiple possible destinations, we
  // explicitly choose to ignore 'undef' destinations. We prefer to thread
  // blocks with known and real destinations to threading undef. We'll handle
  // them later if interesting.
  MapVector<BasicBlock *, unsigned> DestPopularity;

  // Populate DestPopularity with the successors in the order they appear in the
  // successor list. This way, we ensure determinism by iterating it in the
  // same order in std::max_element below. We map nullptr to 0 so that we can
  // return nullptr when PredToDestList contains nullptr only.
  DestPopularity[nullptr] = 0;
  for (auto *SuccBB : successors(BB))
    DestPopularity[SuccBB] = 0;

  for (const auto &PredToDest : PredToDestList)
    if (PredToDest.second)
      DestPopularity[PredToDest.second]++;

  // Find the most popular dest.
  auto MostPopular = std::max_element(
      DestPopularity.begin(), DestPopularity.end(), llvm::less_second());

  // Okay, we have finally picked the most popular destination.
  return MostPopular->first;
}
// Try to evaluate the value of V when the control flows from PredPredBB to
// BB->getSinglePredecessor() and then on to BB.
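//
// A hypothetical sketch of what this enables (names invented for exposition):
// if PredBB contains
//   %phi = phi i32 [ 0, %predpred1 ], [ 1, %predpred2 ]
// and BB branches on "%cmp = icmp eq i32 %phi, 0", then evaluating %cmp on
// the %predpred1 -> PredBB edge folds the PHI to 0 and the compare to true.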
Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
                                                       BasicBlock *PredPredBB,
                                                       Value *V) {
  BasicBlock *PredBB = BB->getSinglePredecessor();
  assert(PredBB && "Expected a single predecessor");

  if (Constant *Cst = dyn_cast<Constant>(V)) {
    return Cst;
  }

  // Consult LVI if V is not an instruction in BB or PredBB.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
    return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
  }

  // Look into a PHI argument.
  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
    if (PHI->getParent() == PredBB)
      return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
    return nullptr;
  }

  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
    if (CondCmp->getParent() == BB) {
      Constant *Op0 =
          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
      Constant *Op1 =
          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
      if (Op0 && Op1) {
        return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
      }
    }
    return nullptr;
  }

  return nullptr;
}
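
/// processThreadableEdges - Cond is the condition of BB's terminator. Compute
/// the constants Cond takes in each predecessor; if every predecessor agrees
/// on a single known destination, fold BB's terminator in place, otherwise
/// factor the predecessors that branch to the most popular destination and
/// thread that edge. If nothing is known in the direct predecessors, fall
/// back to maybethreadThroughTwoBasicBlocks.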
bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
                                               ConstantPreference Preference,
                                               Instruction *CxtI) {
  // If threading this would thread across a loop header, don't even try to
  // thread the edge.
  if (LoopHeaders.count(BB))
    return false;

  PredValueInfoTy PredValues;
  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
                                       CxtI)) {
    // We don't have known values in predecessors. See if we can thread through
    // BB and its sole predecessor.
    return maybethreadThroughTwoBasicBlocks(BB, Cond);
  }

  assert(!PredValues.empty() &&
         "computeValueKnownInPredecessors returned true with no values");

  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
             for (const auto &PredValue : PredValues) {
               dbgs() << " BB '" << BB->getName()
                      << "': FOUND condition = " << *PredValue.first
                      << " for pred '" << PredValue.second->getName() << "'.\n";
             });

  // Decide what we want to thread through. Convert our list of known values to
  // a list of known destinations for each pred. This also discards duplicate
  // predecessors and keeps track of the undefined inputs (which are represented
  // as a null dest in the PredToDestList).
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;

  BasicBlock *OnlyDest = nullptr;
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
  Constant *OnlyVal = nullptr;
  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;

  for (const auto &PredValue : PredValues) {
    BasicBlock *Pred = PredValue.second;
    if (!SeenPreds.insert(Pred).second)
      continue;  // Duplicate predecessor entry.

    Constant *Val = PredValue.first;

    BasicBlock *DestBB;
    if (isa<UndefValue>(Val))
      DestBB = nullptr;
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
    } else {
      assert(isa<IndirectBrInst>(BB->getTerminator())
             && "Unexpected terminator");
      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
    }

    // If we have exactly one destination, remember it for efficiency below.
    if (PredToDestList.empty()) {
      OnlyDest = DestBB;
      OnlyVal = Val;
    } else {
      if (OnlyDest != DestBB)
        OnlyDest = MultipleDestSentinel;
      // It's possible we have the same destination but a different value,
      // e.g. the default case in a switch.
      if (Val != OnlyVal)
        OnlyVal = MultipleVal;
    }

    // If the predecessor ends with an indirect goto, we can't change its
    // destination.
    if (isa<IndirectBrInst>(Pred->getTerminator()))
      continue;

    PredToDestList.emplace_back(Pred, DestBB);
  }

  // If all edges were unthreadable, we fail.
  if (PredToDestList.empty())
    return false;

  // If all the predecessors go to a single known successor, we want to fold,
  // not thread. By doing so, we do not need to duplicate the current block and
  // we avoid missing potential opportunities in cases where we don't/can't
  // duplicate.
  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
    if (BB->hasNPredecessors(PredToDestList.size())) {
      bool SeenFirstBranchToOnlyDest = false;
      std::vector <DominatorTree::UpdateType> Updates;
      Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
      for (BasicBlock *SuccBB : successors(BB)) {
        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
        } else {
          SuccBB->removePredecessor(BB, true); // This is an unreachable successor.
          Updates.push_back({DominatorTree::Delete, BB, SuccBB});
        }
      }

      // Finally update the terminator.
      Instruction *Term = BB->getTerminator();
      BranchInst::Create(OnlyDest, Term);
      ++NumFolds;
      Term->eraseFromParent();
      DTU->applyUpdatesPermissive(Updates);
      if (HasProfileData)
        BPI->eraseBlock(BB);

      // If the condition is now dead due to the removal of the old terminator,
      // erase it.
      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
          CondInst->eraseFromParent();
        // We can safely replace *some* uses of the CondInst if it has
        // exactly one value as returned by LVI. RAUW is incorrect in the
        // presence of guards and assumes that have `Cond` as a use. This
        // is because we use the guards/assumes to reason about the `Cond`
        // value at the end of the block, but RAUW unconditionally replaces
        // all uses, including the guards/assumes themselves and the uses
        // before the guard/assume.
        else if (OnlyVal && OnlyVal != MultipleVal)
          replaceFoldableUses(CondInst, OnlyVal, BB);
      }
      return true;
    }
  }

  // Determine which is the most common successor. If we have many inputs and
  // this block is a switch, we want to start by threading the batch that goes
  // to the most popular destination first. If we only know about one
  // threadable destination (the common case) we can avoid this.
  BasicBlock *MostPopularDest = OnlyDest;

  if (MostPopularDest == MultipleDestSentinel) {
    // Remove any loop headers from the Dest list; threadEdge conservatively
    // won't process them, but we might have other destinations that are
    // eligible and that we still want to process.
    erase_if(PredToDestList,
             [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
               return LoopHeaders.contains(PredToDest.second);
             });

    if (PredToDestList.empty())
      return false;

    MostPopularDest = findMostPopularDest(BB, PredToDestList);
  }

  // Now that we know what the most popular destination is, factor all
  // predecessors that will jump to it into a single predecessor.
  SmallVector<BasicBlock*, 16> PredsToFactor;
  for (const auto &PredToDest : PredToDestList)
    if (PredToDest.second == MostPopularDest) {
      BasicBlock *Pred = PredToDest.first;

      // This predecessor may be a switch or something else that has multiple
      // edges to the block. Factor each of these edges by listing them
      // according to # occurrences in PredsToFactor.
      for (BasicBlock *Succ : successors(Pred))
        if (Succ == BB)
          PredsToFactor.push_back(Pred);
    }

  // If the threadable edges are branching on an undefined value, we get to pick
  // the destination that these predecessors should get to.
  if (!MostPopularDest)
    MostPopularDest = BB->getTerminator()->
                        getSuccessor(getBestDestForJumpOnUndef(BB));

  // Ok, try to thread it!
  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node (or freeze PHI) in the current block. See if there are any
/// simplifications we can do based on inputs to the phi node.
bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
  BasicBlock *BB = PN->getParent();

  // TODO: We could make use of this to do it once for blocks with common PHI
  // values.
  SmallVector<BasicBlock*, 1> PredBBs;
  PredBBs.resize(1);

  // If any of the predecessor blocks end in an unconditional branch, we can
  // *duplicate* the conditional branch into that block in order to further
  // encourage jump threading and to eliminate cases where we have branch on a
  // phi of an icmp (branch on icmp is much better).
  // This is still beneficial when a frozen phi is used as the branch condition
  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
  // to br(icmp(freeze ...)).
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *PredBB = PN->getIncomingBlock(i);
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
      if (PredBr->isUnconditional()) {
        PredBBs[0] = PredBB;
        // Try to duplicate BB into PredBB.
        if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
          return true;
      }
  }

  return false;
}
/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
  BasicBlock *BB = BO->getParent();

  // If either the LHS or RHS of the xor is a constant, don't do this
  // optimization.
  if (isa<ConstantInt>(BO->getOperand(0)) ||
      isa<ConstantInt>(BO->getOperand(1)))
    return false;

  // If the first instruction in BB isn't a phi, we won't be able to infer
  // anything special about any particular predecessor.
  if (!isa<PHINode>(BB->front()))
    return false;

  // If this BB is a landing pad, we won't be able to split the edge into it.
  if (BB->isEHPad())
    return false;

  // If we have a xor as the branch input to this block, and we know that the
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
  // the condition into the predecessor and fix that value to true, saving some
  // logical ops on that path and encouraging other paths to simplify.
  //
  // This copies something like this:
  //
  //  BB:
  //    %X = phi i1 [1],  [%X']
  //    %Y = icmp eq i32 %A, %B
  //    %Z = xor i1 %X, %Y
  //    br i1 %Z, ...
  //
  // Into:
  //  BB':
  //    %Y = icmp ne i32 %A, %B
  //    br i1 %Y, ...
  PredValueInfoTy XorOpValues;
  bool isLHS = true;
  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
                                       WantInteger, BO)) {
    assert(XorOpValues.empty());
    if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
                                         WantInteger, BO))
      return false;
    isLHS = false;
  }

  assert(!XorOpValues.empty() &&
         "computeValueKnownInPredecessors returned true with no values");

  // Scan the information to see which is most popular: true or false. The
  // predecessors can be of the set true, false, or undef.
  unsigned NumTrue = 0, NumFalse = 0;
  for (const auto &XorOpValue : XorOpValues) {
    if (isa<UndefValue>(XorOpValue.first))
      // Ignore undefs for the count.
      continue;
    if (cast<ConstantInt>(XorOpValue.first)->isZero())
      ++NumFalse;
    else
      ++NumTrue;
  }

  // Determine which value to split on: true, false, or undef if neither.
  ConstantInt *SplitVal = nullptr;
  if (NumTrue > NumFalse)
    SplitVal = ConstantInt::getTrue(BB->getContext());
  else if (NumTrue != 0 || NumFalse != 0)
    SplitVal = ConstantInt::getFalse(BB->getContext());

  // Collect all of the blocks that this can be folded into so that we can
  // factor this once and clone it once.
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
  for (const auto &XorOpValue : XorOpValues) {
    if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
      continue;

    BlocksToFoldInto.push_back(XorOpValue.second);
  }

  // If we inferred a value for all of the predecessors, then duplication won't
  // help us. However, we can just replace the LHS or RHS with the constant.
  if (BlocksToFoldInto.size() ==
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
    if (!SplitVal) {
      // If all preds provide undef, just nuke the xor, because it is undef too.
      BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
      BO->eraseFromParent();
    } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
      // If all preds provide 0, replace the xor with the other input.
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
      BO->eraseFromParent();
    } else {
      // If all preds provide 1, set the computed value to 1.
      BO->setOperand(!isLHS, SplitVal);
    }

    return true;
  }

  // If any of the predecessors end with an indirect goto, we can't change its
  // destination.
  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
        return isa<IndirectBrInst>(Pred->getTerminator());
      }))
    return false;

  // Try to duplicate BB into PredBB.
  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
                                            BasicBlock *OldPred,
                                            BasicBlock *NewPred,
                                            DenseMap<Instruction*, Value*> &ValueMap) {
  for (PHINode &PN : PHIBB->phis()) {
    // Ok, we have a PHI node. Figure out what the incoming value was for the
    // DestBlock.
    Value *IV = PN.getIncomingValueForBlock(OldPred);

    // Remap the value if necessary.
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
      if (I != ValueMap.end())
        IV = I->second;
    }

    PN.addIncoming(IV, NewPred);
  }
}
/// Merge basic block BB into its sole predecessor if possible.
bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
  BasicBlock *SinglePred = BB->getSinglePredecessor();
  if (!SinglePred)
    return false;

  const Instruction *TI = SinglePred->getTerminator();
  if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
      SinglePred == BB || hasAddressTakenAndUsed(BB))
    return false;

  // If SinglePred was a loop header, BB becomes one.
  if (LoopHeaders.erase(SinglePred))
    LoopHeaders.insert(BB);

  LVI->eraseBlock(SinglePred);
  MergeBasicBlockIntoOnlyPred(BB, DTU);

  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
  // BB code within one basic block `BB`), we need to invalidate the LVI
  // information associated with BB, because the LVI information need not be
  // true for all of BB after the merge. For example,
  // before the merge, LVI info and code is as follows:
  //   SinglePred: <LVI info1 for %p val>
  //   %y = use of %p
  //   call @exit() // need not transfer execution to successor.
  //   assume(%p) // from this point on %p is true
  //   br label %BB
  //   BB: <LVI info2 for %p val, i.e. %p is true>
  //   %x = use of %p
  //   br label exit
  //
  // Note that this LVI info for blocks BB and SinglePred is correct for %p
  // (info2 and info1 respectively). After the merge and the deletion of the
  // LVI info1 for SinglePred, we have the following code:
  //   BB: <LVI info2 for %p val>
  //   %y = use of %p
  //   call @exit()
  //   assume(%p)
  //   %x = use of %p <-- LVI info2 is correct from here onwards.
  //   br label exit
  // LVI info2 for BB is incorrect at the beginning of BB.

  // Invalidate LVI information for BB if the LVI is not provably true for
  // all of BB.
  if (!isGuaranteedToTransferExecutionToSuccessor(BB))
    LVI->eraseBlock(BB);
  return true;
}
/// Update the SSA form. NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
void JumpThreadingPass::updateSSA(
    BasicBlock *BB, BasicBlock *NewBB,
    DenseMap<Instruction *, Value *> &ValueMapping) {
  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value. This can require arbitrary
  // PHI insertion, which we are prepared to do; clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use *, 16> UsesToRename;

  for (Instruction &I : *BB) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        if (UserPN->getIncomingBlock(U) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;

      UsesToRename.push_back(&U);
    }

    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;
    LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");

    // We found a use of I outside of BB. Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I.getType(), I.getName());
    SSAUpdate.AddAvailableValue(BB, &I);
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);

    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
    LLVM_DEBUG(dbgs() << "\n");
  }
}
/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
/// arguments that come from PredBB. Return the map from the variables in the
/// source basic block to the variables in the newly created basic block.
DenseMap<Instruction *, Value *>
JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
                                     BasicBlock::iterator BE, BasicBlock *NewBB,
                                     BasicBlock *PredBB) {
  // We are going to have to map operands from the source basic block to the new
  // copy of the block 'NewBB'. If there are PHI nodes in the source basic
  // block, evaluate them to account for entry from PredBB.
  DenseMap<Instruction *, Value *> ValueMapping;

  // Retargets llvm.dbg.value to any renamed variables.
  auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
    auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
    if (!DbgInstruction)
      return false;

    SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
    for (auto DbgOperand : DbgInstruction->location_ops()) {
      auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
      if (!DbgOperandInstruction)
        continue;

      auto I = ValueMapping.find(DbgOperandInstruction);
      if (I != ValueMapping.end()) {
        OperandsToRemap.insert(
            std::pair<Value *, Value *>(DbgOperand, I->second));
      }
    }

    for (auto &[OldOp, MappedOp] : OperandsToRemap)
      DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
    return true;
  };

  // Clone the phi nodes of the source basic block into NewBB. The resulting
  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
  // might need to rewrite the operand of the cloned phi.
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
    ValueMapping[PN] = NewPN;
  }

  // Clone noalias scope declarations in the threaded block. When threading a
  // loop exit, we would otherwise end up with two identical scope declarations
  // visible at the same time.
  SmallVector<MDNode *> NoAliasScopes;
  DenseMap<MDNode *, MDNode *> ClonedScopes;
  LLVMContext &Context = PredBB->getContext();
  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);

  // Clone the non-phi instructions of the source basic block into NewBB,
  // keeping track of the mapping and using it to remap operands in the cloned
  // instructions.
  for (; BI != BE; ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getName());
    New->insertInto(NewBB, NewBB->end());
    ValueMapping[&*BI] = New;
    adaptNoAliasScopes(New, ClonedScopes, Context);

    if (RetargetDbgValueIfPossible(New))
      continue;

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }
  }

  return ValueMapping;
}
/// Attempt to thread through two successive basic blocks.
bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
                                                         Value *Cond) {
  // Consider:
  //
  // PredBB:
  //   %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
  //   %tobool = icmp eq i32 %cond, 0
  //   br i1 %tobool, label %BB, label ...
  //
  // BB:
  //   %cmp = icmp eq i32* %var, null
  //   br i1 %cmp, label ..., label ...
  //
  // We don't know the value of %var at BB even if we know which incoming edge
  // we take to BB. However, once we duplicate PredBB for each of its incoming
  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
  // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.

  // Require that BB end with a Branch for simplicity.
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return false;

  // BB must have exactly one predecessor.
  BasicBlock *PredBB = BB->getSinglePredecessor();
  if (!PredBB)
    return false;

  // Require that PredBB end with a conditional Branch. If PredBB ends with an
  // unconditional branch, we should be merging PredBB and BB instead. For
  // simplicity, we don't deal with a switch.
  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
  if (!PredBBBranch || PredBBBranch->isUnconditional())
    return false;

  // If PredBB has exactly one incoming edge, we don't gain anything by copying
  // PredBB.
  if (PredBB->getSinglePredecessor())
    return false;

  // Don't thread through PredBB if it contains a successor edge to itself, in
  // which case we would infinite loop. Suppose we are threading an edge from
  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
  // successor edge to itself. If we allowed jump threading in this case, we
  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
  // PredBB.thread has a successor edge to PredBB, we would immediately come up
  // with another jump threading opportunity from PredBB.thread through PredBB
  // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
  // would keep peeling one iteration from PredBB.
  if (llvm::is_contained(successors(PredBB), PredBB))
    return false;

  // Don't thread across a loop header.
  if (LoopHeaders.count(PredBB))
    return false;

  // Avoid complication with duplicating EH pads.
  if (PredBB->isEHPad())
    return false;

  // Find a predecessor that we can thread. For simplicity, we only consider a
  // successor edge out of BB to which we thread exactly one incoming edge into
  // PredBB.
  unsigned ZeroCount = 0;
  unsigned OneCount = 0;
  BasicBlock *ZeroPred = nullptr;
  BasicBlock *OnePred = nullptr;
  for (BasicBlock *P : predecessors(PredBB)) {
    // If PredPred ends with IndirectBrInst, we can't handle it.
    if (isa<IndirectBrInst>(P->getTerminator()))
      continue;
    if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
            evaluateOnPredecessorEdge(BB, P, Cond))) {
      if (CI->isZero()) {
        ZeroCount++;
        ZeroPred = P;
      } else if (CI->isOne()) {
        OneCount++;
        OnePred = P;
      }
    }
  }

  // Disregard complicated cases where we have to thread multiple edges.
  BasicBlock *PredPredBB;
  if (ZeroCount == 1) {
    PredPredBB = ZeroPred;
  } else if (OneCount == 1) {
    PredPredBB = OnePred;
  } else {
    return false;
  }

  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);

  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << " Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '")
             << BB->getName() << "' to dest "
             << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName()
             << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  // Compute the cost of duplicating BB and PredBB.
  unsigned BBCost = getJumpThreadDuplicationCost(
      TTI, BB, BB->getTerminator(), BBDupThreshold);
  unsigned PredBBCost = getJumpThreadDuplicationCost(
      TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);

  // Give up if costs are too high. We need to check BBCost and PredBBCost
  // individually before checking their sum because getJumpThreadDuplicationCost
  // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
      BBCost + PredBBCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << PredBBCost
                      << " for PredBB, " << BBCost << " for BB\n");
    return false;
  }

  // Now we are ready to duplicate PredBB.
  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
  return true;
}
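
/// threadThroughTwoBasicBlocks - After maybethreadThroughTwoBasicBlocks has
/// proven this profitable, duplicate PredBB into a new block reachable only
/// from PredPredBB (so the condition is known constant in the copy), then
/// thread the resulting edge to SuccBB across BB via threadEdge.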
void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
                                                    BasicBlock *PredBB,
                                                    BasicBlock *BB,
                                                    BasicBlock *SuccBB) {
  LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
                    << BB->getName() << "'\n");

  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());

  BasicBlock *NewBB =
      BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
                         PredBB->getParent(), PredBB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
                     BPI->getEdgeProbability(PredPredBB, PredBB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
  // to account for entry from PredPredBB.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);

  // Copy the edge probabilities from PredBB to NewBB.
  if (HasProfileData)
    BPI->copyEdgeProbabilities(PredBB, NewBB);

  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
  // This eliminates predecessors from PredPredBB, which requires us to simplify
  // any PHI nodes in PredBB.
  Instruction *PredPredTerm = PredPredBB->getTerminator();
  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
    if (PredPredTerm->getSuccessor(i) == PredBB) {
      PredBB->removePredecessor(PredPredBB, true);
      PredPredTerm->setSuccessor(i, NewBB);
    }

  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
                                  ValueMapping);
  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
                                  ValueMapping);

  DTU->applyUpdatesPermissive(
      {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
       {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
       {DominatorTree::Insert, PredPredBB, NewBB},
       {DominatorTree::Delete, PredPredBB, PredBB}});

  updateSSA(PredBB, NewBB, ValueMapping);

  // Clean up things like PHI nodes with single operands, dead instructions,
  // etc.
  SimplifyInstructionsInBlock(NewBB, TLI);
  SimplifyInstructionsInBlock(PredBB, TLI);

  SmallVector<BasicBlock *, 1> PredsToFactor;
  PredsToFactor.push_back(NewBB);
  threadEdge(BB, PredsToFactor, SuccBB);
}
/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool JumpThreadingPass::tryThreadEdge(
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
    BasicBlock *SuccBB) {
  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << " Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
             << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName() << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  unsigned JumpThreadCost = getJumpThreadDuplicationCost(
      TTI, BB, BB->getTerminator(), BBDupThreshold);
  if (JumpThreadCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }

  threadEdge(BB, PredBBs, SuccBB);
  return true;
}
/// threadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
void JumpThreadingPass::threadEdge(BasicBlock *BB,
                                   const SmallVectorImpl<BasicBlock *> &PredBBs,
                                   BasicBlock *SuccBB) {
  assert(SuccBB != BB && "Don't create an infinite loop");

  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
         "Don't thread across loop headers");

  // And finally, do it! Start by factoring the predecessors if needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
                      << " common predecessors.\n");
    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
  }

  // And finally, do it!
  LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
                    << "' to '" << SuccBB->getName()
                    << "', across block:\n " << *BB << "\n");

  LVI->threadEdge(PredBB, BB, SuccBB);

  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
                                         BB->getName()+".thread",
                                         BB->getParent(), BB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq =
        BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // Copy all the instructions from BB to NewBB except the terminator.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);

  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB. Insert the unconditional jump.
  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());

  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
  // PHI nodes for NewBB now.
  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);

  // Update the terminator of PredBB to jump to NewBB instead of BB. This
  // eliminates predecessors from BB, which requires us to simplify any PHI
  // nodes in BB.
  Instruction *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      BB->removePredecessor(PredBB, true);
      PredTerm->setSuccessor(i, NewBB);
    }

  // Enqueue required DT updates.
  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
                               {DominatorTree::Insert, PredBB, NewBB},
                               {DominatorTree::Delete, PredBB, BB}});

  updateSSA(BB, NewBB, ValueMapping);

  // At this point, the IR is fully up to date and consistent. Do a quick scan
  // over the new instructions and zap any that are constants or dead. This
  // frequently happens because of phi translation.
  SimplifyInstructionsInBlock(NewBB, TLI);

  // Update the edge weight from BB to SuccBB, which should be less than before.
  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);

  // Threaded an edge!
  ++NumThreads;
}

/// Create a new basic block that will be the predecessor of BB and successor
/// of all blocks in Preds.  When profile data is available, update the
/// frequency of this new block.
BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
                                               ArrayRef<BasicBlock *> Preds,
                                               const char *Suffix) {
  SmallVector<BasicBlock *, 2> NewBBs;

  // Collect the frequencies of all predecessors of BB, which will be used to
  // update the edge weight of the result of splitting predecessors.
  DenseMap<BasicBlock *, BlockFrequency> FreqMap;
  if (HasProfileData)
    for (auto *Pred : Preds)
      FreqMap.insert(std::make_pair(
          Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));

  // In the case when BB is a LandingPad block we create 2 new predecessors
  // instead of just one.
  if (BB->isLandingPad()) {
    std::string NewName = std::string(Suffix) + ".split-lp";
    SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
  } else {
    NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
  }

  std::vector<DominatorTree::UpdateType> Updates;
  Updates.reserve((2 * Preds.size()) + NewBBs.size());
  for (auto *NewBB : NewBBs) {
    BlockFrequency NewBBFreq(0);
    Updates.push_back({DominatorTree::Insert, NewBB, BB});
    for (auto *Pred : predecessors(NewBB)) {
      Updates.push_back({DominatorTree::Delete, Pred, BB});
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
      if (HasProfileData) // Update frequencies between Pred -> NewBB.
        NewBBFreq += FreqMap.lookup(Pred);
    }
    if (HasProfileData) // Apply the summed frequency to NewBB.
      BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  DTU->applyUpdatesPermissive(Updates);
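  // Note: in the landing-pad case two new blocks were created above, but only
  // the first is returned; it is the block callers thread through.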
  return NewBBs[0];
}

bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
  assert(TI->getNumSuccessors() > 1 && "not a split");
  return hasValidBranchWeightMD(*TI);
}

/// Update the block frequency of BB and the branch weight metadata on the
/// edge BB->SuccBB.  This is done by scaling the weight of BB->SuccBB by
/// 1 - Freq(PredBB->BB) / Freq(BB->SuccBB).
void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
                                                     BasicBlock *BB,
                                                     BasicBlock *NewBB,
                                                     BasicBlock *SuccBB) {
  if (!HasProfileData)
    return;

  assert(BFI && BPI && "BFI & BPI should have been created here");

  // As the edge from PredBB to BB is deleted, we have to update the block
  // frequency of BB.
  auto BBOrigFreq = BFI->getBlockFreq(BB);
  auto NewBBFreq = BFI->getBlockFreq(NewBB);
  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
  auto BBNewFreq = BBOrigFreq - NewBBFreq;
  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());

  // Collect updated outgoing edges' frequencies from BB and use them to
  // update edge probabilities.
  SmallVector<uint64_t, 4> BBSuccFreq;
  for (BasicBlock *Succ : successors(BB)) {
    auto SuccFreq = (Succ == SuccBB)
                        ? BB2SuccBBFreq - NewBBFreq
                        : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
    BBSuccFreq.push_back(SuccFreq.getFrequency());
  }
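
  // A worked example (illustrative numbers): if BBOrigFreq = 100, NewBBFreq =
  // 30 and Prob(BB -> SuccBB) = 50%, then BB2SuccBBFreq = 50.  BB's frequency
  // drops to 70, the BB -> SuccBB edge keeps frequency 50 - 30 = 20, and every
  // other outgoing edge keeps its original frequency before normalization.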

  uint64_t MaxBBSuccFreq =
      *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());

  SmallVector<BranchProbability, 4> BBSuccProbs;
  if (MaxBBSuccFreq == 0)
    BBSuccProbs.assign(BBSuccFreq.size(),
                       {1, static_cast<unsigned>(BBSuccFreq.size())});
  else {
    for (uint64_t Freq : BBSuccFreq)
      BBSuccProbs.push_back(
          BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
    // Normalize edge probabilities so that they sum up to one.
    BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
                                              BBSuccProbs.end());
  }

  // Update edge probabilities in BPI.
  BPI->setEdgeProbability(BB, BBSuccProbs);

  // Update the profile metadata as well.
  //
  // Don't do this if the profile of the transformed blocks was statically
  // estimated.  (This could occur despite the function having an entry
  // frequency in completely cold parts of the CFG.)
  //
  // In this case we don't want to suggest to subsequent passes that the
  // calculated weights are fully consistent.  Consider this graph:
  //
  //                 check_1
  //             50% /   |
  //             eq_1    | 50%
  //                 \   |
  //                 check_2
  //             50% /   |
  //             eq_2    | 50%
  //                 \   |
  //                 check_3
  //             50% /   |
  //             eq_3    | 50%
  //                 \   |
  //
  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
  // the overall probabilities are inconsistent; the total probability that the
  // value is either 1, 2 or 3 is 150%.
  //
  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
  // becomes 0%.  This is even worse if the edge whose probability becomes 0%
  // is the loop exit edge.  Then based solely on static estimation we would
  // assume the loop was extremely hot.
  //
  // FIXME: Fix this locally as well so that BPI and BFI stay consistent.  We
  // shouldn't make edges extremely likely or unlikely based solely on static
  // estimation.
  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
    SmallVector<uint32_t, 4> Weights;
    for (auto Prob : BBSuccProbs)
      Weights.push_back(Prob.getNumerator());

    auto TI = BB->getTerminator();
    TI->setMetadata(
        LLVMContext::MD_prof,
        MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
  }
}

/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction
/// like a compare.
bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
  assert(!PredBBs.empty() && "Can't handle an empty set");

  // If BB is a loop header, then duplicating this block outside the loop would
  // cause us to transform this into an irreducible loop, don't do this.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
    LLVM_DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
                      << "' into predecessor block '" << PredBBs[0]->getName()
                      << "' - it might create an irreducible loop!\n");
    return false;
  }

  unsigned DuplicationCost = getJumpThreadDuplicationCost(
      TTI, BB, BB->getTerminator(), BBDupThreshold);
  if (DuplicationCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
                      << "' - Cost is too high: " << DuplicationCost << "\n");
    return false;
  }
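
  // For example (illustrative): given
  //
  //   pred:
  //     br label %bb
  //   bb:
  //     %p = phi i1 [ true, %pred ], [ %x, %other ]
  //     br i1 %p, label %t, label %f
  //
  // duplicating bb into pred replaces pred's terminator with
  // `br i1 true, label %t, label %f` (the cloned PHI maps to its incoming
  // value for pred), which later folds to an unconditional branch, while bb
  // survives for its other predecessors.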

  // And finally, do it!  Start by factoring the predecessors if needed.
  std::vector<DominatorTree::UpdateType> Updates;
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    LLVM_DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
                      << " common predecessors.\n");
    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
  }
  Updates.push_back({DominatorTree::Delete, PredBB, BB});

  // Okay, we decided to do this!  Clone all the instructions in BB onto the
  // end of PredBB.
  LLVM_DEBUG(dbgs() << "  Duplicating block '" << BB->getName()
                    << "' into end of '" << PredBB->getName()
                    << "' to eliminate branch on phi.  Cost: "
                    << DuplicationCost << " block is:" << *BB << "\n");

  // Unless PredBB ends with an unconditional branch, split the edge so that
  // we can just clone the bits from BB into the end of the new PredBB.
  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());

  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
    BasicBlock *OldPredBB = PredBB;
    PredBB = SplitEdge(OldPredBB, BB);
    Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
    Updates.push_back({DominatorTree::Insert, PredBB, BB});
    Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
  }

  // We are going to have to map operands from the original BB block into the
  // PredBB block.  Evaluate PHI nodes in BB.
  DenseMap<Instruction *, Value *> ValueMapping;

  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

  // Clone the non-phi instructions of BB into PredBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; BI != BB->end(); ++BI) {
    Instruction *New = BI->clone();

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }

    // If this instruction can be simplified after the operands are updated,
    // just use the simplified value instead.  This frequently happens due to
    // phi translation.
    if (Value *IV = simplifyInstruction(
            New,
            {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
      ValueMapping[&*BI] = IV;
      if (!New->mayHaveSideEffects()) {
        New->deleteValue();
        New = nullptr;
      }
    } else {
      ValueMapping[&*BI] = New;
    }
    if (New) {
      // Otherwise, insert the new instruction into the block.
      New->setName(BI->getName());
      New->insertInto(PredBB, OldPredBranch->getIterator());
      // Update Dominance from simplified New instruction operands.
      for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
        if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
          Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
    }
  }

  // Check to see if the targets of the branch had PHI nodes.  If so, we need
  // to add entries to the PHI nodes for branch from PredBB now.
  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
                                  ValueMapping);
  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
                                  ValueMapping);

  updateSSA(BB, PredBB, ValueMapping);

  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
  // that we nuked.
  BB->removePredecessor(PredBB, true);

  // Remove the unconditional branch at the end of the PredBB block.
  OldPredBranch->eraseFromParent();
  if (HasProfileData)
    BPI->copyEdgeProbabilities(BB, PredBB);
  DTU->applyUpdatesPermissive(Updates);

  ++NumDupes;
  return true;
}

// Pred is a predecessor of BB with an unconditional branch to BB.  SI is
// a Select instruction in Pred.  BB has other predecessors and SI is used in
// a PHI node in BB.  SI has no other use.
// A new basic block, NewBB, is created and SI is replaced by a conditional
// branch on its condition.  SI is erased from parent.
void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
                                          SelectInst *SI, PHINode *SIUse,
                                          unsigned Idx) {
  // Expand the select.
  //
  //     Pred --
  //      |    v
  //      |  NewBB
  //      |    |
  //      |-----
  //      v
  //     BB
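  //
  // For example (illustrative): with `%s = select i1 %c, i32 %t, i32 %f` in
  // Pred feeding PHI operand Idx of SIUse in BB, Pred gets the conditional
  // branch `br i1 %c, label %NewBB, label %BB`, the PHI's entry for Pred
  // becomes %f, and a new entry (%t, %NewBB) is added.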
  BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
                                         BB->getParent(), BB);
  // Move the unconditional branch to NewBB.
  PredTerm->removeFromParent();
  PredTerm->insertInto(NewBB, NewBB->end());
  // Create a conditional branch and update PHI nodes.
  auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
  BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
  BI->copyMetadata(*SI, {LLVMContext::MD_prof});
  SIUse->setIncomingValue(Idx, SI->getFalseValue());
  SIUse->addIncoming(SI->getTrueValue(), NewBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    uint64_t TrueWeight, FalseWeight;
    if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
        (TrueWeight + FalseWeight) != 0) {
      SmallVector<BranchProbability, 2> BP;
      BP.emplace_back(BranchProbability::getBranchProbability(
          TrueWeight, TrueWeight + FalseWeight));
      BP.emplace_back(BranchProbability::getBranchProbability(
          FalseWeight, TrueWeight + FalseWeight));
      BPI->setEdgeProbability(Pred, BP);
    }

    auto NewBBFreq =
        BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // The select is now dead.
  SI->eraseFromParent();
  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
                               {DominatorTree::Insert, Pred, NewBB}});

  // Update any other PHI nodes in BB.
  for (BasicBlock::iterator BI = BB->begin();
       PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
    if (Phi != SIUse)
      Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
}

bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());

  if (!CondPHI || CondPHI->getParent() != BB)
    return false;

  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *Pred = CondPHI->getIncomingBlock(I);
    SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));

    // The second and third conditions can potentially be relaxed.  Currently
    // they help to simplify the code and allow us to reuse existing code,
    // developed for tryToUnfoldSelect(CmpInst *, BasicBlock *).
    if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
      continue;

    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredTerm || !PredTerm->isUnconditional())
      continue;

    unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
    return true;
  }
  return false;
}

/// tryToUnfoldSelect - Look for blocks of the form
///
/// bb1:
///   %a = select
///   br bb2
///
/// bb2:
///   %p = phi [%a, %bb1] ...
///   %c = icmp %p
///   br i1 %c
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded.  This later enables threading from bb1 over bb2.
bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
      CondLHS->getParent() != BB)
    return false;

  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *Pred = CondLHS->getIncomingBlock(I);
    SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));

    // Look if one of the incoming values is a select in the corresponding
    // predecessor.
    if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
      continue;

    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredTerm || !PredTerm->isUnconditional())
      continue;

    // Now check if one of the select values would allow us to constant fold
    // the terminator in BB.  We don't do the transform if both sides fold,
    // those cases will be threaded in any case.
    LazyValueInfo::Tristate LHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
                                CondRHS, Pred, BB, CondCmp);
    LazyValueInfo::Tristate RHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
                                CondRHS, Pred, BB, CondCmp);
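
    // E.g. (illustrative) with `%a = select i1 %x, i32 0, i32 5` in bb1 and
    // `%c = icmp eq i32 %p, 0` in bb2, the true arm folds %c to true and the
    // false arm folds it to false; the arms disagree, so the check below
    // fires and the select is unfolded.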
    if ((LHSFolds != LazyValueInfo::Unknown ||
         RHSFolds != LazyValueInfo::Unknown) &&
        LHSFolds != RHSFolds) {
      unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
      return true;
    }
  }
  return false;
}

/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
/// same BB in the form
///
/// bb:
///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
///   %s = select %p, trueval, falseval
///
/// or
///
/// bb:
///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
///   %c = cmp %p, 0
///   %s = select %c, trueval, falseval
///
/// And expand the select into a branch structure.  This later enables
/// jump-threading over bb in this pass.
///
/// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold
/// the select if the associated PHI has at least one constant.  If the
/// unfolded select is not jump-threaded, it will be folded again in later
/// optimizations.
bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
  // This transform would reduce the quality of msan diagnostics.
  // Disable this transform under MemorySanitizer.
  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
    return false;

  // If threading this would thread across a loop header, don't thread the
  // edge.  See the comments above findLoopHeaders for justifications and
  // caveats.
  if (LoopHeaders.count(BB))
    return false;

  for (BasicBlock::iterator BI = BB->begin();
       PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
    // Look for a Phi having at least one constant incoming value.
    if (llvm::all_of(PN->incoming_values(),
                     [](Value *V) { return !isa<ConstantInt>(V); }))
      continue;

    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
      using namespace PatternMatch;

      // Check if SI is in BB and uses V as its condition.
      if (SI->getParent() != BB)
        return false;
      Value *Cond = SI->getCondition();
      bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
      return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
    };

    SelectInst *SI = nullptr;
    for (Use &U : PN->uses()) {
      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
        // Look for an ICmp in BB that compares PN with a constant and is the
        // condition of a Select.
        if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
              SI = SelectI;
              break;
            }
      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
        // Look for a Select in BB that uses PN as condition.
        if (isUnfoldCandidate(SelectI, U.get())) {
          SI = SelectI;
          break;
        }
      }
    }

    if (!SI)
      continue;

    // Expand the select.
    Value *Cond = SI->getCondition();
    if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
      Cond = new FreezeInst(Cond, "cond.fr", SI);
    Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
    BasicBlock *SplitBB = SI->getParent();
    BasicBlock *NewBB = Term->getParent();
    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
    NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
    NewPN->addIncoming(SI->getFalseValue(), BB);
    SI->replaceAllUsesWith(NewPN);
    SI->eraseFromParent();
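
    // The resulting shape (illustrative): BB now ends in
    // `br i1 Cond, label %NewBB, label %SplitBB`; NewBB holds only an
    // unconditional branch to SplitBB; and SplitBB starts with
    // `%phi = phi [TrueVal, %NewBB], [FalseVal, %BB]` followed by the rest
    // of the original block.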

    // NewBB and SplitBB are newly created blocks which require insertion.
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
    Updates.push_back({DominatorTree::Insert, BB, SplitBB});
    Updates.push_back({DominatorTree::Insert, BB, NewBB});
    Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
    // BB's successors were moved to SplitBB, update DTU accordingly.
    for (auto *Succ : successors(SplitBB)) {
      Updates.push_back({DominatorTree::Delete, BB, Succ});
      Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
    }
    DTU->applyUpdatesPermissive(Updates);
    return true;
  }
  return false;
}

/// Try to propagate a guard from the current BB into one of its predecessors
/// in case another branch of execution implies that the condition of this
/// guard is always true.  Currently we only process the simplest case that
/// looks like:
///
/// Start:
///   %cond = ...
///   br i1 %cond, label %T1, label %F1
/// T1:
///   br label %Merge
/// F1:
///   br label %Merge
/// Merge:
///   %condGuard = ...
///   call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
///
/// And cond either implies condGuard or !condGuard.  In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to one of them.
bool JumpThreadingPass::processGuards(BasicBlock *BB) {
  using namespace PatternMatch;

  // We only want to deal with two predecessors.
  BasicBlock *Pred1, *Pred2;
  auto PI = pred_begin(BB), PE = pred_end(BB);
  if (PI == PE)
    return false;
  Pred1 = *PI++;
  if (PI == PE)
    return false;
  Pred2 = *PI++;
  if (PI != PE)
    return false;
  if (Pred1 == Pred2)
    return false;

  // Try to thread one of the guards of the block.
  // TODO: Look up deeper than to immediate predecessor?
  auto *Parent = Pred1->getSinglePredecessor();
  if (!Parent || Parent != Pred2->getSinglePredecessor())
    return false;
  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
    for (auto &I : *BB)
      if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
        return true;

  return false;
}

/// Try to propagate the guard from BB which is the lower block of a diamond
/// to one of its branches, in case the diamond's condition implies the
/// guard's condition.
bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
                                    BranchInst *BI) {
  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
  Value *GuardCond = Guard->getArgOperand(0);
  Value *BranchCond = BI->getCondition();
  BasicBlock *TrueDest = BI->getSuccessor(0);
  BasicBlock *FalseDest = BI->getSuccessor(1);

  auto &DL = BB->getModule()->getDataLayout();
  bool TrueDestIsSafe = false;
  bool FalseDestIsSafe = false;

  // True dest is safe if BranchCond => GuardCond.
  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
  if (Impl && *Impl)
    TrueDestIsSafe = true;
  else {
    // False dest is safe if !BranchCond => GuardCond.
    Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
    if (Impl && *Impl)
      FalseDestIsSafe = true;
  }

  if (!TrueDestIsSafe && !FalseDestIsSafe)
    return false;

  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
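  // E.g. if TrueDestIsSafe, BranchCond implies GuardCond along TrueDest, so
  // the guard is redundant there: TrueDest becomes the unguarded predecessor,
  // and only the copy placed on the FalseDest side keeps the guard.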

  ValueToValueMapTy UnguardedMapping, GuardedMapping;
  Instruction *AfterGuard = Guard->getNextNode();
  unsigned Cost =
      getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
  if (Cost > BBDupThreshold)
    return false;

  // Duplicate all instructions before the guard and the guard itself to the
  // branch where implication is not proved.
  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
  assert(GuardedBlock && "Could not create the guarded block?");

  // Duplicate all instructions before the guard in the unguarded branch.
  // Since we have successfully duplicated the guarded block and this block
  // has fewer instructions, we expect it to succeed.
  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
  assert(UnguardedBlock && "Could not create the unguarded block?");
  LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
                    << GuardedBlock->getName() << "\n");

  // Some instructions before the guard may still have uses.  For them, we
  // need to create Phi nodes merging their copies in both guarded and
  // unguarded branches.  Those instructions that have no uses can be just
  // removed.
  SmallVector<Instruction *, 4> ToRemove;
  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
    if (!isa<PHINode>(&*BI))
      ToRemove.push_back(&*BI);

  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
  assert(InsertionPoint && "Empty block?");
  // Substitute with Phis & remove.
  for (auto *Inst : reverse(ToRemove)) {
    if (!Inst->use_empty()) {
      PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
      NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
      NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
      NewPN->insertBefore(InsertionPoint);
      Inst->replaceAllUsesWith(NewPN);
    }
    Inst->eraseFromParent();
  }
  return true;
}