//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Jump Threading pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;
using namespace jumpthreading;

#define DEBUG_TYPE "jump-threading"

STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");

static cl::opt<unsigned>
    BBDuplicateThreshold("jump-threading-threshold",
                         cl::desc("Max block size to duplicate for jump threading"),
                         cl::init(6), cl::Hidden);

static cl::opt<unsigned>
    ImplicationSearchThreshold(
        "jump-threading-implication-search-threshold",
        cl::desc("The number of predecessors to search for a stronger "
                 "condition to use to thread over a weaker condition"),
        cl::init(3), cl::Hidden);

static cl::opt<bool> PrintLVIAfterJumpThreading(
    "print-lvi-after-jump-threading",
    cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
    cl::Hidden);

static cl::opt<bool> JumpThreadingFreezeSelectCond(
    "jump-threading-freeze-select-cond",
    cl::desc("Freeze the condition when unfolding select"), cl::init(false),
    cl::Hidden);

static cl::opt<bool> ThreadAcrossLoopHeaders(
    "jump-threading-across-loop-headers",
    cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
    cl::init(false), cl::Hidden);

namespace {

/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
/// successors, we forward the edge from the predecessor to the successor by
/// duplicating the contents of this block.
///
/// An example of when this can occur is code like this:
///
///   if () { ...
///     X = 4;
///   }
///   if (X < 3) {
///
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
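///
/// After threading, the predecessor that set X to 4 no longer evaluates the
/// second comparison at all; conceptually (an illustrative sketch of the
/// resulting control flow, not IR the pass emits verbatim):
///
///   if () { ...
///     X = 4;
///     goto second_if_false;   // edge forwarded around the known-false test
///   }
///   if (X < 3) {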
class JumpThreading : public FunctionPass {
  JumpThreadingPass Impl;

public:
  static char ID; // Pass identification

  JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
      : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
    initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<LazyValueInfoWrapperPass>();
    AU.addPreserved<LazyValueInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  void releaseMemory() override { Impl.releaseMemory(); }
};

} // end anonymous namespace

char JumpThreading::ID = 0;

INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
                      "Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
                    "Jump Threading", false, false)

// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
  return new JumpThreading(InsertFr, Threshold);
}
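
// Note: the -jump-threading-freeze-select-cond flag is ORed with the InsertFr
// constructor argument below, so the command-line option can force freezing
// even when the creator of the pass did not request it.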
JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
  InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
  DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}

// Update branch probability information according to conditional
// branch probability. This is usually made possible for cloned branches
// in inline instances by the context-specific profile in the caller.
// For instance,
//
//  [Block PredBB]
//  [Branch PredBr]
//  if (t) {
//     Block A;
//  } else {
//     Block B;
//  }
//
//  [Block BB]
//  cond = PN([true, %A], [..., %B]); // PHI node
//  [Branch CondBr]
//  if (cond) {
//    ...  // P(cond == true) = 1%
//  }
//
//  Here we know that when block A is taken, cond must be true, which means
//     P(cond == true | A) = 1
//
//  Given that P(cond == true) = P(cond == true | A) * P(A) +
//                               P(cond == true | B) * P(B)
//  we get:
//     P(cond == true) = P(A) + P(cond == true | B) * P(B)
//
//  which gives us:
//     P(A) <= P(cond == true), i.e.
//     P(t == true) <= P(cond == true)
//
//  In other words, if we know P(cond == true) is unlikely, we know
//  that P(t == true) is also unlikely.
//
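//  As a worked example (illustrative numbers): with P(cond == true) = 1% as
//  above, the bound gives P(t == true) <= 1%, so PredBr's edge into A can
//  conservatively be annotated as at most that likely.
//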
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return;

  uint64_t TrueWeight, FalseWeight;
  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
    return;

  if (TrueWeight + FalseWeight == 0)
    // Zero branch_weights do not give a hint for getting branch probabilities.
    // Technically it would result in division by zero denominator, which is
    // TrueWeight + FalseWeight.
    return;

  // Returns the outgoing edge of the dominating predecessor block
  // that leads to the PhiNode's incoming block:
  auto GetPredOutEdge =
      [](BasicBlock *IncomingBB,
         BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
    auto *PredBB = IncomingBB;
    auto *SuccBB = PhiBB;
    SmallPtrSet<BasicBlock *, 16> Visited;
    while (true) {
      BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
      if (PredBr && PredBr->isConditional())
        return {PredBB, SuccBB};
      Visited.insert(PredBB);
      auto *SinglePredBB = PredBB->getSinglePredecessor();
      if (!SinglePredBB)
        return {nullptr, nullptr};

      // Stop searching when SinglePredBB has been visited. It means we see
      // an unreachable loop.
      if (Visited.count(SinglePredBB))
        return {nullptr, nullptr};

      SuccBB = PredBB;
      PredBB = SinglePredBB;
    }
  };
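
  // For each i1 constant incoming to the PHI, compute the branch probability
  // bound it implies for CondBr and push that bound up to the conditional
  // branch found by walking from the incoming block.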
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *PhiOpnd = PN->getIncomingValue(i);
    ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
    if (!CI || !CI->getType()->isIntegerTy(1))
      continue;

    BranchProbability BP =
        (CI->isOne() ? BranchProbability::getBranchProbability(
                           TrueWeight, TrueWeight + FalseWeight)
                     : BranchProbability::getBranchProbability(
                           FalseWeight, TrueWeight + FalseWeight));

    auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
    if (!PredOutEdge.first)
      return;

    BasicBlock *PredBB = PredOutEdge.first;
    BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (!PredBr)
      return;

    uint64_t PredTrueWeight, PredFalseWeight;
    // FIXME: We currently only set the profile data when it is missing.
    // With PGO, this can be used to refine even existing profile data with
    // context information. This needs to be done after more performance
    // testing.
    if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
      continue;

    // We cannot infer anything useful when BP >= 50%, because BP is the
    // upper bound probability value.
    if (BP >= BranchProbability(50, 100))
      continue;

    SmallVector<uint32_t, 2> Weights;
    if (PredBr->getSuccessor(0) == PredOutEdge.second) {
      Weights.push_back(BP.getNumerator());
      Weights.push_back(BP.getCompl().getNumerator());
    } else {
      Weights.push_back(BP.getCompl().getNumerator());
      Weights.push_back(BP.getNumerator());
    }
    PredBr->setMetadata(LLVMContext::MD_prof,
                        MDBuilder(PredBr->getParent()->getContext())
                            .createBranchWeights(Weights));
  }
}
/// runOnFunction - Toplevel algorithm.
bool JumpThreading::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  // Jump threading makes no sense for targets with divergent control flow.
  if (TTI->hasBranchDivergence())
    return false;
  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;
  if (F.hasProfileData()) {
    LoopInfo LI{DominatorTree(F)};
    BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
                              std::move(BFI), std::move(BPI));
  if (PrintLVIAfterJumpThreading) {
    dbgs() << "LVI for function '" << F.getName() << "':\n";
    LVI->printLVI(F, DTU.getDomTree(), dbgs());
  }
  return Changed;
}

PreservedAnalyses JumpThreadingPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  // Jump threading makes no sense for targets with divergent control flow.
  if (TTI.hasBranchDivergence())
    return PreservedAnalyses::all();
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;
  if (F.hasProfileData()) {
    LoopInfo LI{DominatorTree(F)};
    BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
                         std::move(BFI), std::move(BPI));
  if (PrintLVIAfterJumpThreading) {
    dbgs() << "LVI for function '" << F.getName() << "':\n";
    LVI.printLVI(F, DTU.getDomTree(), dbgs());
  }

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<GlobalsAA>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LazyValueAnalysis>();
  return PA;
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                                LazyValueInfo *LVI_, AliasAnalysis *AA_,
                                DomTreeUpdater *DTU_, bool HasProfileData_,
                                std::unique_ptr<BlockFrequencyInfo> BFI_,
                                std::unique_ptr<BranchProbabilityInfo> BPI_) {
  LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
  TLI = TLI_;
  LVI = LVI_;
  AA = AA_;
  DTU = DTU_;
  BFI.reset();
  BPI.reset();
  // When profile data is available, we need to update edge weights after
  // successful jump threading, which requires both BPI and BFI being available.
  HasProfileData = HasProfileData_;
  auto *GuardDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  HasGuards = GuardDecl && !GuardDecl->use_empty();
  if (HasProfileData) {
    BPI = std::move(BPI_);
    BFI = std::move(BFI_);
  }

  // Reduce the number of instructions duplicated when optimizing strictly for
  // size.
  if (BBDuplicateThreshold.getNumOccurrences())
    BBDupThreshold = BBDuplicateThreshold;
  else if (F.hasFnAttribute(Attribute::MinSize))
    BBDupThreshold = 3;
  else
    BBDupThreshold = DefaultBBDupThreshold;

  // JumpThreading must not process blocks unreachable from entry. It's a
  // waste of compute time and can potentially lead to hangs.
  SmallPtrSet<BasicBlock *, 16> Unreachable;
  assert(DTU && "DTU isn't passed into JumpThreading before using it.");
  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
  DominatorTree &DT = DTU->getDomTree();
  for (auto &BB : F)
    if (!DT.isReachableFromEntry(&BB))
      Unreachable.insert(&BB);

  if (!ThreadAcrossLoopHeaders)
    findLoopHeaders(F);

  bool EverChanged = false;
  bool Changed;
  do {
    Changed = false;
    for (auto &BB : F) {
      if (Unreachable.count(&BB))
        continue;
      while (processBlock(&BB)) // Thread all of the branches we can over BB.
        Changed = true;

      // Jump threading may have introduced redundant debug values into BB
      // which should be removed.
      if (Changed)
        RemoveRedundantDbgInstrs(&BB);

      // Stop processing BB if it's the entry or is now deleted. The following
      // routines attempt to eliminate BB, and locating a suitable replacement
      // for the entry is non-trivial.
      if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
        continue;

      if (pred_empty(&BB)) {
        // When processBlock makes BB unreachable it doesn't bother to fix up
        // the instructions in it. We must remove BB to prevent invalid IR.
        LLVM_DEBUG(dbgs() << "  JT: Deleting dead block '" << BB.getName()
                          << "' with terminator: " << *BB.getTerminator()
                          << '\n');
        LoopHeaders.erase(&BB);
        LVI->eraseBlock(&BB);
        DeleteDeadBlock(&BB, DTU);
        Changed = true;
        continue;
      }

      // processBlock doesn't thread BBs with unconditional TIs. However, if BB
      // is "almost empty", we attempt to merge BB with its sole successor.
      auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
      if (BI && BI->isUnconditional()) {
        BasicBlock *Succ = BI->getSuccessor(0);
        if (
            // The terminator must be the only non-phi instruction in BB.
            BB.getFirstNonPHIOrDbg()->isTerminator() &&
            // Don't alter Loop headers and latches to ensure another pass can
            // detect and transform nested loops later.
            !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
            TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
          RemoveRedundantDbgInstrs(Succ);
          // BB is valid for cleanup here because we passed in DTU. F remains
          // BB's parent until a DTU->getDomTree() event.
          LVI->eraseBlock(&BB);
          Changed = true;
        }
      }
    }
    EverChanged |= Changed;
  } while (Changed);

  LoopHeaders.clear();
  return EverChanged;
}
// Replace uses of Cond with ToVal when safe to do so. If all uses are
// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
// because we may incorrectly replace uses when guards/assumes are uses of
// `Cond` and we used the guards/assumes to reason about the `Cond` value
// at the end of block. RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
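//
// For example (a sketch), given
//     %cond = icmp ult i32 %x, 10
//     call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
// LVI may conclude %cond is true *after* the guard executes; RAUWing %cond
// with true would also rewrite the guard's own operand and silently drop the
// runtime check.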
static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
  assert(Cond->getType() == ToVal->getType());
  auto *BB = Cond->getParent();
  // We can unconditionally replace all uses in non-local blocks (i.e. uses
  // strictly dominated by BB), since LVI information is true from the
  // terminator of BB.
  replaceNonLocalUsesWith(Cond, ToVal);
  for (Instruction &I : reverse(*BB)) {
    // Reached the Cond whose uses we are trying to replace, so there are no
    // more uses.
    if (&I == Cond)
      break;
    // We only replace uses in instructions that are guaranteed to reach the end
    // of BB, where we know Cond is ToVal.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
    I.replaceUsesOfWith(Cond, ToVal);
  }
  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
    Cond->eraseFromParent();
}
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
                                             Instruction *StopAt,
                                             unsigned Threshold) {
  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
  // Ignore PHI nodes, these will be flattened when duplication happens.
  BasicBlock::const_iterator I(BB->getFirstNonPHI());

  // FIXME: THREADING will delete values that are just used to compute the
  // branch, so they shouldn't count against the duplication cost.

  unsigned Bonus = 0;
  if (BB->getTerminator() == StopAt) {
    // Threading through a switch statement is particularly profitable. If this
    // block ends in a switch, decrease its cost to make it more likely to
    // happen.
    if (isa<SwitchInst>(StopAt))
      Bonus = 6;

    // The same holds for indirect branches, but slightly more so.
    if (isa<IndirectBrInst>(StopAt))
      Bonus = 8;
  }

  // Bump the threshold up so the early exit from the loop doesn't skip the
  // terminator-based Size adjustment at the end.
  Threshold += Bonus;

  // Sum up the cost of each instruction until we get to the terminator. Don't
  // include the terminator because the copy won't include it.
  unsigned Size = 0;
  for (; &*I != StopAt; ++I) {
    // Stop scanning the block if we've reached the threshold.
    if (Size > Threshold)
      return Size;

    // Debugger intrinsics don't incur code size.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Pseudo-probes don't incur code size.
    if (isa<PseudoProbeInst>(I))
      continue;

    // If this is a pointer->pointer bitcast, it is free.
    if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
      continue;

    // Freeze instruction is free, too.
    if (isa<FreezeInst>(I))
      continue;

    // Bail out if this instruction gives back a token type, it is not possible
    // to duplicate it if it is used outside this BB.
    if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
      return ~0U;

    // All other instructions count for at least one unit.
    ++Size;

    // Calls are more expensive. If they are non-intrinsic calls, we model them
    // as having cost of 4. If they are a non-vector intrinsic, we model them
    // as having cost of 2 total, and if they are a vector intrinsic, we model
    // them as having cost 1.
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      if (CI->cannotDuplicate() || CI->isConvergent())
        // Blocks with NoDuplicate are modelled as having infinite cost, so they
        // are never duplicated.
        return ~0U;
      else if (!isa<IntrinsicInst>(CI))
        Size += 3;
      else if (!CI->getType()->isVectorTy())
        Size += 1;
    }
  }
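
  // Undo the earlier threshold bump: subtracting the terminator Bonus from
  // Size makes blocks ending in a switch or indirect branch look cheaper to
  // duplicate, without letting the bonus push the result below zero.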
  return Size > Bonus ? Size - Bonus : 0;
}

/// findLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
/// happening, we first have to find the loop headers. Here we approximate this
/// by finding targets of backedges in the CFG.
///
/// Note that there definitely are cases when we want to allow threading of
/// edges across a loop header. For example, threading a jump from outside the
/// loop (the preheader) to an exit block of the loop is definitely profitable.
/// It is also almost always profitable to thread backedges from within the loop
/// to exit blocks, and is often profitable to thread backedges to other blocks
/// within the loop (forming a nested loop). This simple analysis is not rich
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
void JumpThreadingPass::findLoopHeaders(Function &F) {
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);
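
  // FindFunctionBackedges returns (from, to) pairs, so the second element of
  // each backedge is the block we record as a loop header.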
  for (const auto &Edge : Edges)
    LoopHeaders.insert(Edge.second);
}

/// getKnownConstant - Helper method to determine if we can thread over a
/// terminator with the given value as its condition, and if so what value to
/// use for that. What kind of value this is depends on whether we want an
/// integer or a block address, but an undef is always accepted.
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
  if (!Val)
    return nullptr;

  // Undef is "known" enough.
  if (UndefValue *U = dyn_cast<UndefValue>(Val))
    return U;

  if (Preference == WantBlockAddress)
    return dyn_cast<BlockAddress>(Val->stripPointerCasts());

  return dyn_cast<ConstantInt>(Val);
}

/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
/// in any of our predecessors. If so, return the known list of value and pred
/// BB in the result vector.
///
/// This returns true if there were any known values.
bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
    Value *V, BasicBlock *BB, PredValueInfo &Result,
    ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
    Instruction *CxtI) {
  // This method walks up use-def chains recursively. Because of this, we could
  // get into an infinite loop going around loops in the use-def chain. To
  // prevent this, keep track of what values we've already visited and
  // terminate the search if we loop back to them.
  if (!RecursionSet.insert(V).second)
    return false;

  // If V is a constant, then it is known in all predecessors.
  if (Constant *KC = getKnownConstant(V, Preference)) {
    for (BasicBlock *Pred : predecessors(BB))
      Result.emplace_back(KC, Pred);

    return !Result.empty();
  }

  // If V is a non-instruction value, or an instruction in a different block,
  // then it can't be derived from a PHI.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || I->getParent() != BB) {
    // Okay, if this is a live-in value, see if it has a known value at the end
    // of any of our predecessors.
    //
    // FIXME: This should be an edge property, not a block end property.
    // TODO: Per PR2563, we could infer value range information about a
    // predecessor based on its terminator.
    //
    // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
    // "I" is a non-local compare-with-a-constant instruction. This would be
    // able to handle value inequalities better, for example if the compare is
    // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
    // Perhaps getConstantOnEdge should be smart enough to do this?
    for (BasicBlock *P : predecessors(BB)) {
      // If the value is known by LazyValueInfo to be a constant in a
      // predecessor, use that information to try to thread this block.
      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
      if (Constant *KC = getKnownConstant(PredCst, Preference))
        Result.emplace_back(KC, P);
    }

    return !Result.empty();
  }

  // If I is a PHI node, then we know the incoming values for any constants.
  if (PHINode *PN = dyn_cast<PHINode>(I)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      if (Constant *KC = getKnownConstant(InVal, Preference)) {
        Result.emplace_back(KC, PN->getIncomingBlock(i));
      } else {
        Constant *CI = LVI->getConstantOnEdge(InVal,
                                              PN->getIncomingBlock(i),
                                              BB, CxtI);
        if (Constant *KC = getKnownConstant(CI, Preference))
          Result.emplace_back(KC, PN->getIncomingBlock(i));
      }
    }

    return !Result.empty();
  }

  // Handle Cast instructions.
  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Source = CI->getOperand(0);
    computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
                                        RecursionSet, CxtI);
    if (Result.empty())
      return false;

    // Convert the known values.
    for (auto &R : Result)
      R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());

    return true;
  }

  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
    Value *Source = FI->getOperand(0);
    computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
                                        RecursionSet, CxtI);
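
    // freeze yields an arbitrary-but-fixed value for undef or poison inputs,
    // so only incoming values known not to be undef/poison pass through the
    // freeze unchanged; drop the rest below.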
    erase_if(Result, [](auto &Pair) {
      return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
    });

    return !Result.empty();
  }

  // Handle some boolean conditions.
  if (I->getType()->getPrimitiveSizeInBits() == 1) {
    assert(Preference == WantInteger && "One-bit non-integer type?");
    // X | true -> true
    // X & false -> false
    if (I->getOpcode() == Instruction::Or ||
        I->getOpcode() == Instruction::And) {
      PredValueInfoTy LHSVals, RHSVals;

      computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
                                          WantInteger, RecursionSet, CxtI);
      computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
                                          WantInteger, RecursionSet, CxtI);

      if (LHSVals.empty() && RHSVals.empty())
        return false;

      ConstantInt *InterestingVal;
      if (I->getOpcode() == Instruction::Or)
        InterestingVal = ConstantInt::getTrue(I->getContext());
      else
        InterestingVal = ConstantInt::getFalse(I->getContext());

      SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;

      // Scan for the sentinel. If we find an undef, force it to the
      // interesting value: x|undef -> true and x&undef -> false.
      for (const auto &LHSVal : LHSVals)
        if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
          Result.emplace_back(InterestingVal, LHSVal.second);
          LHSKnownBBs.insert(LHSVal.second);
        }
      for (const auto &RHSVal : RHSVals)
        if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
          // If we already inferred a value for this block on the LHS, don't
          // re-add it.
          if (!LHSKnownBBs.count(RHSVal.second))
            Result.emplace_back(InterestingVal, RHSVal.second);
        }

      return !Result.empty();
    }

    // Handle the NOT form of XOR.
    if (I->getOpcode() == Instruction::Xor &&
        isa<ConstantInt>(I->getOperand(1)) &&
        cast<ConstantInt>(I->getOperand(1))->isOne()) {
      computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
                                          WantInteger, RecursionSet, CxtI);
      if (Result.empty())
        return false;

      // Invert the known values.
      for (auto &R : Result)
        R.first = ConstantExpr::getNot(R.first);

      return true;
    }

  // Try to simplify some other binary operator values.
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
    assert(Preference != WantBlockAddress
           && "A binary operator creating a block address?");
    if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
      PredValueInfoTy LHSVals;
      computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
                                          WantInteger, RecursionSet, CxtI);

      // Try to use constant folding to simplify the binary operator.
      for (const auto &LHSVal : LHSVals) {
        Constant *V = LHSVal.first;
        Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);

        if (Constant *KC = getKnownConstant(Folded, WantInteger))
          Result.emplace_back(KC, LHSVal.second);
      }
    }

    return !Result.empty();
  }

  // Handle compare with phi operand, where the PHI is defined in this block.
  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
    assert(Preference == WantInteger && "Compares only produce integers");
    Type *CmpType = Cmp->getType();
    Value *CmpLHS = Cmp->getOperand(0);
    Value *CmpRHS = Cmp->getOperand(1);
    CmpInst::Predicate Pred = Cmp->getPredicate();

    PHINode *PN = dyn_cast<PHINode>(CmpLHS);
    if (!PN)
      PN = dyn_cast<PHINode>(CmpRHS);
    if (PN && PN->getParent() == BB) {
      const DataLayout &DL = PN->getModule()->getDataLayout();
      // We can do this simplification if any comparisons fold to true or false.
      // See if any do.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        BasicBlock *PredBB = PN->getIncomingBlock(i);
        Value *LHS, *RHS;
        if (PN == CmpLHS) {
          LHS = PN->getIncomingValue(i);
          RHS = CmpRHS->DoPHITranslation(BB, PredBB);
        } else {
          LHS = CmpLHS->DoPHITranslation(BB, PredBB);
          RHS = PN->getIncomingValue(i);
        }

        Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
        if (!Res) {
          if (!isa<Constant>(RHS))
            continue;

          // getPredicateOnEdge call will make no sense if LHS is defined in BB.
          auto LHSInst = dyn_cast<Instruction>(LHS);
          if (LHSInst && LHSInst->getParent() == BB)
            continue;

          LazyValueInfo::Tristate
            ResT = LVI->getPredicateOnEdge(Pred, LHS,
                                           cast<Constant>(RHS), PredBB, BB,
                                           CxtI ? CxtI : Cmp);
          if (ResT == LazyValueInfo::Unknown)
            continue;
          Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
        }

        if (Constant *KC = getKnownConstant(Res, WantInteger))
          Result.emplace_back(KC, PredBB);
      }

      return !Result.empty();
    }

    // If comparing a live-in value against a constant, see if we know the
    // live-in value on any predecessors.
    if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
      Constant *CmpConst = cast<Constant>(CmpRHS);

      if (!isa<Instruction>(CmpLHS) ||
          cast<Instruction>(CmpLHS)->getParent() != BB) {
        for (BasicBlock *P : predecessors(BB)) {
          // If the value is known by LazyValueInfo to be a constant in a
          // predecessor, use that information to try to thread this block.
          LazyValueInfo::Tristate Res =
            LVI->getPredicateOnEdge(Pred, CmpLHS,
                                    CmpConst, P, BB, CxtI ? CxtI : Cmp);
          if (Res == LazyValueInfo::Unknown)
            continue;

          Constant *ResC = ConstantInt::get(CmpType, Res);
          Result.emplace_back(ResC, P);
        }

        return !Result.empty();
      }

      // InstCombine can fold some forms of constant range checks into
      // (icmp (add (x, C1)), C2). See if we have such a thing with
      // x as a live-in.
      {
        using namespace PatternMatch;

        Value *AddLHS;
        ConstantInt *AddConst;
        if (isa<ConstantInt>(CmpConst) &&
            match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
          if (!isa<Instruction>(AddLHS) ||
              cast<Instruction>(AddLHS)->getParent() != BB) {
            for (BasicBlock *P : predecessors(BB)) {
              // If the value is known by LazyValueInfo to be a ConstantRange in
              // a predecessor, use that information to try to thread this
              // block.
              ConstantRange CR = LVI->getConstantRangeOnEdge(
                  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
              // Propagate the range through the addition.
              CR = CR.add(AddConst->getValue());

              // Get the range where the compare returns true.
              ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
                  Pred, cast<ConstantInt>(CmpConst)->getValue());

              Constant *ResC;
              if (CmpRange.contains(CR))
                ResC = ConstantInt::getTrue(CmpType);
              else if (CmpRange.inverse().contains(CR))
                ResC = ConstantInt::getFalse(CmpType);
              else
                continue;

              Result.emplace_back(ResC, P);
            }

            return !Result.empty();
          }
        }
      }

      // Try to find a constant value for the LHS of a comparison,
      // and evaluate it statically if we can.
      PredValueInfoTy LHSVals;
      computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
                                          WantInteger, RecursionSet, CxtI);

      for (const auto &LHSVal : LHSVals) {
        Constant *V = LHSVal.first;
        Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
        if (Constant *KC = getKnownConstant(Folded, WantInteger))
          Result.emplace_back(KC, LHSVal.second);
      }

      return !Result.empty();
    }
  }
  824. if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
  825. // Handle select instructions where at least one operand is a known constant
  826. // and we can figure out the condition value for any predecessor block.
  827. Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
  828. Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
  829. PredValueInfoTy Conds;
  830. if ((TrueVal || FalseVal) &&
  831. computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
  832. WantInteger, RecursionSet, CxtI)) {
  833. for (auto &C : Conds) {
  834. Constant *Cond = C.first;
  835. // Figure out what value to use for the condition.
  836. bool KnownCond;
  837. if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
  838. // A known boolean.
  839. KnownCond = CI->isOne();
  840. } else {
  841. assert(isa<UndefValue>(Cond) && "Unexpected condition value");
  842. // Either operand will do, so be sure to pick the one that's a known
  843. // constant.
  844. // FIXME: Do this more cleverly if both values are known constants?
  845. KnownCond = (TrueVal != nullptr);
  846. }
  847. // See if the select has a known constant value for this predecessor.
  848. if (Constant *Val = KnownCond ? TrueVal : FalseVal)
  849. Result.emplace_back(Val, C.second);
  850. }
  851. return !Result.empty();
  852. }
  853. }
  854. // If all else fails, see if LVI can figure out a constant value for us.
  855. assert(CxtI->getParent() == BB && "CxtI should be in BB");
  856. Constant *CI = LVI->getConstant(V, CxtI);
  857. if (Constant *KC = getKnownConstant(CI, Preference)) {
  858. for (BasicBlock *Pred : predecessors(BB))
  859. Result.emplace_back(KC, Pred);
  860. }
  861. return !Result.empty();
  862. }
  863. /// GetBestDestForBranchOnUndef - If we determine that the specified block ends
  864. /// in an undefined jump, decide which block is best to revector to.
  865. ///
  866. /// Since we can pick an arbitrary destination, we pick the successor with the
  867. /// fewest predecessors. This should reduce the in-degree of the others.
  868. static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
  869. Instruction *BBTerm = BB->getTerminator();
  870. unsigned MinSucc = 0;
  871. BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
  872. // Compute the successor with the minimum number of predecessors.
  873. unsigned MinNumPreds = pred_size(TestBB);
  874. for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
  875. TestBB = BBTerm->getSuccessor(i);
  876. unsigned NumPreds = pred_size(TestBB);
  877. if (NumPreds < MinNumPreds) {
  878. MinSucc = i;
  879. MinNumPreds = NumPreds;
  880. }
  881. }
  882. return MinSucc;
  883. }
  884. static bool hasAddressTakenAndUsed(BasicBlock *BB) {
  885. if (!BB->hasAddressTaken()) return false;
  886. // If the block has its address taken, it may be a tree of dead constants
  887. // hanging off of it. These shouldn't keep the block alive.
  888. BlockAddress *BA = BlockAddress::get(BB);
  889. BA->removeDeadConstantUsers();
  890. return !BA->use_empty();
  891. }
  892. /// processBlock - If there are any predecessors whose control can be threaded
  893. /// through to a successor, transform them now.
  894. bool JumpThreadingPass::processBlock(BasicBlock *BB) {
  895. // If the block is trivially dead, just return and let the caller nuke it.
  896. // This simplifies other transformations.
  897. if (DTU->isBBPendingDeletion(BB) ||
  898. (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
  899. return false;
  900. // If this block has a single predecessor, and if that pred has a single
  901. // successor, merge the blocks. This encourages recursive jump threading
  902. // because now the condition in this block can be threaded through
  903. // predecessors of our predecessor block.
  904. if (maybeMergeBasicBlockIntoOnlyPred(BB))
  905. return true;
  906. if (tryToUnfoldSelectInCurrBB(BB))
  907. return true;
  908. // Look if we can propagate guards to predecessors.
  909. if (HasGuards && processGuards(BB))
  910. return true;
  911. // What kind of constant we're looking for.
  912. ConstantPreference Preference = WantInteger;
  913. // Look to see if the terminator is a conditional branch, switch or indirect
  914. // branch, if not we can't thread it.
  915. Value *Condition;
  916. Instruction *Terminator = BB->getTerminator();
  917. if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
  918. // Can't thread an unconditional jump.
  919. if (BI->isUnconditional()) return false;
  920. Condition = BI->getCondition();
  921. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
  922. Condition = SI->getCondition();
  923. } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
  924. // Can't thread indirect branch with no successors.
  925. if (IB->getNumSuccessors() == 0) return false;
  926. Condition = IB->getAddress()->stripPointerCasts();
  927. Preference = WantBlockAddress;
  928. } else {
  929. return false; // Must be an invoke or callbr.
  930. }
  931. // Keep track if we constant folded the condition in this invocation.
  932. bool ConstantFolded = false;
  933. // Run constant folding to see if we can reduce the condition to a simple
  934. // constant.
  935. if (Instruction *I = dyn_cast<Instruction>(Condition)) {
  936. Value *SimpleVal =
  937. ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
  938. if (SimpleVal) {
  939. I->replaceAllUsesWith(SimpleVal);
  940. if (isInstructionTriviallyDead(I, TLI))
  941. I->eraseFromParent();
  942. Condition = SimpleVal;
  943. ConstantFolded = true;
  944. }
  945. }
  946. // If the terminator is branching on an undef or freeze undef, we can pick any
  947. // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
  948. auto *FI = dyn_cast<FreezeInst>(Condition);
  949. if (isa<UndefValue>(Condition) ||
  950. (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
  951. unsigned BestSucc = getBestDestForJumpOnUndef(BB);
  952. std::vector<DominatorTree::UpdateType> Updates;
  953. // Fold the branch/switch.
  954. Instruction *BBTerm = BB->getTerminator();
  955. Updates.reserve(BBTerm->getNumSuccessors());
  956. for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
  957. if (i == BestSucc) continue;
  958. BasicBlock *Succ = BBTerm->getSuccessor(i);
  959. Succ->removePredecessor(BB, true);
  960. Updates.push_back({DominatorTree::Delete, BB, Succ});
  961. }
  962. LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
  963. << "' folding undef terminator: " << *BBTerm << '\n');
  964. BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
  965. BBTerm->eraseFromParent();
  966. DTU->applyUpdatesPermissive(Updates);
  967. if (FI)
  968. FI->eraseFromParent();
  969. return true;
  970. }
  971. // If the terminator of this block is branching on a constant, simplify the
  972. // terminator to an unconditional branch. This can occur due to threading in
  973. // other blocks.
  if (getKnownConstant(Condition, Preference)) {
    LLVM_DEBUG(dbgs() << "  In block '" << BB->getName()
                      << "' folding terminator: " << *BB->getTerminator()
                      << '\n');
    ++NumFolds;
    ConstantFoldTerminator(BB, true, nullptr, DTU);
    if (HasProfileData)
      BPI->eraseBlock(BB);
    return true;
  }

  Instruction *CondInst = dyn_cast<Instruction>(Condition);

  // All the rest of our checks depend on the condition being an instruction.
  if (!CondInst) {
    // FIXME: Unify this with code below.
    if (processThreadableEdges(Condition, BB, Preference, Terminator))
      return true;
    return ConstantFolded;
  }

  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
    // If we're branching on a conditional, LVI might be able to determine
    // its value at the branch instruction. We only handle comparisons
    // against a constant at this time.
    // TODO: This should be extended to handle switches as well.
    BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
    Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
    if (CondBr && CondConst) {
      // We should have returned as soon as we turned a conditional branch into
      // an unconditional one, because it is no longer interesting as far as
      // jump threading is concerned.
      assert(CondBr->isConditional() && "Threading on unconditional terminator");

      LazyValueInfo::Tristate Ret =
          LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
                              CondConst, CondBr);
      if (Ret != LazyValueInfo::Unknown) {
        unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
        unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
        BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
        ToRemoveSucc->removePredecessor(BB, true);
        BranchInst *UncondBr =
            BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
        UncondBr->setDebugLoc(CondBr->getDebugLoc());
        CondBr->eraseFromParent();
        if (CondCmp->use_empty())
          CondCmp->eraseFromParent();
        // We can safely replace *some* uses of the CondInst if it has
        // exactly one value as returned by LVI. RAUW is incorrect in the
        // presence of guards and assumes that have the `Cond` as a use. This
        // is because we use the guards/assumes to reason about the `Cond`
        // value at the end of the block, but RAUW unconditionally replaces
        // all uses, including the guards/assumes themselves and the uses
        // before the guard/assume.
        else if (CondCmp->getParent() == BB) {
          auto *CI = Ret == LazyValueInfo::True ?
            ConstantInt::getTrue(CondCmp->getType()) :
            ConstantInt::getFalse(CondCmp->getType());
          replaceFoldableUses(CondCmp, CI);
        }
        DTU->applyUpdatesPermissive(
            {{DominatorTree::Delete, BB, ToRemoveSucc}});
        if (HasProfileData)
          BPI->eraseBlock(BB);
        return true;
      }

      // We did not manage to simplify this branch, try to see whether
      // CondCmp depends on a known phi-select pattern.
      if (tryToUnfoldSelect(CondCmp, BB))
        return true;
    }
  }

  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
    if (tryToUnfoldSelect(SI, BB))
      return true;

  // Check for some cases that are worth simplifying. Right now we want to look
  // for loads that are used by a switch or by the condition for the branch. If
  // we see one, check to see if it's partially redundant. If so, insert a PHI
  // which can then be used to thread the values.
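  //
  // A hypothetical sketch of the pattern we are looking for:
  //
  //  Pred1:                       Pred2:
  //    store i32 0, i32* %p         ...
  //    br label %BB                 br label %BB
  //  BB:
  //    %v = load i32, i32* %p   ; available in Pred1, reloadable in Pred2
  //    %c = icmp eq i32 %v, 0
  //    br i1 %c, ...
  //
  // After PRE, %v becomes a phi of the stored value and a reload inserted in
  // Pred2, which gives processThreadableEdges something to thread on.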
  Value *SimplifyValue = CondInst;

  if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
    // Look into freeze's operand
    SimplifyValue = FI->getOperand(0);

  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
    if (isa<Constant>(CondCmp->getOperand(1)))
      SimplifyValue = CondCmp->getOperand(0);

  // TODO: There are other places where load PRE would be profitable, such as
  // more complex comparisons.
  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
    if (simplifyPartiallyRedundantLoad(LoadI))
      return true;

  // Before threading, try to propagate profile data backwards:
  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
      updatePredecessorProfileMetadata(PN, BB);

  // Handle a variety of cases where we are branching on something derived from
  // a PHI node in the current block. If we can prove that any predecessors
  // compute a predictable value based on a PHI node, thread those predecessors.
  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
    return true;

  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
  // the current block, see if we can simplify.
  PHINode *PN = dyn_cast<PHINode>(
      isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
                                : CondInst);

  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
    return processBranchOnPHI(PN);

  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
  if (CondInst->getOpcode() == Instruction::Xor &&
      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
    return processBranchOnXOR(cast<BinaryOperator>(CondInst));

  // Search for a stronger dominating condition that can be used to simplify a
  // conditional branch leaving BB.
  if (processImpliedCondition(BB))
    return true;

  return false;
}

bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  BasicBlock *CurrentBB = BB;
  BasicBlock *CurrentPred = BB->getSinglePredecessor();
  unsigned Iter = 0;

  auto &DL = BB->getModule()->getDataLayout();

  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
    auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
    if (!PBI || !PBI->isConditional())
      return false;
    if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
      return false;

    bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
    Optional<bool> Implication =
        isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
    if (Implication) {
      BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
      BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
      RemoveSucc->removePredecessor(BB);
      BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
      UncondBI->setDebugLoc(BI->getDebugLoc());
      BI->eraseFromParent();
      DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
      if (HasProfileData)
        BPI->eraseBlock(BB);
      return true;
    }
    CurrentBB = CurrentPred;
    CurrentPred = CurrentBB->getSinglePredecessor();
  }

  return false;
}

/// Return true if Op is an instruction defined in the given block.
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
    if (OpInst->getParent() == BB)
      return true;
  return false;
}

/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
/// redundant load instruction, eliminate it by replacing it with a PHI node.
/// This is an important optimization that encourages jump threading, and needs
/// to be run interlaced with other jump threading tasks.
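///
/// As an illustrative (made-up) example:
///
///   pred1:                    pred2:
///     store i32 1, i32* %p      br label %load_bb
///     br label %load_bb
///   load_bb:
///     %v = load i32, i32* %p
///
/// The load is redundant in pred1 (the stored value is available) and merely
/// missing in pred2; inserting a reload in pred2 and merging both values with
/// a PHI removes the load from load_bb.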
bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
  // Don't hack volatile and ordered loads.
  if (!LoadI->isUnordered()) return false;

  // If the load is defined in a block with exactly one predecessor, it can't be
  // partially redundant.
  BasicBlock *LoadBB = LoadI->getParent();
  if (LoadBB->getSinglePredecessor())
    return false;

  // If the load is defined in an EH pad, it can't be partially redundant,
  // because the edges between the invoke and the EH pad cannot have other
  // instructions between them.
  if (LoadBB->isEHPad())
    return false;

  Value *LoadedPtr = LoadI->getOperand(0);

  // If the loaded operand is defined in the LoadBB and it's not a phi,
  // it can't be available in predecessors.
  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
    return false;

  // Scan a few instructions up from the load, to see if it is obviously live at
  // the entry to its block.
  BasicBlock::iterator BBIt(LoadI);
  bool IsLoadCSE;
  if (Value *AvailableVal = FindAvailableLoadedValue(
          LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
    // If the value of the load is locally available within the block, just use
    // it. This frequently occurs for reg2mem'd allocas.
    if (IsLoadCSE) {
      LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
      combineMetadataForCSE(NLoadI, LoadI, false);
    }

    // If the returned value is the load itself, replace with an undef. This can
    // only happen in dead loops.
    if (AvailableVal == LoadI)
      AvailableVal = UndefValue::get(LoadI->getType());
    if (AvailableVal->getType() != LoadI->getType())
      AvailableVal = CastInst::CreateBitOrPointerCast(
          AvailableVal, LoadI->getType(), "", LoadI);
    LoadI->replaceAllUsesWith(AvailableVal);
    LoadI->eraseFromParent();
    return true;
  }

  // Otherwise, if we scanned the whole block and got to the top of the block,
  // we know the block is locally transparent to the load. If not, something
  // might clobber its value.
  if (BBIt != LoadBB->begin())
    return false;

  // If all of the loads and stores that feed the value have the same AA tags,
  // then we can propagate them onto any newly inserted loads.
  AAMDNodes AATags;
  LoadI->getAAMetadata(AATags);

  SmallPtrSet<BasicBlock*, 8> PredsScanned;

  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;

  AvailablePredsTy AvailablePreds;
  BasicBlock *OneUnavailablePred = nullptr;
  SmallVector<LoadInst*, 8> CSELoads;

  // If we got here, the loaded value is transparent through to the start of the
  // block. Check to see if it is available in any of the predecessor blocks.
  for (BasicBlock *PredBB : predecessors(LoadBB)) {
    // If we already scanned this predecessor, skip it.
    if (!PredsScanned.insert(PredBB).second)
      continue;

    BBIt = PredBB->end();
    unsigned NumScanedInst = 0;
    Value *PredAvailable = nullptr;
    // NOTE: We don't CSE loads that are volatile or anything stronger than
    // unordered; that should have been checked when we entered the function.
    assert(LoadI->isUnordered() &&
           "Attempting to CSE volatile or atomic loads");
    // If this is a load on a phi pointer, phi-translate it and search
    // for available load/store to the pointer in predecessors.
    Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
    PredAvailable = FindAvailablePtrLoadStore(
        Ptr, LoadI->getType(), LoadI->isAtomic(), PredBB, BBIt,
        DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst);

    // If PredBB has a single predecessor, continue scanning through the
    // single predecessor.
    BasicBlock *SinglePredBB = PredBB;
    while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
           NumScanedInst < DefMaxInstsToScan) {
      SinglePredBB = SinglePredBB->getSinglePredecessor();
      if (SinglePredBB) {
        BBIt = SinglePredBB->end();
        PredAvailable = FindAvailablePtrLoadStore(
            Ptr, LoadI->getType(), LoadI->isAtomic(), SinglePredBB, BBIt,
            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
            &NumScanedInst);
      }
    }

    if (!PredAvailable) {
      OneUnavailablePred = PredBB;
      continue;
    }

    if (IsLoadCSE)
      CSELoads.push_back(cast<LoadInst>(PredAvailable));

    // If so, this load is partially redundant. Remember this info so that we
    // can create a PHI node.
    AvailablePreds.emplace_back(PredBB, PredAvailable);
  }

  // If the loaded value isn't available in any predecessor, it isn't partially
  // redundant.
  if (AvailablePreds.empty()) return false;

  // Okay, the loaded value is available in at least one (and maybe all!)
  // predecessors. If the value is unavailable in more than one unique
  // predecessor, we want to insert a merge block for those common predecessors.
  // This ensures that we only have to insert one reload, thus not increasing
  // code size.
  BasicBlock *UnavailablePred = nullptr;

  // If the value is unavailable in one of the predecessors, we will end up
  // inserting a new instruction into it. That is only valid if all the
  // instructions before LoadI are guaranteed to pass execution to its
  // successor, or if LoadI is safe to speculate.
  // TODO: If this logic becomes more complex, and we need to perform PRE
  // insertion farther than a direct predecessor, we should reuse the code from
  // GVN's PRE. It requires dominator tree analysis, which is overkill for this
  // simple case.
  if (PredsScanned.size() != AvailablePreds.size() &&
      !isSafeToSpeculativelyExecute(LoadI))
    for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
        return false;

  // If there is exactly one predecessor where the value is unavailable, the
  // already computed 'OneUnavailablePred' block is it. If it ends in an
  // unconditional branch, we know that it isn't a critical edge.
  if (PredsScanned.size() == AvailablePreds.size()+1 &&
      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
    UnavailablePred = OneUnavailablePred;
  } else if (PredsScanned.size() != AvailablePreds.size()) {
    // Otherwise, we had multiple unavailable predecessors or we had a critical
    // edge from the one.
    SmallVector<BasicBlock*, 8> PredsToSplit;
    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;

    for (const auto &AvailablePred : AvailablePreds)
      AvailablePredSet.insert(AvailablePred.first);

    // Add all the unavailable predecessors to the PredsToSplit list.
    for (BasicBlock *P : predecessors(LoadBB)) {
      // If the predecessor is an indirect goto, we can't split the edge.
      // Same for CallBr.
      if (isa<IndirectBrInst>(P->getTerminator()) ||
          isa<CallBrInst>(P->getTerminator()))
        return false;

      if (!AvailablePredSet.count(P))
        PredsToSplit.push_back(P);
    }

    // Split them out to their own block.
    UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
  }

  // If the value isn't available in all predecessors, then there will be
  // exactly one where it isn't available. Insert a load on that edge and add
  // it to the AvailablePreds list.
  if (UnavailablePred) {
    assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
           "Can't handle critical edge here!");
    LoadInst *NewVal = new LoadInst(
        LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
        LoadI->getName() + ".pr", false, LoadI->getAlign(),
        LoadI->getOrdering(), LoadI->getSyncScopeID(),
        UnavailablePred->getTerminator());
    NewVal->setDebugLoc(LoadI->getDebugLoc());
    if (AATags)
      NewVal->setAAMetadata(AATags);

    AvailablePreds.emplace_back(UnavailablePred, NewVal);
  }

  // Now we know that each predecessor of this block has a value in
  // AvailablePreds, sort them for efficient access as we're walking the preds.
  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());

  // Create a PHI node at the start of the block for the PRE'd load value.
  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
                                &LoadBB->front());
  PN->takeName(LoadI);
  PN->setDebugLoc(LoadI->getDebugLoc());

  // Insert new entries into the PHI for each predecessor. A single block may
  // have multiple entries here.
  for (pred_iterator PI = PB; PI != PE; ++PI) {
    BasicBlock *P = *PI;
    AvailablePredsTy::iterator I =
        llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));

    assert(I != AvailablePreds.end() && I->first == P &&
           "Didn't find entry for predecessor!");

    // If we have an available predecessor but it requires casting, insert the
    // cast in the predecessor and use the cast. Note that we have to update the
    // AvailablePreds vector as we go so that all of the PHI entries for this
    // predecessor use the same bitcast.
    Value *&PredV = I->second;
    if (PredV->getType() != LoadI->getType())
      PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
                                               P->getTerminator());

    PN->addIncoming(PredV, I->first);
  }

  for (LoadInst *PredLoadI : CSELoads) {
    combineMetadataForCSE(PredLoadI, LoadI, true);
  }

  LoadI->replaceAllUsesWith(PN);
  LoadI->eraseFromParent();

  return true;
}
/// findMostPopularDest - The specified list contains multiple possible
/// threadable destinations. Pick the one that occurs the most frequently in
/// the list.
static BasicBlock *
findMostPopularDest(BasicBlock *BB,
                    const SmallVectorImpl<std::pair<BasicBlock *,
                                                    BasicBlock *>> &PredToDestList) {
  assert(!PredToDestList.empty());

  // Determine popularity. If there are multiple possible destinations, we
  // explicitly choose to ignore 'undef' destinations. We prefer to thread
  // blocks with known and real destinations to threading undef. We'll handle
  // them later if interesting.
  MapVector<BasicBlock *, unsigned> DestPopularity;

  // Populate DestPopularity with the successors in the order they appear in the
  // successor list. This way, we ensure determinism by iterating it in the
  // same order in std::max_element below. We map nullptr to 0 so that we can
  // return nullptr when PredToDestList contains nullptr only.
  DestPopularity[nullptr] = 0;
  for (auto *SuccBB : successors(BB))
    DestPopularity[SuccBB] = 0;

  for (const auto &PredToDest : PredToDestList)
    if (PredToDest.second)
      DestPopularity[PredToDest.second]++;

  // Find the most popular dest.
  using VT = decltype(DestPopularity)::value_type;
  auto MostPopular = std::max_element(
      DestPopularity.begin(), DestPopularity.end(),
      [](const VT &L, const VT &R) { return L.second < R.second; });

  // Okay, we have finally picked the most popular destination.
  return MostPopular->first;
}

// Try to evaluate the value of V when the control flows from PredPredBB to
// BB->getSinglePredecessor() and then on to BB.
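//
// A hypothetical sketch: with
//
//  PredPredBB: br label %PredBB
//  PredBB:     %x = phi i32 [ 0, %PredPredBB ], ...
//              br i1 %c, label %BB, ...
//  BB:         %cmp = icmp eq i32 %x, 0
//
// evaluating %cmp on the PredPredBB->PredBB edge folds the phi argument to 0
// and the compare to 'true'.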
Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
                                                       BasicBlock *PredPredBB,
                                                       Value *V) {
  BasicBlock *PredBB = BB->getSinglePredecessor();
  assert(PredBB && "Expected a single predecessor");

  if (Constant *Cst = dyn_cast<Constant>(V)) {
    return Cst;
  }

  // Consult LVI if V is not an instruction in BB or PredBB.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
    return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
  }

  // Look into a PHI argument.
  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
    if (PHI->getParent() == PredBB)
      return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
    return nullptr;
  }

  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
    if (CondCmp->getParent() == BB) {
      Constant *Op0 =
          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
      Constant *Op1 =
          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
      if (Op0 && Op1) {
        return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
      }
    }
    return nullptr;
  }

  return nullptr;
}
bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
                                               ConstantPreference Preference,
                                               Instruction *CxtI) {
  // If threading this would thread across a loop header, don't even try to
  // thread the edge.
  if (LoopHeaders.count(BB))
    return false;

  PredValueInfoTy PredValues;
  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
                                       CxtI)) {
    // We don't have known values in predecessors. See if we can thread through
    // BB and its sole predecessor.
    return maybethreadThroughTwoBasicBlocks(BB, Cond);
  }

  assert(!PredValues.empty() &&
         "computeValueKnownInPredecessors returned true with no values");

  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
             for (const auto &PredValue : PredValues) {
               dbgs() << "  BB '" << BB->getName()
                      << "': FOUND condition = " << *PredValue.first
                      << " for pred '" << PredValue.second->getName() << "'.\n";
             });

  // Decide what we want to thread through. Convert our list of known values to
  // a list of known destinations for each pred. This also discards duplicate
  // predecessors and keeps track of the undefined inputs (which are represented
  // as a null dest in the PredToDestList).
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;

  BasicBlock *OnlyDest = nullptr;
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
  Constant *OnlyVal = nullptr;
  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;

  for (const auto &PredValue : PredValues) {
    BasicBlock *Pred = PredValue.second;
    if (!SeenPreds.insert(Pred).second)
      continue; // Duplicate predecessor entry.

    Constant *Val = PredValue.first;

    BasicBlock *DestBB;
    if (isa<UndefValue>(Val))
      DestBB = nullptr;
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
    } else {
      assert(isa<IndirectBrInst>(BB->getTerminator())
              && "Unexpected terminator");
      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
    }

    // If we have exactly one destination, remember it for efficiency below.
    if (PredToDestList.empty()) {
      OnlyDest = DestBB;
      OnlyVal = Val;
    } else {
      if (OnlyDest != DestBB)
        OnlyDest = MultipleDestSentinel;
      // It is possible that we have the same destination but a different
      // value, e.g. the default case in a switchinst.
      if (Val != OnlyVal)
        OnlyVal = MultipleVal;
    }

    // If the predecessor ends with an indirect goto, we can't change its
    // destination. Same for CallBr.
    if (isa<IndirectBrInst>(Pred->getTerminator()) ||
        isa<CallBrInst>(Pred->getTerminator()))
      continue;

    PredToDestList.emplace_back(Pred, DestBB);
  }

  // If all edges were unthreadable, we fail.
  if (PredToDestList.empty())
    return false;

  // If all the predecessors go to a single known successor, we want to fold,
  // not thread. By doing so, we do not need to duplicate the current block and
  // we avoid missing potential opportunities in case we don't or can't
  // duplicate.
  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
    if (BB->hasNPredecessors(PredToDestList.size())) {
      bool SeenFirstBranchToOnlyDest = false;
      std::vector <DominatorTree::UpdateType> Updates;
      Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
      for (BasicBlock *SuccBB : successors(BB)) {
        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
        } else {
          SuccBB->removePredecessor(BB, true); // This is an unreachable successor.
          Updates.push_back({DominatorTree::Delete, BB, SuccBB});
        }
      }

      // Finally update the terminator.
      Instruction *Term = BB->getTerminator();
      BranchInst::Create(OnlyDest, Term);
      Term->eraseFromParent();
      DTU->applyUpdatesPermissive(Updates);
      if (HasProfileData)
        BPI->eraseBlock(BB);

      // If the condition is now dead due to the removal of the old terminator,
      // erase it.
      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
          CondInst->eraseFromParent();
        // We can safely replace *some* uses of the CondInst if it has
        // exactly one value as returned by LVI. RAUW is incorrect in the
        // presence of guards and assumes that have the `Cond` as a use. This
        // is because we use the guards/assumes to reason about the `Cond`
        // value at the end of the block, but RAUW unconditionally replaces
        // all uses, including the guards/assumes themselves and the uses
        // before the guard/assume.
        else if (OnlyVal && OnlyVal != MultipleVal &&
                 CondInst->getParent() == BB)
          replaceFoldableUses(CondInst, OnlyVal);
      }
      return true;
    }
  }

  // Determine which is the most common successor. If we have many inputs and
  // this block is a switch, we want to start by threading the batch that goes
  // to the most popular destination first. If we only know about one
  // threadable destination (the common case) we can avoid this.
  BasicBlock *MostPopularDest = OnlyDest;

  if (MostPopularDest == MultipleDestSentinel) {
    // Remove any loop headers from the Dest list; threadEdge conservatively
    // won't process them, but we might have other destinations that are
    // eligible and we still want to process those.
    erase_if(PredToDestList,
             [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
               return LoopHeaders.contains(PredToDest.second);
             });

    if (PredToDestList.empty())
      return false;

    MostPopularDest = findMostPopularDest(BB, PredToDestList);
  }

  // Now that we know what the most popular destination is, factor all
  // predecessors that will jump to it into a single predecessor.
  SmallVector<BasicBlock*, 16> PredsToFactor;
  for (const auto &PredToDest : PredToDestList)
    if (PredToDest.second == MostPopularDest) {
      BasicBlock *Pred = PredToDest.first;

      // This predecessor may be a switch or something else that has multiple
      // edges to the block. Factor each of these edges by listing them
      // according to # occurrences in PredsToFactor.
      for (BasicBlock *Succ : successors(Pred))
        if (Succ == BB)
          PredsToFactor.push_back(Pred);
    }

  // If the threadable edges are branching on an undefined value, we get to pick
  // the destination that these predecessors should get to.
  if (!MostPopularDest)
    MostPopularDest = BB->getTerminator()->
                          getSuccessor(getBestDestForJumpOnUndef(BB));

  // Ok, try to thread it!
  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node (or freeze PHI) in the current block. See if there are any
/// simplifications we can do based on inputs to the phi node.
bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
  BasicBlock *BB = PN->getParent();

  // TODO: We could make use of this to do it once for blocks with common PHI
  // values.
  SmallVector<BasicBlock*, 1> PredBBs;
  PredBBs.resize(1);

  // If any of the predecessor blocks end in an unconditional branch, we can
  // *duplicate* the conditional branch into that block in order to further
  // encourage jump threading and to eliminate cases where we have branch on a
  // phi of an icmp (branch on icmp is much better).
  // This is still beneficial when a frozen phi is used as the branch condition
  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
  // to br(icmp(freeze ...)).
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *PredBB = PN->getIncomingBlock(i);
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
      if (PredBr->isUnconditional()) {
        PredBBs[0] = PredBB;
        // Try to duplicate BB into PredBB.
        if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
          return true;
      }
  }

  return false;
}
/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
  BasicBlock *BB = BO->getParent();

  // If either the LHS or RHS of the xor is a constant, don't do this
  // optimization.
  if (isa<ConstantInt>(BO->getOperand(0)) ||
      isa<ConstantInt>(BO->getOperand(1)))
    return false;

  // If the first instruction in BB isn't a phi, we won't be able to infer
  // anything special about any particular predecessor.
  if (!isa<PHINode>(BB->front()))
    return false;

  // If this BB is a landing pad, we won't be able to split the edge into it.
  if (BB->isEHPad())
    return false;

  // If we have a xor as the branch input to this block, and we know that the
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
  // the condition into the predecessor and fix that value to true, saving some
  // logical ops on that path and encouraging other paths to simplify.
  //
  // This copies something like this:
  //
  //  BB:
  //    %X = phi i1 [1],  [%X']
  //    %Y = icmp eq i32 %A, %B
  //    %Z = xor i1 %X, %Y
  //    br i1 %Z, ...
  //
  // Into:
  //  BB':
  //    %Y = icmp ne i32 %A, %B
  //    br i1 %Y, ...

  PredValueInfoTy XorOpValues;
  bool isLHS = true;
  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
                                       WantInteger, BO)) {
    assert(XorOpValues.empty());
    if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
                                         WantInteger, BO))
      return false;
    isLHS = false;
  }

  assert(!XorOpValues.empty() &&
         "computeValueKnownInPredecessors returned true with no values");

  // Scan the information to see which is most popular: true or false. The
  // predecessors can be of the set true, false, or undef.
  unsigned NumTrue = 0, NumFalse = 0;
  for (const auto &XorOpValue : XorOpValues) {
    if (isa<UndefValue>(XorOpValue.first))
      // Ignore undefs for the count.
      continue;
    if (cast<ConstantInt>(XorOpValue.first)->isZero())
      ++NumFalse;
    else
      ++NumTrue;
  }

  // Determine which value to split on: true, false, or undef if neither.
  ConstantInt *SplitVal = nullptr;
  if (NumTrue > NumFalse)
    SplitVal = ConstantInt::getTrue(BB->getContext());
  else if (NumTrue != 0 || NumFalse != 0)
    SplitVal = ConstantInt::getFalse(BB->getContext());

  // Collect all of the blocks that this can be folded into so that we can
  // factor this once and clone it once.
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
  for (const auto &XorOpValue : XorOpValues) {
    if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
      continue;

    BlocksToFoldInto.push_back(XorOpValue.second);
  }

  // If we inferred a value for all of the predecessors, then duplication won't
  // help us. However, we can just replace the LHS or RHS with the constant.
  if (BlocksToFoldInto.size() ==
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
    if (!SplitVal) {
      // If all preds provide undef, just nuke the xor, because it is undef too.
      BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
      BO->eraseFromParent();
    } else if (SplitVal->isZero()) {
      // If all preds provide 0, replace the xor with the other input.
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
      BO->eraseFromParent();
    } else {
      // If all preds provide 1, set the computed value to 1.
      BO->setOperand(!isLHS, SplitVal);
    }

    return true;
  }

  // If any of the predecessors ends with an indirect goto, we can't change its
  // destination. Same for CallBr.
  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
        return isa<IndirectBrInst>(Pred->getTerminator()) ||
               isa<CallBrInst>(Pred->getTerminator());
      }))
    return false;

  // Try to duplicate BB into PredBB.
  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}

/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
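///
/// For illustration (hypothetical IR): if PHIBB contains
///   %v = phi i32 [ %x, %OldPred ], ...
/// and ValueMap maps %x to its clone %x.thread in NewPred, this adds the
/// entry [ %x.thread, %NewPred ] (or repeats [ %x, %NewPred ] if %x needed
/// no remapping).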
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
                                            BasicBlock *OldPred,
                                            BasicBlock *NewPred,
                                            DenseMap<Instruction*, Value*> &ValueMap) {
  for (PHINode &PN : PHIBB->phis()) {
    // Ok, we have a PHI node. Figure out what the incoming value was for the
    // DestBlock.
    Value *IV = PN.getIncomingValueForBlock(OldPred);

    // Remap the value if necessary.
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
      if (I != ValueMap.end())
        IV = I->second;
    }

    PN.addIncoming(IV, NewPred);
  }
}

/// Merge basic block BB into its sole predecessor if possible.
bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
  BasicBlock *SinglePred = BB->getSinglePredecessor();
  if (!SinglePred)
    return false;

  const Instruction *TI = SinglePred->getTerminator();
  if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
      SinglePred == BB || hasAddressTakenAndUsed(BB))
    return false;

  // If SinglePred was a loop header, BB becomes one.
  if (LoopHeaders.erase(SinglePred))
    LoopHeaders.insert(BB);

  LVI->eraseBlock(SinglePred);
  MergeBasicBlockIntoOnlyPred(BB, DTU);

  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
  // BB code within one basic block `BB`), we need to invalidate the LVI
  // information associated with BB, because the LVI information need not be
  // true for all of BB after the merge. For example,
  // before the merge, LVI info and code is as follows:
  // SinglePred: <LVI info1 for %p val>
  // %y = use of %p
  // call @exit() // need not transfer execution to successor.
  // assume(%p) // from this point on %p is true
  // br label %BB
  // BB: <LVI info2 for %p val, i.e. %p is true>
  // %x = use of %p
  // br label exit
  //
  // Note that this LVI info for blocks BB and SinglePred is correct for %p
  // (info2 and info1 respectively). After the merge and the deletion of the
  // LVI info1 for SinglePred, we have the following code:
  // BB: <LVI info2 for %p val>
  // %y = use of %p
  // call @exit()
  // assume(%p)
  // %x = use of %p <-- LVI info2 is correct from here onwards.
  // br label exit
  // LVI info2 for BB is incorrect at the beginning of BB.

  // Invalidate LVI information for BB if the LVI is not provably true for
  // all of BB.
  if (!isGuaranteedToTransferExecutionToSuccessor(BB))
    LVI->eraseBlock(BB);
  return true;
}

/// Update the SSA form. NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
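///
/// A minimal sketch of the situation this handles (names made up): if BB
/// defines %v and %v is used in some block OutBB reachable from both BB and
/// NewBB, SSAUpdater inserts
///   %v.merge = phi i32 [ %v, %BB ], [ %v.clone, %NewBB ]
/// at the common join and rewrites the use in OutBB to %v.merge.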
void JumpThreadingPass::updateSSA(
    BasicBlock *BB, BasicBlock *NewBB,
    DenseMap<Instruction *, Value *> &ValueMapping) {
  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value. This can require arbitrary
  // PHI insertion, which we are prepared to do; clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use *, 16> UsesToRename;

  for (Instruction &I : *BB) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        if (UserPN->getIncomingBlock(U) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;

      UsesToRename.push_back(&U);
    }

    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;
    LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");

    // We found a use of I outside of BB. Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I.getType(), I.getName());
    SSAUpdate.AddAvailableValue(BB, &I);
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);

    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
    LLVM_DEBUG(dbgs() << "\n");
  }
}

/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
/// arguments that come from PredBB. Return the map from the variables in the
/// source basic block to the variables in the newly created basic block.
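///
/// For example (hypothetical IR), a phi in the source block such as
///   %p = phi i32 [ 1, %PredBB ], [ 2, %Other ]
/// is cloned into NewBB as the trivial
///   %p = phi i32 [ 1, %PredBB ]
/// because NewBB is only ever entered from PredBB.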
DenseMap<Instruction *, Value *>
JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
                                     BasicBlock::iterator BE, BasicBlock *NewBB,
                                     BasicBlock *PredBB) {
  // We are going to have to map operands from the source basic block to the new
  // copy of the block 'NewBB'. If there are PHI nodes in the source basic
  // block, evaluate them to account for entry from PredBB.
  DenseMap<Instruction *, Value *> ValueMapping;

  // Clone the phi nodes of the source basic block into NewBB. The resulting
  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
  // might need to rewrite the operand of the cloned phi.
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
    ValueMapping[PN] = NewPN;
  }

  // Clone noalias scope declarations in the threaded block. When threading a
  // loop exit, we would otherwise end up with two identical scope declarations
  // visible at the same time.
  SmallVector<MDNode *> NoAliasScopes;
  DenseMap<MDNode *, MDNode *> ClonedScopes;
  LLVMContext &Context = PredBB->getContext();
  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);

  // Clone the non-phi instructions of the source basic block into NewBB,
  // keeping track of the mapping and using it to remap operands in the cloned
  // instructions.
  for (; BI != BE; ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getName());
    NewBB->getInstList().push_back(New);
    ValueMapping[&*BI] = New;
    adaptNoAliasScopes(New, ClonedScopes, Context);

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }
  }

  return ValueMapping;
}
/// Attempt to thread through two successive basic blocks.
bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
                                                         Value *Cond) {
  // Consider:
  //
  // PredBB:
  //   %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
  //   %tobool = icmp eq i32 %cond, 0
  //   br i1 %tobool, label %BB, label ...
  //
  // BB:
  //   %cmp = icmp eq i32* %var, null
  //   br i1 %cmp, label ..., label ...
  //
  // We don't know the value of %var at BB even if we know which incoming edge
  // we take to BB. However, once we duplicate PredBB for each of its incoming
  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
  // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.

  // Require that BB end with a Branch for simplicity.
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return false;

  // BB must have exactly one predecessor.
  BasicBlock *PredBB = BB->getSinglePredecessor();
  if (!PredBB)
    return false;

  // Require that PredBB end with a conditional Branch. If PredBB ends with an
  // unconditional branch, we should be merging PredBB and BB instead. For
  // simplicity, we don't deal with a switch.
  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
  if (!PredBBBranch || PredBBBranch->isUnconditional())
    return false;

  // If PredBB has exactly one incoming edge, we don't gain anything by copying
  // PredBB.
  if (PredBB->getSinglePredecessor())
    return false;

  // Don't thread through PredBB if it contains a successor edge to itself, in
  // which case we would infinite loop. Suppose we are threading an edge from
  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
  // successor edge to itself. If we allowed jump threading in this case, we
  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
  // PredBB.thread has a successor edge to PredBB, we would immediately come up
  // with another jump threading opportunity from PredBB.thread through PredBB
  // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
  // would keep peeling one iteration from PredBB.
  if (llvm::is_contained(successors(PredBB), PredBB))
    return false;

  // Don't thread across a loop header.
  if (LoopHeaders.count(PredBB))
    return false;

  // Avoid complication with duplicating EH pads.
  if (PredBB->isEHPad())
    return false;

  // Find a predecessor that we can thread. For simplicity, we only consider a
  // successor edge out of BB to which we thread exactly one incoming edge into
  // PredBB.
  unsigned ZeroCount = 0;
  unsigned OneCount = 0;
  BasicBlock *ZeroPred = nullptr;
  BasicBlock *OnePred = nullptr;
  for (BasicBlock *P : predecessors(PredBB)) {
    if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
            evaluateOnPredecessorEdge(BB, P, Cond))) {
      if (CI->isZero()) {
        ZeroCount++;
        ZeroPred = P;
      } else if (CI->isOne()) {
        OneCount++;
        OnePred = P;
      }
    }
  }

  // Disregard complicated cases where we have to thread multiple edges.
  BasicBlock *PredPredBB;
  if (ZeroCount == 1) {
    PredPredBB = ZeroPred;
  } else if (OneCount == 1) {
    PredPredBB = OnePred;
  } else {
    return false;
  }

  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);

  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << "  Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '")
             << BB->getName() << "' to dest "
             << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName()
             << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  // Compute the cost of duplicating BB and PredBB.
  unsigned BBCost =
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
  unsigned PredBBCost = getJumpThreadDuplicationCost(
      PredBB, PredBB->getTerminator(), BBDupThreshold);

  // Give up if costs are too high. We need to check BBCost and PredBBCost
  // individually before checking their sum because getJumpThreadDuplicationCost
  // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
      BBCost + PredBBCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << PredBBCost
                      << " for PredBB, " << BBCost << " for BB\n");
    return false;
  }

  // Now we are ready to duplicate PredBB.
  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
  return true;
}
void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
                                                    BasicBlock *PredBB,
                                                    BasicBlock *BB,
                                                    BasicBlock *SuccBB) {
  LLVM_DEBUG(dbgs() << "  Threading through '" << PredBB->getName() << "' and '"
                    << BB->getName() << "'\n");

  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());

  BasicBlock *NewBB =
      BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
                         PredBB->getParent(), PredBB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
                     BPI->getEdgeProbability(PredPredBB, PredBB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
  // to account for entry from PredPredBB.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);

  // Copy the edge probabilities from PredBB to NewBB.
  if (HasProfileData)
    BPI->copyEdgeProbabilities(PredBB, NewBB);

  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
  // This eliminates predecessors from PredPredBB, which requires us to simplify
  // any PHI nodes in PredBB.
  Instruction *PredPredTerm = PredPredBB->getTerminator();
  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
    if (PredPredTerm->getSuccessor(i) == PredBB) {
      PredBB->removePredecessor(PredPredBB, true);
      PredPredTerm->setSuccessor(i, NewBB);
    }

  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
                                  ValueMapping);
  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
                                  ValueMapping);

  DTU->applyUpdatesPermissive(
      {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
       {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
       {DominatorTree::Insert, PredPredBB, NewBB},
       {DominatorTree::Delete, PredPredBB, PredBB}});

  updateSSA(PredBB, NewBB, ValueMapping);

  // Clean up things like PHI nodes with single operands, dead instructions,
  // etc.
  SimplifyInstructionsInBlock(NewBB, TLI);
  SimplifyInstructionsInBlock(PredBB, TLI);

  SmallVector<BasicBlock *, 1> PredsToFactor;
  PredsToFactor.push_back(NewBB);
  threadEdge(BB, PredsToFactor, SuccBB);
}

/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool JumpThreadingPass::tryThreadEdge(
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
    BasicBlock *SuccBB) {
  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << "  Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
             << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName() << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  unsigned JumpThreadCost =
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
  if (JumpThreadCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }

  threadEdge(BB, PredBBs, SuccBB);
  return true;
}

/// threadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
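///
/// Schematically (a made-up example), threading PredBB -> BB -> SuccBB turns
///
///   PredBB -> BB -> SuccBB
///                \-> OtherBB
///
/// into
///
///   PredBB -> BB.thread -> SuccBB
///   (other preds) -> BB -> SuccBB / OtherBB
///
/// where BB.thread is a copy of BB whose conditional terminator has been
/// replaced by an unconditional branch to SuccBB.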
void JumpThreadingPass::threadEdge(BasicBlock *BB,
                                   const SmallVectorImpl<BasicBlock *> &PredBBs,
                                   BasicBlock *SuccBB) {
  assert(SuccBB != BB && "Don't create an infinite loop");

  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
         "Don't thread across loop headers");

  // Start by factoring the predecessors if needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    LLVM_DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
                      << " common predecessors.\n");
    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
  }

  // And finally, do it!
  LLVM_DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName()
                    << "' to '" << SuccBB->getName()
                    << "', across block:\n    " << *BB << "\n");

  LVI->threadEdge(PredBB, BB, SuccBB);

  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
                                         BB->getName()+".thread",
                                         BB->getParent(), BB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq =
        BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // Copy all the instructions from BB to NewBB except the terminator.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);

  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB. Insert the unconditional jump.
  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());

  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
  // PHI nodes for NewBB now.
  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);

  // Update the terminator of PredBB to jump to NewBB instead of BB. This
  // eliminates predecessors from BB, which requires us to simplify any PHI
  // nodes in BB.
  Instruction *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      BB->removePredecessor(PredBB, true);
      PredTerm->setSuccessor(i, NewBB);
    }

  // Enqueue required DT updates.
  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
                               {DominatorTree::Insert, PredBB, NewBB},
                               {DominatorTree::Delete, PredBB, BB}});

  updateSSA(BB, NewBB, ValueMapping);

  // At this point, the IR is fully up to date and consistent. Do a quick scan
  // over the new instructions and zap any that are constants or dead. This
  // frequently happens because of phi translation.
  SimplifyInstructionsInBlock(NewBB, TLI);

  // Update the edge weight from BB to SuccBB, which should be less than before.
  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);

  // Threaded an edge!
  ++NumThreads;
}

/// Create a new basic block that will be the predecessor of BB and successor of
/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
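///
/// Sketch (hypothetical): with Preds = {P1, P2}, the edges P1->BB and P2->BB
/// are replaced by P1->NewBB, P2->NewBB, and NewBB->BB; when profile data is
/// present, Freq(NewBB) = Freq(P1->BB) + Freq(P2->BB).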
  2104. BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
  2105. ArrayRef<BasicBlock *> Preds,
  2106. const char *Suffix) {
  2107. SmallVector<BasicBlock *, 2> NewBBs;
  2108. // Collect the frequencies of all predecessors of BB, which will be used to
  2109. // update the edge weight of the result of splitting predecessors.
  2110. DenseMap<BasicBlock *, BlockFrequency> FreqMap;
  2111. if (HasProfileData)
  2112. for (auto Pred : Preds)
  2113. FreqMap.insert(std::make_pair(
  2114. Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
  2115. // In the case when BB is a LandingPad block we create 2 new predecessors
  2116. // instead of just one.
  2117. if (BB->isLandingPad()) {
  2118. std::string NewName = std::string(Suffix) + ".split-lp";
  2119. SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
  2120. } else {
  2121. NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
  2122. }
  2123. std::vector<DominatorTree::UpdateType> Updates;
  2124. Updates.reserve((2 * Preds.size()) + NewBBs.size());
  2125. for (auto NewBB : NewBBs) {
  2126. BlockFrequency NewBBFreq(0);
  2127. Updates.push_back({DominatorTree::Insert, NewBB, BB});
  2128. for (auto Pred : predecessors(NewBB)) {
  2129. Updates.push_back({DominatorTree::Delete, Pred, BB});
  2130. Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  2131. if (HasProfileData) // Update frequencies between Pred -> NewBB.
  2132. NewBBFreq += FreqMap.lookup(Pred);
  2133. }
  2134. if (HasProfileData) // Apply the summed frequency to NewBB.
  2135. BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  2136. }
  2137. DTU->applyUpdatesPermissive(Updates);
  2138. return NewBBs[0];
  2139. }

bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
  assert(TI->getNumSuccessors() > 1 && "not a split");

  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
  if (!WeightsNode)
    return false;

  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
  if (MDName->getString() != "branch_weights")
    return false;
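
  // For reference, a well-formed branch_weights node for a two-successor
  // terminator looks like !{!"branch_weights", i32 20, i32 10} (the weights
  // here are illustrative).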

  // Ensure there are weights for all of the successors. Note that the first
  // operand to the metadata node is a name, not a weight.
  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
}

/// Update the block frequency of BB, and the branch weight and profile
/// metadata on the edge BB->SuccBB. This is done by scaling the weight of
/// BB->SuccBB by 1 - Freq(PredBB->BB) / Freq(BB->SuccBB).
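///
/// Worked example (hypothetical numbers): if Freq(BB->SuccBB) was 100 and the
/// threaded edge now carries 40 of it through NewBB, the remaining BB->SuccBB
/// frequency is 100 - 40 = 60, i.e. the old weight scaled by 1 - 40/100.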
void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
                                                     BasicBlock *BB,
                                                     BasicBlock *NewBB,
                                                     BasicBlock *SuccBB) {
  if (!HasProfileData)
    return;

  assert(BFI && BPI && "BFI & BPI should have been created here");

  // As the edge from PredBB to BB is deleted, we have to update the block
  // frequency of BB.
  auto BBOrigFreq = BFI->getBlockFreq(BB);
  auto NewBBFreq = BFI->getBlockFreq(NewBB);
  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
  auto BBNewFreq = BBOrigFreq - NewBBFreq;
  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());

  // Collect updated outgoing edges' frequencies from BB and use them to update
  // edge probabilities.
  SmallVector<uint64_t, 4> BBSuccFreq;
  for (BasicBlock *Succ : successors(BB)) {
    auto SuccFreq = (Succ == SuccBB)
                        ? BB2SuccBBFreq - NewBBFreq
                        : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
    BBSuccFreq.push_back(SuccFreq.getFrequency());
  }

  uint64_t MaxBBSuccFreq =
      *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());

  SmallVector<BranchProbability, 4> BBSuccProbs;
  if (MaxBBSuccFreq == 0)
    BBSuccProbs.assign(BBSuccFreq.size(),
                       {1, static_cast<unsigned>(BBSuccFreq.size())});
  else {
    for (uint64_t Freq : BBSuccFreq)
      BBSuccProbs.push_back(
          BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
    // Normalize edge probabilities so that they sum up to one.
    BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
                                              BBSuccProbs.end());
  }

  // Update edge probabilities in BPI.
  BPI->setEdgeProbability(BB, BBSuccProbs);

  // Update the profile metadata as well.
  //
  // Don't do this if the profile of the transformed blocks was statically
  // estimated. (This can happen in completely cold parts of the CFG even when
  // the function has an entry frequency.)
  //
  // In this case we don't want to suggest to subsequent passes that the
  // calculated weights are fully consistent. Consider this graph:
  //
  //                 check_1
  //             50% /   |
  //             eq_1    | 50%
  //                 \   |
  //                 check_2
  //             50% /   |
  //             eq_2    | 50%
  //                 \   |
  //                 check_3
  //             50% /   |
  //             eq_3    | 50%
  //                 \   |
  //
  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
  // the overall probabilities are inconsistent; the total probability that the
  // value is either 1, 2 or 3 is 150%.
  //
  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
  // the loop exit edge. Then based solely on static estimation we would assume
  // the loop was extremely hot.
  //
  // FIXME: Fix this up locally as well so that BPI and BFI stay consistent; we
  // shouldn't make edges extremely likely or unlikely based solely on static
  // estimation.
  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
    SmallVector<uint32_t, 4> Weights;
    for (auto Prob : BBSuccProbs)
      Weights.push_back(Prob.getNumerator());

    auto TI = BB->getTerminator();
    TI->setMetadata(
        LLVMContext::MD_prof,
        MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
  }
}

/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB, do so now; this
/// improves the odds that the branch will be on an analyzable instruction
/// like a compare.
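///
/// Illustrative IR for the pattern (names are hypothetical):
///   PredBB:
///     br label %BB
///   BB:
///     %p = phi i1 [ true, %PredBB ], [ %x, %Other ]
///     br i1 %p, label %T, label %F
/// After duplication, PredBB's copy of the branch has the constant condition
/// true, so it can subsequently be folded.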
bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
  assert(!PredBBs.empty() && "Can't handle an empty set");

  // If BB is a loop header, then duplicating this block outside the loop would
  // cause us to transform this into an irreducible loop; don't do this.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
    LLVM_DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
                      << "' into predecessor block '" << PredBBs[0]->getName()
                      << "' - it might create an irreducible loop!\n");
    return false;
  }

  unsigned DuplicationCost =
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
  if (DuplicationCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
                      << "' - Cost is too high: " << DuplicationCost << "\n");
    return false;
  }

  // And finally, do it! Start by factoring the predecessors if needed.
  std::vector<DominatorTree::UpdateType> Updates;
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    LLVM_DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
                      << " common predecessors.\n");
    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
  }
  Updates.push_back({DominatorTree::Delete, PredBB, BB});

  // Okay, we decided to do this! Clone all the instructions in BB onto the end
  // of PredBB.
  LLVM_DEBUG(dbgs() << "  Duplicating block '" << BB->getName()
                    << "' into end of '" << PredBB->getName()
                    << "' to eliminate branch on phi. Cost: "
                    << DuplicationCost << " block is:" << *BB << "\n");

  // Unless PredBB ends with an unconditional branch, split the edge so that we
  // can just clone the bits from BB into the end of the new PredBB.
  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());

  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
    BasicBlock *OldPredBB = PredBB;
    PredBB = SplitEdge(OldPredBB, BB);
    Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
    Updates.push_back({DominatorTree::Insert, PredBB, BB});
    Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
  }

  // We are going to have to map operands from the original BB block into the
  // PredBB block. Evaluate PHI nodes in BB.
  DenseMap<Instruction *, Value *> ValueMapping;

  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

  // Clone the non-phi instructions of BB into PredBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; BI != BB->end(); ++BI) {
    Instruction *New = BI->clone();

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }

    // If this instruction can be simplified after the operands are updated,
    // just use the simplified value instead. This frequently happens due to
    // phi translation.
    if (Value *IV = SimplifyInstruction(
            New,
            {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
      ValueMapping[&*BI] = IV;
      if (!New->mayHaveSideEffects()) {
        New->deleteValue();
        New = nullptr;
      }
    } else {
      ValueMapping[&*BI] = New;
    }
    if (New) {
      // Otherwise, insert the new instruction into the block.
      New->setName(BI->getName());
      PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
      // Update Dominance from simplified New instruction operands.
      for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
        if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
          Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
    }
  }

  // Check to see if the targets of the branch had PHI nodes. If so, we need to
  // add entries to the PHI nodes for the branch from PredBB now.
  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
                                  ValueMapping);
  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
                                  ValueMapping);

  updateSSA(BB, PredBB, ValueMapping);

  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
  // that we nuked.
  BB->removePredecessor(PredBB, true);

  // Remove the unconditional branch at the end of the PredBB block.
  OldPredBranch->eraseFromParent();
  if (HasProfileData)
    BPI->copyEdgeProbabilities(BB, PredBB);
  DTU->applyUpdatesPermissive(Updates);

  ++NumDupes;
  return true;
}

// Pred is a predecessor of BB with an unconditional branch to BB. SI is
// a Select instruction in Pred. BB has other predecessors and SI is used in
// a PHI node in BB. SI has no other use.
// A new basic block, NewBB, is created, SI is converted into a conditional
// branch on its condition, and SI is erased from its parent.
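//
// Illustrative IR (names are hypothetical):
//   Pred:
//     %s = select i1 %c, i32 %t, i32 %f
//     br label %BB
//   BB:
//     %phi = phi i32 [ %s, %Pred ], ...
// becomes
//   Pred:
//     br i1 %c, label %select.unfold, label %BB
//   select.unfold:
//     br label %BB
//   BB:
//     %phi = phi i32 [ %f, %Pred ], [ %t, %select.unfold ], ...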
void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
                                          SelectInst *SI, PHINode *SIUse,
                                          unsigned Idx) {
  // Expand the select.
  //
  // Pred --
  //  |    v
  //  |  NewBB
  //  |    |
  //  |-----
  //  v
  // BB
  BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
                                         BB->getParent(), BB);
  // Move the unconditional branch to NewBB.
  PredTerm->removeFromParent();
  NewBB->getInstList().insert(NewBB->end(), PredTerm);
  // Create a conditional branch and update PHI nodes.
  BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
  SIUse->setIncomingValue(Idx, SI->getFalseValue());
  SIUse->addIncoming(SI->getTrueValue(), NewBB);

  // The select is now dead.
  SI->eraseFromParent();
  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
                               {DominatorTree::Insert, Pred, NewBB}});

  // Update any other PHI nodes in BB.
  for (BasicBlock::iterator BI = BB->begin();
       PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
    if (Phi != SIUse)
      Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
}
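
/// Look for a switch in BB whose condition is a PHI node defined in BB. If
/// some incoming value of the PHI is a single-use select defined in the
/// corresponding predecessor, and that predecessor ends in an unconditional
/// branch, unfold the select so the switch can later be threaded.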
bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());

  if (!CondPHI || CondPHI->getParent() != BB)
    return false;

  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *Pred = CondPHI->getIncomingBlock(I);
    SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));

    // The second and third conditions can potentially be relaxed. Currently
    // they help to simplify the code and allow us to reuse existing code,
    // developed for tryToUnfoldSelect(CmpInst *, BasicBlock *).
    if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
      continue;

    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredTerm || !PredTerm->isUnconditional())
      continue;

    unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
    return true;
  }
  return false;
}

/// tryToUnfoldSelect - Look for blocks of the form
/// bb1:
///   %a = select
///   br bb2
///
/// bb2:
///   %p = phi [%a, %bb1] ...
///   %c = icmp %p
///   br i1 %c
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded. This later enables threading from bb1 over bb2.
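///
/// For example (hypothetical values): with %a = select i1 %x, i32 0, i32 5 in
/// bb1 and %c = icmp eq i32 %p, 0 in bb2, the select's true arm (0) makes %c
/// true while its false arm (5) makes %c false; because the arms disagree,
/// unfolding the select lets each path be threaded separately.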
bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));

  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
      CondLHS->getParent() != BB)
    return false;

  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *Pred = CondLHS->getIncomingBlock(I);
    SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));

    // Check whether one of the incoming values is a select in the
    // corresponding predecessor.
    if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
      continue;

    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredTerm || !PredTerm->isUnconditional())
      continue;

    // Now check if one of the select values would allow us to constant fold
    // the terminator in BB. We don't do the transform if both sides fold;
    // those cases will be threaded in any case.
    LazyValueInfo::Tristate LHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
                                CondRHS, Pred, BB, CondCmp);
    LazyValueInfo::Tristate RHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
                                CondRHS, Pred, BB, CondCmp);
    if ((LHSFolds != LazyValueInfo::Unknown ||
         RHSFolds != LazyValueInfo::Unknown) &&
        LHSFolds != RHSFolds) {
      unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
      return true;
    }
  }
  return false;
}

/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
/// same BB in the form
/// bb:
///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
///   %s = select %p, trueval, falseval
///
/// or
///
/// bb:
///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
///   %c = cmp %p, 0
///   %s = select %c, trueval, falseval
///
/// And expand the select into a branch structure. This later enables
/// jump-threading over bb in this pass.
///
/// Following the approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold the
/// select if the associated PHI has at least one constant. If the unfolded
/// select is not jump-threaded, it will be folded again in later
/// optimizations.
bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
  // This transform would reduce the quality of msan diagnostics.
  // Disable this transform under MemorySanitizer.
  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
    return false;

  // If threading this would thread across a loop header, don't thread the
  // edge. See the comments above findLoopHeaders for justifications and
  // caveats.
  if (LoopHeaders.count(BB))
    return false;

  for (BasicBlock::iterator BI = BB->begin();
       PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
    // Look for a Phi having at least one constant incoming value.
    if (llvm::all_of(PN->incoming_values(),
                     [](Value *V) { return !isa<ConstantInt>(V); }))
      continue;

    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
      // Check if SI is in BB and uses V as its condition.
      if (SI->getParent() != BB)
        return false;
      Value *Cond = SI->getCondition();
      return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
    };

    SelectInst *SI = nullptr;
    for (Use &U : PN->uses()) {
      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
        // Look for an ICmp in BB that compares PN with a constant and is the
        // condition of a Select.
        if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
              SI = SelectI;
              break;
            }
      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
        // Look for a Select in BB that uses PN as its condition.
        if (isUnfoldCandidate(SelectI, U.get())) {
          SI = SelectI;
          break;
        }
      }
    }

    if (!SI)
      continue;
    // Expand the select.
    Value *Cond = SI->getCondition();
    if (InsertFreezeWhenUnfoldingSelect &&
        !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
                                          &DTU->getDomTree()))
      Cond = new FreezeInst(Cond, "cond.fr", SI);
    Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
    BasicBlock *SplitBB = SI->getParent();
    BasicBlock *NewBB = Term->getParent();
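
    // SplitBlockAndInsertIfThen has turned BB into a triangle: BB branches to
    // NewBB when the condition is true and to SplitBB when it is false, and
    // NewBB falls through to SplitBB, where the PHI below merges the two
    // select values.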
    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
    NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
    NewPN->addIncoming(SI->getFalseValue(), BB);
    SI->replaceAllUsesWith(NewPN);
    SI->eraseFromParent();
    // NewBB and SplitBB are newly created blocks which require insertion.
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
    Updates.push_back({DominatorTree::Insert, BB, SplitBB});
    Updates.push_back({DominatorTree::Insert, BB, NewBB});
    Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
    // BB's successors were moved to SplitBB, update DTU accordingly.
    for (auto *Succ : successors(SplitBB)) {
      Updates.push_back({DominatorTree::Delete, BB, Succ});
      Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
    }
    DTU->applyUpdatesPermissive(Updates);
    return true;
  }
  return false;
}

/// Try to propagate a guard from the current BB into one of its predecessors
/// when another branch of execution implies that the condition of this
/// guard is always true. Currently we only process the simplest case that
/// looks like:
///
/// Start:
///   %cond = ...
///   br i1 %cond, label %T1, label %F1
/// T1:
///   br label %Merge
/// F1:
///   br label %Merge
/// Merge:
///   %condGuard = ...
///   call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
///
/// And cond either implies condGuard or !condGuard. In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to one of them.
bool JumpThreadingPass::processGuards(BasicBlock *BB) {
  using namespace PatternMatch;

  // We only want to deal with two predecessors.
  BasicBlock *Pred1, *Pred2;
  auto PI = pred_begin(BB), PE = pred_end(BB);
  if (PI == PE)
    return false;
  Pred1 = *PI++;
  if (PI == PE)
    return false;
  Pred2 = *PI++;
  if (PI != PE)
    return false;
  if (Pred1 == Pred2)
    return false;

  // Try to thread one of the guards of the block.
  // TODO: Look up deeper than to immediate predecessor?
  auto *Parent = Pred1->getSinglePredecessor();
  if (!Parent || Parent != Pred2->getSinglePredecessor())
    return false;

  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
    for (auto &I : *BB)
      if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
        return true;

  return false;
}

/// Try to propagate the guard from BB, which is the lower block of a diamond,
/// to one of its branches when the diamond's condition implies the guard's
/// condition.
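///
/// For example (hypothetical conditions): if the diamond branches on
/// `x > 10` and the guard checks `x > 5`, then the branch condition implies
/// the guard condition, so the true destination is safe and the guard only
/// needs to be duplicated into the false destination.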
bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
                                    BranchInst *BI) {
  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
  Value *GuardCond = Guard->getArgOperand(0);
  Value *BranchCond = BI->getCondition();
  BasicBlock *TrueDest = BI->getSuccessor(0);
  BasicBlock *FalseDest = BI->getSuccessor(1);

  auto &DL = BB->getModule()->getDataLayout();
  bool TrueDestIsSafe = false;
  bool FalseDestIsSafe = false;

  // True dest is safe if BranchCond => GuardCond.
  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
  if (Impl && *Impl)
    TrueDestIsSafe = true;
  else {
    // False dest is safe if !BranchCond => GuardCond.
    Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
    if (Impl && *Impl)
      FalseDestIsSafe = true;
  }

  if (!TrueDestIsSafe && !FalseDestIsSafe)
    return false;

  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
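
  // Note the naming: the "unguarded" block is the safe destination, where the
  // guard's condition is already implied, while the guard itself will be
  // duplicated only into the other, "guarded" destination.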
  ValueToValueMapTy UnguardedMapping, GuardedMapping;
  Instruction *AfterGuard = Guard->getNextNode();
  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
  if (Cost > BBDupThreshold)
    return false;
  // Duplicate all instructions before the guard and the guard itself to the
  // branch where implication is not proved.
  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
  assert(GuardedBlock && "Could not create the guarded block?");
  // Duplicate all instructions before the guard in the unguarded branch.
  // Since we have successfully duplicated the guarded block and this block
  // has fewer instructions, we expect it to succeed.
  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
      BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
  assert(UnguardedBlock && "Could not create the unguarded block?");
  LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
                    << GuardedBlock->getName() << "\n");
  // Some instructions before the guard may still have uses. For them, we need
  // to create Phi nodes merging their copies in both guarded and unguarded
  // branches. Those instructions that have no uses can be just removed.
  SmallVector<Instruction *, 4> ToRemove;
  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
    if (!isa<PHINode>(&*BI))
      ToRemove.push_back(&*BI);

  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
  assert(InsertionPoint && "Empty block?");
  // Substitute with Phis & remove.
  for (auto *Inst : reverse(ToRemove)) {
    if (!Inst->use_empty()) {
      PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
      NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
      NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
      NewPN->insertBefore(InsertionPoint);
      Inst->replaceAllUsesWith(NewPN);
    }
    Inst->eraseFromParent();
  }
  return true;
}