SimpleLoopUnswitch.cpp 138 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357
  1. ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
  9. #include "llvm/ADT/DenseMap.h"
  10. #include "llvm/ADT/STLExtras.h"
  11. #include "llvm/ADT/Sequence.h"
  12. #include "llvm/ADT/SetVector.h"
  13. #include "llvm/ADT/SmallPtrSet.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/ADT/Twine.h"
  17. #include "llvm/Analysis/AssumptionCache.h"
  18. #include "llvm/Analysis/BlockFrequencyInfo.h"
  19. #include "llvm/Analysis/CFG.h"
  20. #include "llvm/Analysis/CodeMetrics.h"
  21. #include "llvm/Analysis/GuardUtils.h"
  22. #include "llvm/Analysis/LoopAnalysisManager.h"
  23. #include "llvm/Analysis/LoopInfo.h"
  24. #include "llvm/Analysis/LoopIterator.h"
  25. #include "llvm/Analysis/LoopPass.h"
  26. #include "llvm/Analysis/MemorySSA.h"
  27. #include "llvm/Analysis/MemorySSAUpdater.h"
  28. #include "llvm/Analysis/MustExecute.h"
  29. #include "llvm/Analysis/ProfileSummaryInfo.h"
  30. #include "llvm/Analysis/ScalarEvolution.h"
  31. #include "llvm/Analysis/TargetTransformInfo.h"
  32. #include "llvm/Analysis/ValueTracking.h"
  33. #include "llvm/IR/BasicBlock.h"
  34. #include "llvm/IR/Constant.h"
  35. #include "llvm/IR/Constants.h"
  36. #include "llvm/IR/Dominators.h"
  37. #include "llvm/IR/Function.h"
  38. #include "llvm/IR/IRBuilder.h"
  39. #include "llvm/IR/InstrTypes.h"
  40. #include "llvm/IR/Instruction.h"
  41. #include "llvm/IR/Instructions.h"
  42. #include "llvm/IR/IntrinsicInst.h"
  43. #include "llvm/IR/PatternMatch.h"
  44. #include "llvm/IR/Use.h"
  45. #include "llvm/IR/Value.h"
  46. #include "llvm/InitializePasses.h"
  47. #include "llvm/Pass.h"
  48. #include "llvm/Support/Casting.h"
  49. #include "llvm/Support/CommandLine.h"
  50. #include "llvm/Support/Debug.h"
  51. #include "llvm/Support/ErrorHandling.h"
  52. #include "llvm/Support/GenericDomTree.h"
  53. #include "llvm/Support/InstructionCost.h"
  54. #include "llvm/Support/raw_ostream.h"
  55. #include "llvm/Transforms/Scalar/LoopPassManager.h"
  56. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  57. #include "llvm/Transforms/Utils/Cloning.h"
  58. #include "llvm/Transforms/Utils/Local.h"
  59. #include "llvm/Transforms/Utils/LoopUtils.h"
  60. #include "llvm/Transforms/Utils/ValueMapper.h"
  61. #include <algorithm>
  62. #include <cassert>
  63. #include <iterator>
  64. #include <numeric>
  65. #include <optional>
  66. #include <utility>
  67. #define DEBUG_TYPE "simple-loop-unswitch"
  68. using namespace llvm;
  69. using namespace llvm::PatternMatch;
  70. STATISTIC(NumBranches, "Number of branches unswitched");
  71. STATISTIC(NumSwitches, "Number of switches unswitched");
  72. STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
  73. STATISTIC(NumTrivial, "Number of unswitches that are trivial");
  74. STATISTIC(
  75. NumCostMultiplierSkipped,
  76. "Number of unswitch candidates that had their cost multiplier skipped");
  77. static cl::opt<bool> EnableNonTrivialUnswitch(
  78. "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
  79. cl::desc("Forcibly enables non-trivial loop unswitching rather than "
  80. "following the configuration passed into the pass."));
  81. static cl::opt<int>
  82. UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
  83. cl::desc("The cost threshold for unswitching a loop."));
  84. static cl::opt<bool> EnableUnswitchCostMultiplier(
  85. "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
  86. cl::desc("Enable unswitch cost multiplier that prohibits exponential "
  87. "explosion in nontrivial unswitch."));
  88. static cl::opt<int> UnswitchSiblingsToplevelDiv(
  89. "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
  90. cl::desc("Toplevel siblings divisor for cost multiplier."));
  91. static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
  92. "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
  93. cl::desc("Number of unswitch candidates that are ignored when calculating "
  94. "cost multiplier."));
  95. static cl::opt<bool> UnswitchGuards(
  96. "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
  97. cl::desc("If enabled, simple loop unswitching will also consider "
  98. "llvm.experimental.guard intrinsics as unswitch candidates."));
  99. static cl::opt<bool> DropNonTrivialImplicitNullChecks(
  100. "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
  101. cl::init(false), cl::Hidden,
  102. cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
  103. "null checks to save time analyzing if we can keep it."));
  104. static cl::opt<unsigned>
  105. MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
  106. cl::desc("Max number of memory uses to explore during "
  107. "partial unswitching analysis"),
  108. cl::init(100), cl::Hidden);
  109. static cl::opt<bool> FreezeLoopUnswitchCond(
  110. "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
  111. cl::desc("If enabled, the freeze instruction will be added to condition "
  112. "of loop unswitch to prevent miscompilation."));
  113. namespace {
  114. struct NonTrivialUnswitchCandidate {
  115. Instruction *TI = nullptr;
  116. TinyPtrVector<Value *> Invariants;
  117. std::optional<InstructionCost> Cost;
  118. NonTrivialUnswitchCandidate(
  119. Instruction *TI, ArrayRef<Value *> Invariants,
  120. std::optional<InstructionCost> Cost = std::nullopt)
  121. : TI(TI), Invariants(Invariants), Cost(Cost){};
  122. };
  123. } // end anonymous namespace.
  124. // Helper to skip (select x, true, false), which matches both a logical AND and
  125. // OR and can confuse code that tries to determine if \p Cond is either a
  126. // logical AND or OR but not both.
  127. static Value *skipTrivialSelect(Value *Cond) {
  128. Value *CondNext;
  129. while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
  130. Cond = CondNext;
  131. return Cond;
  132. }
  133. /// Collect all of the loop invariant input values transitively used by the
  134. /// homogeneous instruction graph from a given root.
  135. ///
  136. /// This essentially walks from a root recursively through loop variant operands
  137. /// which have perform the same logical operation (AND or OR) and finds all
  138. /// inputs which are loop invariant. For some operations these can be
  139. /// re-associated and unswitched out of the loop entirely.
  140. static TinyPtrVector<Value *>
  141. collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
  142. const LoopInfo &LI) {
  143. assert(!L.isLoopInvariant(&Root) &&
  144. "Only need to walk the graph if root itself is not invariant.");
  145. TinyPtrVector<Value *> Invariants;
  146. bool IsRootAnd = match(&Root, m_LogicalAnd());
  147. bool IsRootOr = match(&Root, m_LogicalOr());
  148. // Build a worklist and recurse through operators collecting invariants.
  149. SmallVector<Instruction *, 4> Worklist;
  150. SmallPtrSet<Instruction *, 8> Visited;
  151. Worklist.push_back(&Root);
  152. Visited.insert(&Root);
  153. do {
  154. Instruction &I = *Worklist.pop_back_val();
  155. for (Value *OpV : I.operand_values()) {
  156. // Skip constants as unswitching isn't interesting for them.
  157. if (isa<Constant>(OpV))
  158. continue;
  159. // Add it to our result if loop invariant.
  160. if (L.isLoopInvariant(OpV)) {
  161. Invariants.push_back(OpV);
  162. continue;
  163. }
  164. // If not an instruction with the same opcode, nothing we can do.
  165. Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
  166. if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
  167. (IsRootOr && match(OpI, m_LogicalOr())))) {
  168. // Visit this operand.
  169. if (Visited.insert(OpI).second)
  170. Worklist.push_back(OpI);
  171. }
  172. }
  173. } while (!Worklist.empty());
  174. return Invariants;
  175. }
  176. static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
  177. Constant &Replacement) {
  178. assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
  179. // Replace uses of LIC in the loop with the given constant.
  180. // We use make_early_inc_range as set invalidates the iterator.
  181. for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
  182. Instruction *UserI = dyn_cast<Instruction>(U.getUser());
  183. // Replace this use within the loop body.
  184. if (UserI && L.contains(UserI))
  185. U.set(&Replacement);
  186. }
  187. }
  188. /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
  189. /// incoming values along this edge.
  190. static bool areLoopExitPHIsLoopInvariant(const Loop &L,
  191. const BasicBlock &ExitingBB,
  192. const BasicBlock &ExitBB) {
  193. for (const Instruction &I : ExitBB) {
  194. auto *PN = dyn_cast<PHINode>(&I);
  195. if (!PN)
  196. // No more PHIs to check.
  197. return true;
  198. // If the incoming value for this edge isn't loop invariant the unswitch
  199. // won't be trivial.
  200. if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
  201. return false;
  202. }
  203. llvm_unreachable("Basic blocks should never be empty!");
  204. }
  205. /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
  206. /// end of \p BB and conditionally branch on the copied condition. We only
  207. /// branch on a single value.
  208. static void buildPartialUnswitchConditionalBranch(
  209. BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
  210. BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
  211. const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
  212. IRBuilder<> IRB(&BB);
  213. SmallVector<Value *> FrozenInvariants;
  214. for (Value *Inv : Invariants) {
  215. if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
  216. Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
  217. FrozenInvariants.push_back(Inv);
  218. }
  219. Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
  220. : IRB.CreateAnd(FrozenInvariants);
  221. IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
  222. Direction ? &NormalSucc : &UnswitchedSucc);
  223. }
  224. /// Copy a set of loop invariant values, and conditionally branch on them.
  225. static void buildPartialInvariantUnswitchConditionalBranch(
  226. BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
  227. BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
  228. MemorySSAUpdater *MSSAU) {
  229. ValueToValueMapTy VMap;
  230. for (auto *Val : reverse(ToDuplicate)) {
  231. Instruction *Inst = cast<Instruction>(Val);
  232. Instruction *NewInst = Inst->clone();
  233. NewInst->insertInto(&BB, BB.end());
  234. RemapInstruction(NewInst, VMap,
  235. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  236. VMap[Val] = NewInst;
  237. if (!MSSAU)
  238. continue;
  239. MemorySSA *MSSA = MSSAU->getMemorySSA();
  240. if (auto *MemUse =
  241. dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
  242. auto *DefiningAccess = MemUse->getDefiningAccess();
  243. // Get the first defining access before the loop.
  244. while (L.contains(DefiningAccess->getBlock())) {
  245. // If the defining access is a MemoryPhi, get the incoming
  246. // value for the pre-header as defining access.
  247. if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
  248. DefiningAccess =
  249. MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
  250. else
  251. DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
  252. }
  253. MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
  254. NewInst->getParent(),
  255. MemorySSA::BeforeTerminator);
  256. }
  257. }
  258. IRBuilder<> IRB(&BB);
  259. Value *Cond = VMap[ToDuplicate[0]];
  260. IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
  261. Direction ? &NormalSucc : &UnswitchedSucc);
  262. }
  263. /// Rewrite the PHI nodes in an unswitched loop exit basic block.
  264. ///
  265. /// Requires that the loop exit and unswitched basic block are the same, and
  266. /// that the exiting block was a unique predecessor of that block. Rewrites the
  267. /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
  268. /// PHI nodes from the old preheader that now contains the unswitched
  269. /// terminator.
  270. static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
  271. BasicBlock &OldExitingBB,
  272. BasicBlock &OldPH) {
  273. for (PHINode &PN : UnswitchedBB.phis()) {
  274. // When the loop exit is directly unswitched we just need to update the
  275. // incoming basic block. We loop to handle weird cases with repeated
  276. // incoming blocks, but expect to typically only have one operand here.
  277. for (auto i : seq<int>(0, PN.getNumOperands())) {
  278. assert(PN.getIncomingBlock(i) == &OldExitingBB &&
  279. "Found incoming block different from unique predecessor!");
  280. PN.setIncomingBlock(i, &OldPH);
  281. }
  282. }
  283. }
  284. /// Rewrite the PHI nodes in the loop exit basic block and the split off
  285. /// unswitched block.
  286. ///
  287. /// Because the exit block remains an exit from the loop, this rewrites the
  288. /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
  289. /// nodes into the unswitched basic block to select between the value in the
  290. /// old preheader and the loop exit.
  291. static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
  292. BasicBlock &UnswitchedBB,
  293. BasicBlock &OldExitingBB,
  294. BasicBlock &OldPH,
  295. bool FullUnswitch) {
  296. assert(&ExitBB != &UnswitchedBB &&
  297. "Must have different loop exit and unswitched blocks!");
  298. Instruction *InsertPt = &*UnswitchedBB.begin();
  299. for (PHINode &PN : ExitBB.phis()) {
  300. auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
  301. PN.getName() + ".split", InsertPt);
  302. // Walk backwards over the old PHI node's inputs to minimize the cost of
  303. // removing each one. We have to do this weird loop manually so that we
  304. // create the same number of new incoming edges in the new PHI as we expect
  305. // each case-based edge to be included in the unswitched switch in some
  306. // cases.
  307. // FIXME: This is really, really gross. It would be much cleaner if LLVM
  308. // allowed us to create a single entry for a predecessor block without
  309. // having separate entries for each "edge" even though these edges are
  310. // required to produce identical results.
  311. for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
  312. if (PN.getIncomingBlock(i) != &OldExitingBB)
  313. continue;
  314. Value *Incoming = PN.getIncomingValue(i);
  315. if (FullUnswitch)
  316. // No more edge from the old exiting block to the exit block.
  317. PN.removeIncomingValue(i);
  318. NewPN->addIncoming(Incoming, &OldPH);
  319. }
  320. // Now replace the old PHI with the new one and wire the old one in as an
  321. // input to the new one.
  322. PN.replaceAllUsesWith(NewPN);
  323. NewPN->addIncoming(&PN, &ExitBB);
  324. }
  325. }
  326. /// Hoist the current loop up to the innermost loop containing a remaining exit.
  327. ///
  328. /// Because we've removed an exit from the loop, we may have changed the set of
  329. /// loops reachable and need to move the current loop up the loop nest or even
  330. /// to an entirely separate nest.
  331. static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
  332. DominatorTree &DT, LoopInfo &LI,
  333. MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
  334. // If the loop is already at the top level, we can't hoist it anywhere.
  335. Loop *OldParentL = L.getParentLoop();
  336. if (!OldParentL)
  337. return;
  338. SmallVector<BasicBlock *, 4> Exits;
  339. L.getExitBlocks(Exits);
  340. Loop *NewParentL = nullptr;
  341. for (auto *ExitBB : Exits)
  342. if (Loop *ExitL = LI.getLoopFor(ExitBB))
  343. if (!NewParentL || NewParentL->contains(ExitL))
  344. NewParentL = ExitL;
  345. if (NewParentL == OldParentL)
  346. return;
  347. // The new parent loop (if different) should always contain the old one.
  348. if (NewParentL)
  349. assert(NewParentL->contains(OldParentL) &&
  350. "Can only hoist this loop up the nest!");
  351. // The preheader will need to move with the body of this loop. However,
  352. // because it isn't in this loop we also need to update the primary loop map.
  353. assert(OldParentL == LI.getLoopFor(&Preheader) &&
  354. "Parent loop of this loop should contain this loop's preheader!");
  355. LI.changeLoopFor(&Preheader, NewParentL);
  356. // Remove this loop from its old parent.
  357. OldParentL->removeChildLoop(&L);
  358. // Add the loop either to the new parent or as a top-level loop.
  359. if (NewParentL)
  360. NewParentL->addChildLoop(&L);
  361. else
  362. LI.addTopLevelLoop(&L);
  363. // Remove this loops blocks from the old parent and every other loop up the
  364. // nest until reaching the new parent. Also update all of these
  365. // no-longer-containing loops to reflect the nesting change.
  366. for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
  367. OldContainingL = OldContainingL->getParentLoop()) {
  368. llvm::erase_if(OldContainingL->getBlocksVector(),
  369. [&](const BasicBlock *BB) {
  370. return BB == &Preheader || L.contains(BB);
  371. });
  372. OldContainingL->getBlocksSet().erase(&Preheader);
  373. for (BasicBlock *BB : L.blocks())
  374. OldContainingL->getBlocksSet().erase(BB);
  375. // Because we just hoisted a loop out of this one, we have essentially
  376. // created new exit paths from it. That means we need to form LCSSA PHI
  377. // nodes for values used in the no-longer-nested loop.
  378. formLCSSA(*OldContainingL, DT, &LI, SE);
  379. // We shouldn't need to form dedicated exits because the exit introduced
  380. // here is the (just split by unswitching) preheader. However, after trivial
  381. // unswitching it is possible to get new non-dedicated exits out of parent
  382. // loop so let's conservatively form dedicated exit blocks and figure out
  383. // if we can optimize later.
  384. formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
  385. /*PreserveLCSSA*/ true);
  386. }
  387. }
  388. // Return the top-most loop containing ExitBB and having ExitBB as exiting block
  389. // or the loop containing ExitBB, if there is no parent loop containing ExitBB
  390. // as exiting block.
  391. static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
  392. const LoopInfo &LI) {
  393. const Loop *TopMost = LI.getLoopFor(ExitBB);
  394. const Loop *Current = TopMost;
  395. while (Current) {
  396. if (Current->isLoopExiting(ExitBB))
  397. TopMost = Current;
  398. Current = Current->getParentLoop();
  399. }
  400. return TopMost;
  401. }
  402. /// Unswitch a trivial branch if the condition is loop invariant.
  403. ///
  404. /// This routine should only be called when loop code leading to the branch has
  405. /// been validated as trivial (no side effects). This routine checks if the
  406. /// condition is invariant and one of the successors is a loop exit. This
  407. /// allows us to unswitch without duplicating the loop, making it trivial.
  408. ///
  409. /// If this routine fails to unswitch the branch it returns false.
  410. ///
  411. /// If the branch can be unswitched, this routine splits the preheader and
  412. /// hoists the branch above that split. Preserves loop simplified form
  413. /// (splitting the exit block as necessary). It simplifies the branch within
  414. /// the loop to an unconditional branch but doesn't remove it entirely. Further
  415. /// cleanup can be done with some simplifycfg like pass.
  416. ///
  417. /// If `SE` is not null, it will be updated based on the potential loop SCEVs
  418. /// invalidated by this.
  419. static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
  420. LoopInfo &LI, ScalarEvolution *SE,
  421. MemorySSAUpdater *MSSAU) {
  422. assert(BI.isConditional() && "Can only unswitch a conditional branch!");
  423. LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
  424. // The loop invariant values that we want to unswitch.
  425. TinyPtrVector<Value *> Invariants;
  426. // When true, we're fully unswitching the branch rather than just unswitching
  427. // some input conditions to the branch.
  428. bool FullUnswitch = false;
  429. Value *Cond = skipTrivialSelect(BI.getCondition());
  430. if (L.isLoopInvariant(Cond)) {
  431. Invariants.push_back(Cond);
  432. FullUnswitch = true;
  433. } else {
  434. if (auto *CondInst = dyn_cast<Instruction>(Cond))
  435. Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
  436. if (Invariants.empty()) {
  437. LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
  438. return false;
  439. }
  440. }
  441. // Check that one of the branch's successors exits, and which one.
  442. bool ExitDirection = true;
  443. int LoopExitSuccIdx = 0;
  444. auto *LoopExitBB = BI.getSuccessor(0);
  445. if (L.contains(LoopExitBB)) {
  446. ExitDirection = false;
  447. LoopExitSuccIdx = 1;
  448. LoopExitBB = BI.getSuccessor(1);
  449. if (L.contains(LoopExitBB)) {
  450. LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
  451. return false;
  452. }
  453. }
  454. auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
  455. auto *ParentBB = BI.getParent();
  456. if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
  457. LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
  458. return false;
  459. }
  460. // When unswitching only part of the branch's condition, we need the exit
  461. // block to be reached directly from the partially unswitched input. This can
  462. // be done when the exit block is along the true edge and the branch condition
  463. // is a graph of `or` operations, or the exit block is along the false edge
  464. // and the condition is a graph of `and` operations.
  465. if (!FullUnswitch) {
  466. if (ExitDirection ? !match(Cond, m_LogicalOr())
  467. : !match(Cond, m_LogicalAnd())) {
  468. LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
  469. "non-full unswitch!\n");
  470. return false;
  471. }
  472. }
  473. LLVM_DEBUG({
  474. dbgs() << " unswitching trivial invariant conditions for: " << BI
  475. << "\n";
  476. for (Value *Invariant : Invariants) {
  477. dbgs() << " " << *Invariant << " == true";
  478. if (Invariant != Invariants.back())
  479. dbgs() << " ||";
  480. dbgs() << "\n";
  481. }
  482. });
  483. // If we have scalar evolutions, we need to invalidate them including this
  484. // loop, the loop containing the exit block and the topmost parent loop
  485. // exiting via LoopExitBB.
  486. if (SE) {
  487. if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
  488. SE->forgetLoop(ExitL);
  489. else
  490. // Forget the entire nest as this exits the entire nest.
  491. SE->forgetTopmostLoop(&L);
  492. SE->forgetBlockAndLoopDispositions();
  493. }
  494. if (MSSAU && VerifyMemorySSA)
  495. MSSAU->getMemorySSA()->verifyMemorySSA();
  496. // Split the preheader, so that we know that there is a safe place to insert
  497. // the conditional branch. We will change the preheader to have a conditional
  498. // branch on LoopCond.
  499. BasicBlock *OldPH = L.getLoopPreheader();
  500. BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
  501. // Now that we have a place to insert the conditional branch, create a place
  502. // to branch to: this is the exit block out of the loop that we are
  503. // unswitching. We need to split this if there are other loop predecessors.
  504. // Because the loop is in simplified form, *any* other predecessor is enough.
  505. BasicBlock *UnswitchedBB;
  506. if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
  507. assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
  508. "A branch's parent isn't a predecessor!");
  509. UnswitchedBB = LoopExitBB;
  510. } else {
  511. UnswitchedBB =
  512. SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
  513. }
  514. if (MSSAU && VerifyMemorySSA)
  515. MSSAU->getMemorySSA()->verifyMemorySSA();
  516. // Actually move the invariant uses into the unswitched position. If possible,
  517. // we do this by moving the instructions, but when doing partial unswitching
  518. // we do it by building a new merge of the values in the unswitched position.
  519. OldPH->getTerminator()->eraseFromParent();
  520. if (FullUnswitch) {
  521. // If fully unswitching, we can use the existing branch instruction.
  522. // Splice it into the old PH to gate reaching the new preheader and re-point
  523. // its successors.
  524. OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator());
  525. BI.setCondition(Cond);
  526. if (MSSAU) {
  527. // Temporarily clone the terminator, to make MSSA update cheaper by
  528. // separating "insert edge" updates from "remove edge" ones.
  529. BI.clone()->insertInto(ParentBB, ParentBB->end());
  530. } else {
  531. // Create a new unconditional branch that will continue the loop as a new
  532. // terminator.
  533. BranchInst::Create(ContinueBB, ParentBB);
  534. }
  535. BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
  536. BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
  537. } else {
  538. // Only unswitching a subset of inputs to the condition, so we will need to
  539. // build a new branch that merges the invariant inputs.
  540. if (ExitDirection)
  541. assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
  542. "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
  543. "condition!");
  544. else
  545. assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
  546. "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
  547. " condition!");
  548. buildPartialUnswitchConditionalBranch(
  549. *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
  550. FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
  551. }
  552. // Update the dominator tree with the added edge.
  553. DT.insertEdge(OldPH, UnswitchedBB);
  554. // After the dominator tree was updated with the added edge, update MemorySSA
  555. // if available.
  556. if (MSSAU) {
  557. SmallVector<CFGUpdate, 1> Updates;
  558. Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
  559. MSSAU->applyInsertUpdates(Updates, DT);
  560. }
  561. // Finish updating dominator tree and memory ssa for full unswitch.
  562. if (FullUnswitch) {
  563. if (MSSAU) {
  564. // Remove the cloned branch instruction.
  565. ParentBB->getTerminator()->eraseFromParent();
  566. // Create unconditional branch now.
  567. BranchInst::Create(ContinueBB, ParentBB);
  568. MSSAU->removeEdge(ParentBB, LoopExitBB);
  569. }
  570. DT.deleteEdge(ParentBB, LoopExitBB);
  571. }
  572. if (MSSAU && VerifyMemorySSA)
  573. MSSAU->getMemorySSA()->verifyMemorySSA();
  574. // Rewrite the relevant PHI nodes.
  575. if (UnswitchedBB == LoopExitBB)
  576. rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
  577. else
  578. rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
  579. *ParentBB, *OldPH, FullUnswitch);
  580. // The constant we can replace all of our invariants with inside the loop
  581. // body. If any of the invariants have a value other than this the loop won't
  582. // be entered.
  583. ConstantInt *Replacement = ExitDirection
  584. ? ConstantInt::getFalse(BI.getContext())
  585. : ConstantInt::getTrue(BI.getContext());
  586. // Since this is an i1 condition we can also trivially replace uses of it
  587. // within the loop with a constant.
  588. for (Value *Invariant : Invariants)
  589. replaceLoopInvariantUses(L, Invariant, *Replacement);
  590. // If this was full unswitching, we may have changed the nesting relationship
  591. // for this loop so hoist it to its correct parent if needed.
  592. if (FullUnswitch)
  593. hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
  594. if (MSSAU && VerifyMemorySSA)
  595. MSSAU->getMemorySSA()->verifyMemorySSA();
  596. LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
  597. ++NumTrivial;
  598. ++NumBranches;
  599. return true;
  600. }
  601. /// Unswitch a trivial switch if the condition is loop invariant.
  602. ///
  603. /// This routine should only be called when loop code leading to the switch has
  604. /// been validated as trivial (no side effects). This routine checks if the
  605. /// condition is invariant and that at least one of the successors is a loop
  606. /// exit. This allows us to unswitch without duplicating the loop, making it
  607. /// trivial.
  608. ///
  609. /// If this routine fails to unswitch the switch it returns false.
  610. ///
  611. /// If the switch can be unswitched, this routine splits the preheader and
  612. /// copies the switch above that split. If the default case is one of the
  613. /// exiting cases, it copies the non-exiting cases and points them at the new
  614. /// preheader. If the default case is not exiting, it copies the exiting cases
  615. /// and points the default at the preheader. It preserves loop simplified form
  616. /// (splitting the exit blocks as necessary). It simplifies the switch within
  617. /// the loop by removing now-dead cases. If the default case is one of those
  618. /// unswitched, it replaces its destination with a new basic block containing
  619. /// only unreachable. Such basic blocks, while technically loop exits, are not
  620. /// considered for unswitching so this is a stable transform and the same
  621. /// switch will not be revisited. If after unswitching there is only a single
  622. /// in-loop successor, the switch is further simplified to an unconditional
  623. /// branch. Still more cleanup can be done with some simplifycfg like pass.
  624. ///
  625. /// If `SE` is not null, it will be updated based on the potential loop SCEVs
  626. /// invalidated by this.
  627. static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
  628. LoopInfo &LI, ScalarEvolution *SE,
  629. MemorySSAUpdater *MSSAU) {
  630. LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
  631. Value *LoopCond = SI.getCondition();
  632. // If this isn't switching on an invariant condition, we can't unswitch it.
  633. if (!L.isLoopInvariant(LoopCond))
  634. return false;
  635. auto *ParentBB = SI.getParent();
  636. // The same check must be used both for the default and the exit cases. We
  637. // should never leave edges from the switch instruction to a basic block that
  638. // we are unswitching, hence the condition used to determine the default case
  639. // needs to also be used to populate ExitCaseIndices, which is then used to
  640. // remove cases from the switch.
  641. auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
  642. // BBToCheck is not an exit block if it is inside loop L.
  643. if (L.contains(&BBToCheck))
  644. return false;
  645. // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
  646. if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
  647. return false;
  648. // We do not unswitch a block that only has an unreachable statement, as
  649. // it's possible this is a previously unswitched block. Only unswitch if
  650. // either the terminator is not unreachable, or, if it is, it's not the only
  651. // instruction in the block.
  652. auto *TI = BBToCheck.getTerminator();
  653. bool isUnreachable = isa<UnreachableInst>(TI);
  654. return !isUnreachable ||
  655. (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
  656. };
  657. SmallVector<int, 4> ExitCaseIndices;
  658. for (auto Case : SI.cases())
  659. if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
  660. ExitCaseIndices.push_back(Case.getCaseIndex());
  661. BasicBlock *DefaultExitBB = nullptr;
  662. SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
  663. SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
  664. if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
  665. DefaultExitBB = SI.getDefaultDest();
  666. } else if (ExitCaseIndices.empty())
  667. return false;
  668. LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
  669. if (MSSAU && VerifyMemorySSA)
  670. MSSAU->getMemorySSA()->verifyMemorySSA();
  671. // We may need to invalidate SCEVs for the outermost loop reached by any of
  672. // the exits.
  673. Loop *OuterL = &L;
  674. if (DefaultExitBB) {
  675. // Clear out the default destination temporarily to allow accurate
  676. // predecessor lists to be examined below.
  677. SI.setDefaultDest(nullptr);
  678. // Check the loop containing this exit.
  679. Loop *ExitL = LI.getLoopFor(DefaultExitBB);
  680. if (!ExitL || ExitL->contains(OuterL))
  681. OuterL = ExitL;
  682. }
  683. // Store the exit cases into a separate data structure and remove them from
  684. // the switch.
  685. SmallVector<std::tuple<ConstantInt *, BasicBlock *,
  686. SwitchInstProfUpdateWrapper::CaseWeightOpt>,
  687. 4> ExitCases;
  688. ExitCases.reserve(ExitCaseIndices.size());
  689. SwitchInstProfUpdateWrapper SIW(SI);
  690. // We walk the case indices backwards so that we remove the last case first
  691. // and don't disrupt the earlier indices.
  692. for (unsigned Index : reverse(ExitCaseIndices)) {
  693. auto CaseI = SI.case_begin() + Index;
  694. // Compute the outer loop from this exit.
  695. Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
  696. if (!ExitL || ExitL->contains(OuterL))
  697. OuterL = ExitL;
  698. // Save the value of this case.
  699. auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
  700. ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
  701. // Delete the unswitched cases.
  702. SIW.removeCase(CaseI);
  703. }
  704. if (SE) {
  705. if (OuterL)
  706. SE->forgetLoop(OuterL);
  707. else
  708. SE->forgetTopmostLoop(&L);
  709. }
  710. // Check if after this all of the remaining cases point at the same
  711. // successor.
  712. BasicBlock *CommonSuccBB = nullptr;
  713. if (SI.getNumCases() > 0 &&
  714. all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
  715. return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
  716. }))
  717. CommonSuccBB = SI.case_begin()->getCaseSuccessor();
  718. if (!DefaultExitBB) {
  719. // If we're not unswitching the default, we need it to match any cases to
  720. // have a common successor or if we have no cases it is the common
  721. // successor.
  722. if (SI.getNumCases() == 0)
  723. CommonSuccBB = SI.getDefaultDest();
  724. else if (SI.getDefaultDest() != CommonSuccBB)
  725. CommonSuccBB = nullptr;
  726. }
  727. // Split the preheader, so that we know that there is a safe place to insert
  728. // the switch.
  729. BasicBlock *OldPH = L.getLoopPreheader();
  730. BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
  731. OldPH->getTerminator()->eraseFromParent();
  732. // Now add the unswitched switch.
  733. auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
  734. SwitchInstProfUpdateWrapper NewSIW(*NewSI);
  735. // Rewrite the IR for the unswitched basic blocks. This requires two steps.
  736. // First, we split any exit blocks with remaining in-loop predecessors. Then
  737. // we update the PHIs in one of two ways depending on if there was a split.
  738. // We walk in reverse so that we split in the same order as the cases
  739. // appeared. This is purely for convenience of reading the resulting IR, but
  740. // it doesn't cost anything really.
  741. SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
  742. SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
  743. // Handle the default exit if necessary.
  744. // FIXME: It'd be great if we could merge this with the loop below but LLVM's
  745. // ranges aren't quite powerful enough yet.
  746. if (DefaultExitBB) {
  747. if (pred_empty(DefaultExitBB)) {
  748. UnswitchedExitBBs.insert(DefaultExitBB);
  749. rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
  750. } else {
  751. auto *SplitBB =
  752. SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
  753. rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
  754. *ParentBB, *OldPH,
  755. /*FullUnswitch*/ true);
  756. DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
  757. }
  758. }
  759. // Note that we must use a reference in the for loop so that we update the
  760. // container.
  761. for (auto &ExitCase : reverse(ExitCases)) {
  762. // Grab a reference to the exit block in the pair so that we can update it.
  763. BasicBlock *ExitBB = std::get<1>(ExitCase);
  764. // If this case is the last edge into the exit block, we can simply reuse it
  765. // as it will no longer be a loop exit. No mapping necessary.
  766. if (pred_empty(ExitBB)) {
  767. // Only rewrite once.
  768. if (UnswitchedExitBBs.insert(ExitBB).second)
  769. rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
  770. continue;
  771. }
  772. // Otherwise we need to split the exit block so that we retain an exit
  773. // block from the loop and a target for the unswitched condition.
  774. BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
  775. if (!SplitExitBB) {
  776. // If this is the first time we see this, do the split and remember it.
  777. SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
  778. rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
  779. *ParentBB, *OldPH,
  780. /*FullUnswitch*/ true);
  781. }
  782. // Update the case pair to point to the split block.
  783. std::get<1>(ExitCase) = SplitExitBB;
  784. }
  785. // Now add the unswitched cases. We do this in reverse order as we built them
  786. // in reverse order.
  787. for (auto &ExitCase : reverse(ExitCases)) {
  788. ConstantInt *CaseVal = std::get<0>(ExitCase);
  789. BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
  790. NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
  791. }
  792. // If the default was unswitched, re-point it and add explicit cases for
  793. // entering the loop.
  794. if (DefaultExitBB) {
  795. NewSIW->setDefaultDest(DefaultExitBB);
  796. NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
  797. // We removed all the exit cases, so we just copy the cases to the
  798. // unswitched switch.
  799. for (const auto &Case : SI.cases())
  800. NewSIW.addCase(Case.getCaseValue(), NewPH,
  801. SIW.getSuccessorWeight(Case.getSuccessorIndex()));
  802. } else if (DefaultCaseWeight) {
  803. // We have to set branch weight of the default case.
  804. uint64_t SW = *DefaultCaseWeight;
  805. for (const auto &Case : SI.cases()) {
  806. auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
  807. assert(W &&
  808. "case weight must be defined as default case weight is defined");
  809. SW += *W;
  810. }
  811. NewSIW.setSuccessorWeight(0, SW);
  812. }
  813. // If we ended up with a common successor for every path through the switch
  814. // after unswitching, rewrite it to an unconditional branch to make it easy
  815. // to recognize. Otherwise we potentially have to recognize the default case
  816. // pointing at unreachable and other complexity.
  817. if (CommonSuccBB) {
  818. BasicBlock *BB = SI.getParent();
  819. // We may have had multiple edges to this common successor block, so remove
  820. // them as predecessors. We skip the first one, either the default or the
  821. // actual first case.
  822. bool SkippedFirst = DefaultExitBB == nullptr;
  823. for (auto Case : SI.cases()) {
  824. assert(Case.getCaseSuccessor() == CommonSuccBB &&
  825. "Non-common successor!");
  826. (void)Case;
  827. if (!SkippedFirst) {
  828. SkippedFirst = true;
  829. continue;
  830. }
  831. CommonSuccBB->removePredecessor(BB,
  832. /*KeepOneInputPHIs*/ true);
  833. }
  834. // Now nuke the switch and replace it with a direct branch.
  835. SIW.eraseFromParent();
  836. BranchInst::Create(CommonSuccBB, BB);
  837. } else if (DefaultExitBB) {
  838. assert(SI.getNumCases() > 0 &&
  839. "If we had no cases we'd have a common successor!");
  840. // Move the last case to the default successor. This is valid as if the
  841. // default got unswitched it cannot be reached. This has the advantage of
  842. // being simple and keeping the number of edges from this switch to
  843. // successors the same, and avoiding any PHI update complexity.
  844. auto LastCaseI = std::prev(SI.case_end());
  845. SI.setDefaultDest(LastCaseI->getCaseSuccessor());
  846. SIW.setSuccessorWeight(
  847. 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
  848. SIW.removeCase(LastCaseI);
  849. }
  850. // Walk the unswitched exit blocks and the unswitched split blocks and update
  851. // the dominator tree based on the CFG edits. While we are walking unordered
  852. // containers here, the API for applyUpdates takes an unordered list of
  853. // updates and requires them to not contain duplicates.
  854. SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
  855. for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
  856. DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
  857. DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
  858. }
  859. for (auto SplitUnswitchedPair : SplitExitBBMap) {
  860. DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
  861. DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
  862. }
  863. if (MSSAU) {
  864. MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
  865. if (VerifyMemorySSA)
  866. MSSAU->getMemorySSA()->verifyMemorySSA();
  867. } else {
  868. DT.applyUpdates(DTUpdates);
  869. }
  870. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  871. // We may have changed the nesting relationship for this loop so hoist it to
  872. // its correct parent if needed.
  873. hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
  874. if (MSSAU && VerifyMemorySSA)
  875. MSSAU->getMemorySSA()->verifyMemorySSA();
  876. ++NumTrivial;
  877. ++NumSwitches;
  878. LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
  879. return true;
  880. }
  881. /// This routine scans the loop to find a branch or switch which occurs before
  882. /// any side effects occur. These can potentially be unswitched without
  883. /// duplicating the loop. If a branch or switch is successfully unswitched the
  884. /// scanning continues to see if subsequent branches or switches have become
  885. /// trivial. Once all trivial candidates have been unswitched, this routine
  886. /// returns.
  887. ///
  888. /// The return value indicates whether anything was unswitched (and therefore
  889. /// changed).
  890. ///
  891. /// If `SE` is not null, it will be updated based on the potential loop SCEVs
  892. /// invalidated by this.
  893. static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
  894. LoopInfo &LI, ScalarEvolution *SE,
  895. MemorySSAUpdater *MSSAU) {
  896. bool Changed = false;
  897. // If loop header has only one reachable successor we should keep looking for
  898. // trivial condition candidates in the successor as well. An alternative is
  899. // to constant fold conditions and merge successors into loop header (then we
  900. // only need to check header's terminator). The reason for not doing this in
  901. // LoopUnswitch pass is that it could potentially break LoopPassManager's
  902. // invariants. Folding dead branches could either eliminate the current loop
  903. // or make other loops unreachable. LCSSA form might also not be preserved
  904. // after deleting branches. The following code keeps traversing loop header's
  905. // successors until it finds the trivial condition candidate (condition that
  906. // is not a constant). Since unswitching generates branches with constant
  907. // conditions, this scenario could be very common in practice.
  908. BasicBlock *CurrentBB = L.getHeader();
  909. SmallPtrSet<BasicBlock *, 8> Visited;
  910. Visited.insert(CurrentBB);
  911. do {
  912. // Check if there are any side-effecting instructions (e.g. stores, calls,
  913. // volatile loads) in the part of the loop that the code *would* execute
  914. // without unswitching.
  915. if (MSSAU) // Possible early exit with MSSA
  916. if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
  917. if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
  918. return Changed;
  919. if (llvm::any_of(*CurrentBB,
  920. [](Instruction &I) { return I.mayHaveSideEffects(); }))
  921. return Changed;
  922. Instruction *CurrentTerm = CurrentBB->getTerminator();
  923. if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
  924. // Don't bother trying to unswitch past a switch with a constant
  925. // condition. This should be removed prior to running this pass by
  926. // simplifycfg.
  927. if (isa<Constant>(SI->getCondition()))
  928. return Changed;
  929. if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
  930. // Couldn't unswitch this one so we're done.
  931. return Changed;
  932. // Mark that we managed to unswitch something.
  933. Changed = true;
  934. // If unswitching turned the terminator into an unconditional branch then
  935. // we can continue. The unswitching logic specifically works to fold any
  936. // cases it can into an unconditional branch to make it easier to
  937. // recognize here.
  938. auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
  939. if (!BI || BI->isConditional())
  940. return Changed;
  941. CurrentBB = BI->getSuccessor(0);
  942. continue;
  943. }
  944. auto *BI = dyn_cast<BranchInst>(CurrentTerm);
  945. if (!BI)
  946. // We do not understand other terminator instructions.
  947. return Changed;
  948. // Don't bother trying to unswitch past an unconditional branch or a branch
  949. // with a constant value. These should be removed by simplifycfg prior to
  950. // running this pass.
  951. if (!BI->isConditional() ||
  952. isa<Constant>(skipTrivialSelect(BI->getCondition())))
  953. return Changed;
  954. // Found a trivial condition candidate: non-foldable conditional branch. If
  955. // we fail to unswitch this, we can't do anything else that is trivial.
  956. if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
  957. return Changed;
  958. // Mark that we managed to unswitch something.
  959. Changed = true;
  960. // If we only unswitched some of the conditions feeding the branch, we won't
  961. // have collapsed it to a single successor.
  962. BI = cast<BranchInst>(CurrentBB->getTerminator());
  963. if (BI->isConditional())
  964. return Changed;
  965. // Follow the newly unconditional branch into its successor.
  966. CurrentBB = BI->getSuccessor(0);
  967. // When continuing, if we exit the loop or reach a previous visited block,
  968. // then we can not reach any trivial condition candidates (unfoldable
  969. // branch instructions or switch instructions) and no unswitch can happen.
  970. } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
  971. return Changed;
  972. }
  973. /// Build the cloned blocks for an unswitched copy of the given loop.
  974. ///
  975. /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
  976. /// after the split block (`SplitBB`) that will be used to select between the
  977. /// cloned and original loop.
  978. ///
  979. /// This routine handles cloning all of the necessary loop blocks and exit
  980. /// blocks including rewriting their instructions and the relevant PHI nodes.
  981. /// Any loop blocks or exit blocks which are dominated by a different successor
  982. /// than the one for this clone of the loop blocks can be trivially skipped. We
  983. /// use the `DominatingSucc` map to determine whether a block satisfies that
  984. /// property with a simple map lookup.
  985. ///
  986. /// It also correctly creates the unconditional branch in the cloned
  987. /// unswitched parent block to only point at the unswitched successor.
  988. ///
  989. /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
  990. /// block splitting is correctly reflected in `LoopInfo`, essentially all of
  991. /// the cloned blocks (and their loops) are left without full `LoopInfo`
  992. /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
  993. /// blocks to them but doesn't create the cloned `DominatorTree` structure and
  994. /// instead the caller must recompute an accurate DT. It *does* correctly
  995. /// update the `AssumptionCache` provided in `AC`.
  996. static BasicBlock *buildClonedLoopBlocks(
  997. Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
  998. ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
  999. BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
  1000. const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
  1001. ValueToValueMapTy &VMap,
  1002. SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
  1003. DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
  1004. ScalarEvolution *SE) {
  1005. SmallVector<BasicBlock *, 4> NewBlocks;
  1006. NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
  1007. // We will need to clone a bunch of blocks, wrap up the clone operation in
  1008. // a helper.
  1009. auto CloneBlock = [&](BasicBlock *OldBB) {
  1010. // Clone the basic block and insert it before the new preheader.
  1011. BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
  1012. NewBB->moveBefore(LoopPH);
  1013. // Record this block and the mapping.
  1014. NewBlocks.push_back(NewBB);
  1015. VMap[OldBB] = NewBB;
  1016. return NewBB;
  1017. };
  1018. // We skip cloning blocks when they have a dominating succ that is not the
  1019. // succ we are cloning for.
  1020. auto SkipBlock = [&](BasicBlock *BB) {
  1021. auto It = DominatingSucc.find(BB);
  1022. return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
  1023. };
  1024. // First, clone the preheader.
  1025. auto *ClonedPH = CloneBlock(LoopPH);
  1026. // Then clone all the loop blocks, skipping the ones that aren't necessary.
  1027. for (auto *LoopBB : L.blocks())
  1028. if (!SkipBlock(LoopBB))
  1029. CloneBlock(LoopBB);
  1030. // Split all the loop exit edges so that when we clone the exit blocks, if
  1031. // any of the exit blocks are *also* a preheader for some other loop, we
  1032. // don't create multiple predecessors entering the loop header.
  1033. for (auto *ExitBB : ExitBlocks) {
  1034. if (SkipBlock(ExitBB))
  1035. continue;
  1036. // When we are going to clone an exit, we don't need to clone all the
  1037. // instructions in the exit block and we want to ensure we have an easy
  1038. // place to merge the CFG, so split the exit first. This is always safe to
  1039. // do because there cannot be any non-loop predecessors of a loop exit in
  1040. // loop simplified form.
  1041. auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
  1042. // Rearrange the names to make it easier to write test cases by having the
  1043. // exit block carry the suffix rather than the merge block carrying the
  1044. // suffix.
  1045. MergeBB->takeName(ExitBB);
  1046. ExitBB->setName(Twine(MergeBB->getName()) + ".split");
  1047. // Now clone the original exit block.
  1048. auto *ClonedExitBB = CloneBlock(ExitBB);
  1049. assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
  1050. "Exit block should have been split to have one successor!");
  1051. assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
  1052. "Cloned exit block has the wrong successor!");
  1053. // Remap any cloned instructions and create a merge phi node for them.
  1054. for (auto ZippedInsts : llvm::zip_first(
  1055. llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
  1056. llvm::make_range(ClonedExitBB->begin(),
  1057. std::prev(ClonedExitBB->end())))) {
  1058. Instruction &I = std::get<0>(ZippedInsts);
  1059. Instruction &ClonedI = std::get<1>(ZippedInsts);
  1060. // The only instructions in the exit block should be PHI nodes and
  1061. // potentially a landing pad.
  1062. assert(
  1063. (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
  1064. "Bad instruction in exit block!");
  1065. // We should have a value map between the instruction and its clone.
  1066. assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
  1067. // Forget SCEVs based on exit phis in case SCEV looked through the phi.
  1068. if (SE && isa<PHINode>(I))
  1069. SE->forgetValue(&I);
  1070. auto *MergePN =
  1071. PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
  1072. &*MergeBB->getFirstInsertionPt());
  1073. I.replaceAllUsesWith(MergePN);
  1074. MergePN->addIncoming(&I, ExitBB);
  1075. MergePN->addIncoming(&ClonedI, ClonedExitBB);
  1076. }
  1077. }
  1078. // Rewrite the instructions in the cloned blocks to refer to the instructions
  1079. // in the cloned blocks. We have to do this as a second pass so that we have
  1080. // everything available. Also, we have inserted new instructions which may
  1081. // include assume intrinsics, so we update the assumption cache while
  1082. // processing this.
  1083. for (auto *ClonedBB : NewBlocks)
  1084. for (Instruction &I : *ClonedBB) {
  1085. RemapInstruction(&I, VMap,
  1086. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  1087. if (auto *II = dyn_cast<AssumeInst>(&I))
  1088. AC.registerAssumption(II);
  1089. }
  1090. // Update any PHI nodes in the cloned successors of the skipped blocks to not
  1091. // have spurious incoming values.
  1092. for (auto *LoopBB : L.blocks())
  1093. if (SkipBlock(LoopBB))
  1094. for (auto *SuccBB : successors(LoopBB))
  1095. if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
  1096. for (PHINode &PN : ClonedSuccBB->phis())
  1097. PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
  1098. // Remove the cloned parent as a predecessor of any successor we ended up
  1099. // cloning other than the unswitched one.
  1100. auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
  1101. for (auto *SuccBB : successors(ParentBB)) {
  1102. if (SuccBB == UnswitchedSuccBB)
  1103. continue;
  1104. auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
  1105. if (!ClonedSuccBB)
  1106. continue;
  1107. ClonedSuccBB->removePredecessor(ClonedParentBB,
  1108. /*KeepOneInputPHIs*/ true);
  1109. }
  1110. // Replace the cloned branch with an unconditional branch to the cloned
  1111. // unswitched successor.
  1112. auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
  1113. Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
  1114. // Trivial Simplification. If Terminator is a conditional branch and
  1115. // condition becomes dead - erase it.
  1116. Value *ClonedConditionToErase = nullptr;
  1117. if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
  1118. ClonedConditionToErase = BI->getCondition();
  1119. else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
  1120. ClonedConditionToErase = SI->getCondition();
  1121. ClonedTerminator->eraseFromParent();
  1122. BranchInst::Create(ClonedSuccBB, ClonedParentBB);
  1123. if (ClonedConditionToErase)
  1124. RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
  1125. MSSAU);
  1126. // If there are duplicate entries in the PHI nodes because of multiple edges
  1127. // to the unswitched successor, we need to nuke all but one as we replaced it
  1128. // with a direct branch.
  1129. for (PHINode &PN : ClonedSuccBB->phis()) {
  1130. bool Found = false;
  1131. // Loop over the incoming operands backwards so we can easily delete as we
  1132. // go without invalidating the index.
  1133. for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
  1134. if (PN.getIncomingBlock(i) != ClonedParentBB)
  1135. continue;
  1136. if (!Found) {
  1137. Found = true;
  1138. continue;
  1139. }
  1140. PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
  1141. }
  1142. }
  1143. // Record the domtree updates for the new blocks.
  1144. SmallPtrSet<BasicBlock *, 4> SuccSet;
  1145. for (auto *ClonedBB : NewBlocks) {
  1146. for (auto *SuccBB : successors(ClonedBB))
  1147. if (SuccSet.insert(SuccBB).second)
  1148. DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
  1149. SuccSet.clear();
  1150. }
  1151. return ClonedPH;
  1152. }
  1153. /// Recursively clone the specified loop and all of its children.
  1154. ///
  1155. /// The target parent loop for the clone should be provided, or can be null if
  1156. /// the clone is a top-level loop. While cloning, all the blocks are mapped
  1157. /// with the provided value map. The entire original loop must be present in
  1158. /// the value map. The cloned loop is returned.
  1159. static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
  1160. const ValueToValueMapTy &VMap, LoopInfo &LI) {
  1161. auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
  1162. assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
  1163. ClonedL.reserveBlocks(OrigL.getNumBlocks());
  1164. for (auto *BB : OrigL.blocks()) {
  1165. auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
  1166. ClonedL.addBlockEntry(ClonedBB);
  1167. if (LI.getLoopFor(BB) == &OrigL)
  1168. LI.changeLoopFor(ClonedBB, &ClonedL);
  1169. }
  1170. };
  1171. // We specially handle the first loop because it may get cloned into
  1172. // a different parent and because we most commonly are cloning leaf loops.
  1173. Loop *ClonedRootL = LI.AllocateLoop();
  1174. if (RootParentL)
  1175. RootParentL->addChildLoop(ClonedRootL);
  1176. else
  1177. LI.addTopLevelLoop(ClonedRootL);
  1178. AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
  1179. if (OrigRootL.isInnermost())
  1180. return ClonedRootL;
  1181. // If we have a nest, we can quickly clone the entire loop nest using an
  1182. // iterative approach because it is a tree. We keep the cloned parent in the
  1183. // data structure to avoid repeatedly querying through a map to find it.
  1184. SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
  1185. // Build up the loops to clone in reverse order as we'll clone them from the
  1186. // back.
  1187. for (Loop *ChildL : llvm::reverse(OrigRootL))
  1188. LoopsToClone.push_back({ClonedRootL, ChildL});
  1189. do {
  1190. Loop *ClonedParentL, *L;
  1191. std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
  1192. Loop *ClonedL = LI.AllocateLoop();
  1193. ClonedParentL->addChildLoop(ClonedL);
  1194. AddClonedBlocksToLoop(*L, *ClonedL);
  1195. for (Loop *ChildL : llvm::reverse(*L))
  1196. LoopsToClone.push_back({ClonedL, ChildL});
  1197. } while (!LoopsToClone.empty());
  1198. return ClonedRootL;
  1199. }
  1200. /// Build the cloned loops of an original loop from unswitching.
  1201. ///
  1202. /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
  1203. /// operation. We need to re-verify that there even is a loop (as the backedge
  1204. /// may not have been cloned), and even if there are remaining backedges the
  1205. /// backedge set may be different. However, we know that each child loop is
  1206. /// undisturbed, we only need to find where to place each child loop within
  1207. /// either any parent loop or within a cloned version of the original loop.
  1208. ///
  1209. /// Because child loops may end up cloned outside of any cloned version of the
  1210. /// original loop, multiple cloned sibling loops may be created. All of them
  1211. /// are returned so that the newly introduced loop nest roots can be
  1212. /// identified.
  1213. static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
  1214. const ValueToValueMapTy &VMap, LoopInfo &LI,
  1215. SmallVectorImpl<Loop *> &NonChildClonedLoops) {
  1216. Loop *ClonedL = nullptr;
  1217. auto *OrigPH = OrigL.getLoopPreheader();
  1218. auto *OrigHeader = OrigL.getHeader();
  1219. auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
  1220. auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
  1221. // We need to know the loops of the cloned exit blocks to even compute the
  1222. // accurate parent loop. If we only clone exits to some parent of the
  1223. // original parent, we want to clone into that outer loop. We also keep track
  1224. // of the loops that our cloned exit blocks participate in.
  1225. Loop *ParentL = nullptr;
  1226. SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
  1227. SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
  1228. ClonedExitsInLoops.reserve(ExitBlocks.size());
  1229. for (auto *ExitBB : ExitBlocks)
  1230. if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
  1231. if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
  1232. ExitLoopMap[ClonedExitBB] = ExitL;
  1233. ClonedExitsInLoops.push_back(ClonedExitBB);
  1234. if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
  1235. ParentL = ExitL;
  1236. }
  1237. assert((!ParentL || ParentL == OrigL.getParentLoop() ||
  1238. ParentL->contains(OrigL.getParentLoop())) &&
  1239. "The computed parent loop should always contain (or be) the parent of "
  1240. "the original loop.");
  1241. // We build the set of blocks dominated by the cloned header from the set of
  1242. // cloned blocks out of the original loop. While not all of these will
  1243. // necessarily be in the cloned loop, it is enough to establish that they
  1244. // aren't in unreachable cycles, etc.
  1245. SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
  1246. for (auto *BB : OrigL.blocks())
  1247. if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
  1248. ClonedLoopBlocks.insert(ClonedBB);
  1249. // Rebuild the set of blocks that will end up in the cloned loop. We may have
  1250. // skipped cloning some region of this loop which can in turn skip some of
  1251. // the backedges so we have to rebuild the blocks in the loop based on the
  1252. // backedges that remain after cloning.
  1253. SmallVector<BasicBlock *, 16> Worklist;
  1254. SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
  1255. for (auto *Pred : predecessors(ClonedHeader)) {
  1256. // The only possible non-loop header predecessor is the preheader because
  1257. // we know we cloned the loop in simplified form.
  1258. if (Pred == ClonedPH)
  1259. continue;
  1260. // Because the loop was in simplified form, the only non-loop predecessor
  1261. // should be the preheader.
  1262. assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
  1263. "header other than the preheader "
  1264. "that is not part of the loop!");
  1265. // Insert this block into the loop set and on the first visit (and if it
  1266. // isn't the header we're currently walking) put it into the worklist to
  1267. // recurse through.
  1268. if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
  1269. Worklist.push_back(Pred);
  1270. }
  1271. // If we had any backedges then there *is* a cloned loop. Put the header into
  1272. // the loop set and then walk the worklist backwards to find all the blocks
  1273. // that remain within the loop after cloning.
  1274. if (!BlocksInClonedLoop.empty()) {
  1275. BlocksInClonedLoop.insert(ClonedHeader);
  1276. while (!Worklist.empty()) {
  1277. BasicBlock *BB = Worklist.pop_back_val();
  1278. assert(BlocksInClonedLoop.count(BB) &&
  1279. "Didn't put block into the loop set!");
  1280. // Insert any predecessors that are in the possible set into the cloned
  1281. // set, and if the insert is successful, add them to the worklist. Note
  1282. // that we filter on the blocks that are definitely reachable via the
  1283. // backedge to the loop header so we may prune out dead code within the
  1284. // cloned loop.
  1285. for (auto *Pred : predecessors(BB))
  1286. if (ClonedLoopBlocks.count(Pred) &&
  1287. BlocksInClonedLoop.insert(Pred).second)
  1288. Worklist.push_back(Pred);
  1289. }
  1290. ClonedL = LI.AllocateLoop();
  1291. if (ParentL) {
  1292. ParentL->addBasicBlockToLoop(ClonedPH, LI);
  1293. ParentL->addChildLoop(ClonedL);
  1294. } else {
  1295. LI.addTopLevelLoop(ClonedL);
  1296. }
  1297. NonChildClonedLoops.push_back(ClonedL);
  1298. ClonedL->reserveBlocks(BlocksInClonedLoop.size());
  1299. // We don't want to just add the cloned loop blocks based on how we
  1300. // discovered them. The original order of blocks was carefully built in
  1301. // a way that doesn't rely on predecessor ordering. Rather than re-invent
  1302. // that logic, we just re-walk the original blocks (and those of the child
  1303. // loops) and filter them as we add them into the cloned loop.
  1304. for (auto *BB : OrigL.blocks()) {
  1305. auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
  1306. if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
  1307. continue;
  1308. // Directly add the blocks that are only in this loop.
  1309. if (LI.getLoopFor(BB) == &OrigL) {
  1310. ClonedL->addBasicBlockToLoop(ClonedBB, LI);
  1311. continue;
  1312. }
  1313. // We want to manually add it to this loop and parents.
  1314. // Registering it with LoopInfo will happen when we clone the top
  1315. // loop for this block.
  1316. for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
  1317. PL->addBlockEntry(ClonedBB);
  1318. }
  1319. // Now add each child loop whose header remains within the cloned loop. All
  1320. // of the blocks within the loop must satisfy the same constraints as the
  1321. // header so once we pass the header checks we can just clone the entire
  1322. // child loop nest.
  1323. for (Loop *ChildL : OrigL) {
  1324. auto *ClonedChildHeader =
  1325. cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
  1326. if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
  1327. continue;
  1328. #ifndef NDEBUG
  1329. // We should never have a cloned child loop header but fail to have
  1330. // all of the blocks for that child loop.
  1331. for (auto *ChildLoopBB : ChildL->blocks())
  1332. assert(BlocksInClonedLoop.count(
  1333. cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
  1334. "Child cloned loop has a header within the cloned outer "
  1335. "loop but not all of its blocks!");
  1336. #endif
  1337. cloneLoopNest(*ChildL, ClonedL, VMap, LI);
  1338. }
  1339. }
  1340. // Now that we've handled all the components of the original loop that were
  1341. // cloned into a new loop, we still need to handle anything from the original
  1342. // loop that wasn't in a cloned loop.
  1343. // Figure out what blocks are left to place within any loop nest containing
  1344. // the unswitched loop. If we never formed a loop, the cloned PH is one of
  1345. // them.
  1346. SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
  1347. if (BlocksInClonedLoop.empty())
  1348. UnloopedBlockSet.insert(ClonedPH);
  1349. for (auto *ClonedBB : ClonedLoopBlocks)
  1350. if (!BlocksInClonedLoop.count(ClonedBB))
  1351. UnloopedBlockSet.insert(ClonedBB);
  1352. // Copy the cloned exits and sort them in ascending loop depth, we'll work
  1353. // backwards across these to process them inside out. The order shouldn't
  1354. // matter as we're just trying to build up the map from inside-out; we use
  1355. // the map in a more stably ordered way below.
  1356. auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
  1357. llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
  1358. return ExitLoopMap.lookup(LHS)->getLoopDepth() <
  1359. ExitLoopMap.lookup(RHS)->getLoopDepth();
  1360. });
  1361. // Populate the existing ExitLoopMap with everything reachable from each
  1362. // exit, starting from the inner most exit.
  1363. while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
  1364. assert(Worklist.empty() && "Didn't clear worklist!");
  1365. BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
  1366. Loop *ExitL = ExitLoopMap.lookup(ExitBB);
  1367. // Walk the CFG back until we hit the cloned PH adding everything reachable
  1368. // and in the unlooped set to this exit block's loop.
  1369. Worklist.push_back(ExitBB);
  1370. do {
  1371. BasicBlock *BB = Worklist.pop_back_val();
  1372. // We can stop recursing at the cloned preheader (if we get there).
  1373. if (BB == ClonedPH)
  1374. continue;
  1375. for (BasicBlock *PredBB : predecessors(BB)) {
  1376. // If this pred has already been moved to our set or is part of some
  1377. // (inner) loop, no update needed.
  1378. if (!UnloopedBlockSet.erase(PredBB)) {
  1379. assert(
  1380. (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
  1381. "Predecessor not mapped to a loop!");
  1382. continue;
  1383. }
  1384. // We just insert into the loop set here. We'll add these blocks to the
  1385. // exit loop after we build up the set in an order that doesn't rely on
  1386. // predecessor order (which in turn relies on use list order).
  1387. bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
  1388. (void)Inserted;
  1389. assert(Inserted && "Should only visit an unlooped block once!");
  1390. // And recurse through to its predecessors.
  1391. Worklist.push_back(PredBB);
  1392. }
  1393. } while (!Worklist.empty());
  1394. }
  1395. // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
  1396. // blocks to their outer loops, walk the cloned blocks and the cloned exits
  1397. // in their original order adding them to the correct loop.
  1398. // We need a stable insertion order. We use the order of the original loop
  1399. // order and map into the correct parent loop.
  1400. for (auto *BB : llvm::concat<BasicBlock *const>(
  1401. ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
  1402. if (Loop *OuterL = ExitLoopMap.lookup(BB))
  1403. OuterL->addBasicBlockToLoop(BB, LI);
  1404. #ifndef NDEBUG
  1405. for (auto &BBAndL : ExitLoopMap) {
  1406. auto *BB = BBAndL.first;
  1407. auto *OuterL = BBAndL.second;
  1408. assert(LI.getLoopFor(BB) == OuterL &&
  1409. "Failed to put all blocks into outer loops!");
  1410. }
  1411. #endif
  1412. // Now that all the blocks are placed into the correct containing loop in the
  1413. // absence of child loops, find all the potentially cloned child loops and
  1414. // clone them into whatever outer loop we placed their header into.
  1415. for (Loop *ChildL : OrigL) {
  1416. auto *ClonedChildHeader =
  1417. cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
  1418. if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
  1419. continue;
  1420. #ifndef NDEBUG
  1421. for (auto *ChildLoopBB : ChildL->blocks())
  1422. assert(VMap.count(ChildLoopBB) &&
  1423. "Cloned a child loop header but not all of that loops blocks!");
  1424. #endif
  1425. NonChildClonedLoops.push_back(cloneLoopNest(
  1426. *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
  1427. }
  1428. }
  1429. static void
  1430. deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
  1431. ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
  1432. DominatorTree &DT, MemorySSAUpdater *MSSAU) {
  1433. // Find all the dead clones, and remove them from their successors.
  1434. SmallVector<BasicBlock *, 16> DeadBlocks;
  1435. for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
  1436. for (const auto &VMap : VMaps)
  1437. if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
  1438. if (!DT.isReachableFromEntry(ClonedBB)) {
  1439. for (BasicBlock *SuccBB : successors(ClonedBB))
  1440. SuccBB->removePredecessor(ClonedBB);
  1441. DeadBlocks.push_back(ClonedBB);
  1442. }
  1443. // Remove all MemorySSA in the dead blocks
  1444. if (MSSAU) {
  1445. SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
  1446. DeadBlocks.end());
  1447. MSSAU->removeBlocks(DeadBlockSet);
  1448. }
  1449. // Drop any remaining references to break cycles.
  1450. for (BasicBlock *BB : DeadBlocks)
  1451. BB->dropAllReferences();
  1452. // Erase them from the IR.
  1453. for (BasicBlock *BB : DeadBlocks)
  1454. BB->eraseFromParent();
  1455. }
  1456. static void
  1457. deleteDeadBlocksFromLoop(Loop &L,
  1458. SmallVectorImpl<BasicBlock *> &ExitBlocks,
  1459. DominatorTree &DT, LoopInfo &LI,
  1460. MemorySSAUpdater *MSSAU,
  1461. ScalarEvolution *SE,
  1462. function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
  1463. // Find all the dead blocks tied to this loop, and remove them from their
  1464. // successors.
  1465. SmallSetVector<BasicBlock *, 8> DeadBlockSet;
  1466. // Start with loop/exit blocks and get a transitive closure of reachable dead
  1467. // blocks.
  1468. SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
  1469. ExitBlocks.end());
  1470. DeathCandidates.append(L.blocks().begin(), L.blocks().end());
  1471. while (!DeathCandidates.empty()) {
  1472. auto *BB = DeathCandidates.pop_back_val();
  1473. if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
  1474. for (BasicBlock *SuccBB : successors(BB)) {
  1475. SuccBB->removePredecessor(BB);
  1476. DeathCandidates.push_back(SuccBB);
  1477. }
  1478. DeadBlockSet.insert(BB);
  1479. }
  1480. }
  1481. // Remove all MemorySSA in the dead blocks
  1482. if (MSSAU)
  1483. MSSAU->removeBlocks(DeadBlockSet);
  1484. // Filter out the dead blocks from the exit blocks list so that it can be
  1485. // used in the caller.
  1486. llvm::erase_if(ExitBlocks,
  1487. [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
  1488. // Walk from this loop up through its parents removing all of the dead blocks.
  1489. for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
  1490. for (auto *BB : DeadBlockSet)
  1491. ParentL->getBlocksSet().erase(BB);
  1492. llvm::erase_if(ParentL->getBlocksVector(),
  1493. [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
  1494. }
  1495. // Now delete the dead child loops. This raw delete will clear them
  1496. // recursively.
  1497. llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
  1498. if (!DeadBlockSet.count(ChildL->getHeader()))
  1499. return false;
  1500. assert(llvm::all_of(ChildL->blocks(),
  1501. [&](BasicBlock *ChildBB) {
  1502. return DeadBlockSet.count(ChildBB);
  1503. }) &&
  1504. "If the child loop header is dead all blocks in the child loop must "
  1505. "be dead as well!");
  1506. DestroyLoopCB(*ChildL, ChildL->getName());
  1507. if (SE)
  1508. SE->forgetBlockAndLoopDispositions();
  1509. LI.destroy(ChildL);
  1510. return true;
  1511. });
  1512. // Remove the loop mappings for the dead blocks and drop all the references
  1513. // from these blocks to others to handle cyclic references as we start
  1514. // deleting the blocks themselves.
  1515. for (auto *BB : DeadBlockSet) {
  1516. // Check that the dominator tree has already been updated.
  1517. assert(!DT.getNode(BB) && "Should already have cleared domtree!");
  1518. LI.changeLoopFor(BB, nullptr);
  1519. // Drop all uses of the instructions to make sure we won't have dangling
  1520. // uses in other blocks.
  1521. for (auto &I : *BB)
  1522. if (!I.use_empty())
  1523. I.replaceAllUsesWith(PoisonValue::get(I.getType()));
  1524. BB->dropAllReferences();
  1525. }
  1526. // Actually delete the blocks now that they've been fully unhooked from the
  1527. // IR.
  1528. for (auto *BB : DeadBlockSet)
  1529. BB->eraseFromParent();
  1530. }
  1531. /// Recompute the set of blocks in a loop after unswitching.
  1532. ///
  1533. /// This walks from the original headers predecessors to rebuild the loop. We
  1534. /// take advantage of the fact that new blocks can't have been added, and so we
  1535. /// filter by the original loop's blocks. This also handles potentially
  1536. /// unreachable code that we don't want to explore but might be found examining
  1537. /// the predecessors of the header.
  1538. ///
  1539. /// If the original loop is no longer a loop, this will return an empty set. If
  1540. /// it remains a loop, all the blocks within it will be added to the set
  1541. /// (including those blocks in inner loops).
  1542. static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
  1543. LoopInfo &LI) {
  1544. SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
  1545. auto *PH = L.getLoopPreheader();
  1546. auto *Header = L.getHeader();
  1547. // A worklist to use while walking backwards from the header.
  1548. SmallVector<BasicBlock *, 16> Worklist;
  1549. // First walk the predecessors of the header to find the backedges. This will
  1550. // form the basis of our walk.
  1551. for (auto *Pred : predecessors(Header)) {
  1552. // Skip the preheader.
  1553. if (Pred == PH)
  1554. continue;
  1555. // Because the loop was in simplified form, the only non-loop predecessor
  1556. // is the preheader.
  1557. assert(L.contains(Pred) && "Found a predecessor of the loop header other "
  1558. "than the preheader that is not part of the "
  1559. "loop!");
  1560. // Insert this block into the loop set and on the first visit and, if it
  1561. // isn't the header we're currently walking, put it into the worklist to
  1562. // recurse through.
  1563. if (LoopBlockSet.insert(Pred).second && Pred != Header)
  1564. Worklist.push_back(Pred);
  1565. }
  1566. // If no backedges were found, we're done.
  1567. if (LoopBlockSet.empty())
  1568. return LoopBlockSet;
  1569. // We found backedges, recurse through them to identify the loop blocks.
  1570. while (!Worklist.empty()) {
  1571. BasicBlock *BB = Worklist.pop_back_val();
  1572. assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
  1573. // No need to walk past the header.
  1574. if (BB == Header)
  1575. continue;
  1576. // Because we know the inner loop structure remains valid we can use the
  1577. // loop structure to jump immediately across the entire nested loop.
  1578. // Further, because it is in loop simplified form, we can directly jump
  1579. // to its preheader afterward.
  1580. if (Loop *InnerL = LI.getLoopFor(BB))
  1581. if (InnerL != &L) {
  1582. assert(L.contains(InnerL) &&
  1583. "Should not reach a loop *outside* this loop!");
  1584. // The preheader is the only possible predecessor of the loop so
  1585. // insert it into the set and check whether it was already handled.
  1586. auto *InnerPH = InnerL->getLoopPreheader();
  1587. assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
  1588. "but not contain the inner loop "
  1589. "preheader!");
  1590. if (!LoopBlockSet.insert(InnerPH).second)
  1591. // The only way to reach the preheader is through the loop body
  1592. // itself so if it has been visited the loop is already handled.
  1593. continue;
  1594. // Insert all of the blocks (other than those already present) into
  1595. // the loop set. We expect at least the block that led us to find the
  1596. // inner loop to be in the block set, but we may also have other loop
  1597. // blocks if they were already enqueued as predecessors of some other
  1598. // outer loop block.
  1599. for (auto *InnerBB : InnerL->blocks()) {
  1600. if (InnerBB == BB) {
  1601. assert(LoopBlockSet.count(InnerBB) &&
  1602. "Block should already be in the set!");
  1603. continue;
  1604. }
  1605. LoopBlockSet.insert(InnerBB);
  1606. }
  1607. // Add the preheader to the worklist so we will continue past the
  1608. // loop body.
  1609. Worklist.push_back(InnerPH);
  1610. continue;
  1611. }
  1612. // Insert any predecessors that were in the original loop into the new
  1613. // set, and if the insert is successful, add them to the worklist.
  1614. for (auto *Pred : predecessors(BB))
  1615. if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
  1616. Worklist.push_back(Pred);
  1617. }
  1618. assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
  1619. // We've found all the blocks participating in the loop, return our completed
  1620. // set.
  1621. return LoopBlockSet;
  1622. }
  1623. /// Rebuild a loop after unswitching removes some subset of blocks and edges.
  1624. ///
  1625. /// The removal may have removed some child loops entirely but cannot have
  1626. /// disturbed any remaining child loops. However, they may need to be hoisted
  1627. /// to the parent loop (or to be top-level loops). The original loop may be
  1628. /// completely removed.
  1629. ///
  1630. /// The sibling loops resulting from this update are returned. If the original
  1631. /// loop remains a valid loop, it will be the first entry in this list with all
  1632. /// of the newly sibling loops following it.
  1633. ///
  1634. /// Returns true if the loop remains a loop after unswitching, and false if it
  1635. /// is no longer a loop after unswitching (and should not continue to be
  1636. /// referenced).
  1637. static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
  1638. LoopInfo &LI,
  1639. SmallVectorImpl<Loop *> &HoistedLoops,
  1640. ScalarEvolution *SE) {
  1641. auto *PH = L.getLoopPreheader();
  1642. // Compute the actual parent loop from the exit blocks. Because we may have
  1643. // pruned some exits the loop may be different from the original parent.
  1644. Loop *ParentL = nullptr;
  1645. SmallVector<Loop *, 4> ExitLoops;
  1646. SmallVector<BasicBlock *, 4> ExitsInLoops;
  1647. ExitsInLoops.reserve(ExitBlocks.size());
  1648. for (auto *ExitBB : ExitBlocks)
  1649. if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
  1650. ExitLoops.push_back(ExitL);
  1651. ExitsInLoops.push_back(ExitBB);
  1652. if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
  1653. ParentL = ExitL;
  1654. }
  1655. // Recompute the blocks participating in this loop. This may be empty if it
  1656. // is no longer a loop.
  1657. auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
  1658. // If we still have a loop, we need to re-set the loop's parent as the exit
  1659. // block set changing may have moved it within the loop nest. Note that this
  1660. // can only happen when this loop has a parent as it can only hoist the loop
  1661. // *up* the nest.
  1662. if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
  1663. // Remove this loop's (original) blocks from all of the intervening loops.
  1664. for (Loop *IL = L.getParentLoop(); IL != ParentL;
  1665. IL = IL->getParentLoop()) {
  1666. IL->getBlocksSet().erase(PH);
  1667. for (auto *BB : L.blocks())
  1668. IL->getBlocksSet().erase(BB);
  1669. llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
  1670. return BB == PH || L.contains(BB);
  1671. });
  1672. }
  1673. LI.changeLoopFor(PH, ParentL);
  1674. L.getParentLoop()->removeChildLoop(&L);
  1675. if (ParentL)
  1676. ParentL->addChildLoop(&L);
  1677. else
  1678. LI.addTopLevelLoop(&L);
  1679. }
  1680. // Now we update all the blocks which are no longer within the loop.
  1681. auto &Blocks = L.getBlocksVector();
  1682. auto BlocksSplitI =
  1683. LoopBlockSet.empty()
  1684. ? Blocks.begin()
  1685. : std::stable_partition(
  1686. Blocks.begin(), Blocks.end(),
  1687. [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
  1688. // Before we erase the list of unlooped blocks, build a set of them.
  1689. SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
  1690. if (LoopBlockSet.empty())
  1691. UnloopedBlocks.insert(PH);
  1692. // Now erase these blocks from the loop.
  1693. for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
  1694. L.getBlocksSet().erase(BB);
  1695. Blocks.erase(BlocksSplitI, Blocks.end());
  1696. // Sort the exits in ascending loop depth, we'll work backwards across these
  1697. // to process them inside out.
  1698. llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
  1699. return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
  1700. });
  1701. // We'll build up a set for each exit loop.
  1702. SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
  1703. Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
  1704. auto RemoveUnloopedBlocksFromLoop =
  1705. [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
  1706. for (auto *BB : UnloopedBlocks)
  1707. L.getBlocksSet().erase(BB);
  1708. llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
  1709. return UnloopedBlocks.count(BB);
  1710. });
  1711. };
  1712. SmallVector<BasicBlock *, 16> Worklist;
  1713. while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
  1714. assert(Worklist.empty() && "Didn't clear worklist!");
  1715. assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
  1716. // Grab the next exit block, in decreasing loop depth order.
  1717. BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
  1718. Loop &ExitL = *LI.getLoopFor(ExitBB);
  1719. assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
  1720. // Erase all of the unlooped blocks from the loops between the previous
  1721. // exit loop and this exit loop. This works because the ExitInLoops list is
  1722. // sorted in increasing order of loop depth and thus we visit loops in
  1723. // decreasing order of loop depth.
  1724. for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
  1725. RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
  1726. // Walk the CFG back until we hit the cloned PH adding everything reachable
  1727. // and in the unlooped set to this exit block's loop.
  1728. Worklist.push_back(ExitBB);
  1729. do {
  1730. BasicBlock *BB = Worklist.pop_back_val();
  1731. // We can stop recursing at the cloned preheader (if we get there).
  1732. if (BB == PH)
  1733. continue;
  1734. for (BasicBlock *PredBB : predecessors(BB)) {
  1735. // If this pred has already been moved to our set or is part of some
  1736. // (inner) loop, no update needed.
  1737. if (!UnloopedBlocks.erase(PredBB)) {
  1738. assert((NewExitLoopBlocks.count(PredBB) ||
  1739. ExitL.contains(LI.getLoopFor(PredBB))) &&
  1740. "Predecessor not in a nested loop (or already visited)!");
  1741. continue;
  1742. }
  1743. // We just insert into the loop set here. We'll add these blocks to the
  1744. // exit loop after we build up the set in a deterministic order rather
  1745. // than the predecessor-influenced visit order.
  1746. bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
  1747. (void)Inserted;
  1748. assert(Inserted && "Should only visit an unlooped block once!");
  1749. // And recurse through to its predecessors.
  1750. Worklist.push_back(PredBB);
  1751. }
  1752. } while (!Worklist.empty());
  1753. // If blocks in this exit loop were directly part of the original loop (as
  1754. // opposed to a child loop) update the map to point to this exit loop. This
  1755. // just updates a map and so the fact that the order is unstable is fine.
  1756. for (auto *BB : NewExitLoopBlocks)
  1757. if (Loop *BBL = LI.getLoopFor(BB))
  1758. if (BBL == &L || !L.contains(BBL))
  1759. LI.changeLoopFor(BB, &ExitL);
  1760. // We will remove the remaining unlooped blocks from this loop in the next
  1761. // iteration or below.
  1762. NewExitLoopBlocks.clear();
  1763. }
  1764. // Any remaining unlooped blocks are no longer part of any loop unless they
  1765. // are part of some child loop.
  1766. for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
  1767. RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
  1768. for (auto *BB : UnloopedBlocks)
  1769. if (Loop *BBL = LI.getLoopFor(BB))
  1770. if (BBL == &L || !L.contains(BBL))
  1771. LI.changeLoopFor(BB, nullptr);
  1772. // Sink all the child loops whose headers are no longer in the loop set to
  1773. // the parent (or to be top level loops). We reach into the loop and directly
  1774. // update its subloop vector to make this batch update efficient.
  1775. auto &SubLoops = L.getSubLoopsVector();
  1776. auto SubLoopsSplitI =
  1777. LoopBlockSet.empty()
  1778. ? SubLoops.begin()
  1779. : std::stable_partition(
  1780. SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
  1781. return LoopBlockSet.count(SubL->getHeader());
  1782. });
  1783. for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
  1784. HoistedLoops.push_back(HoistedL);
  1785. HoistedL->setParentLoop(nullptr);
  1786. // To compute the new parent of this hoisted loop we look at where we
  1787. // placed the preheader above. We can't lookup the header itself because we
  1788. // retained the mapping from the header to the hoisted loop. But the
  1789. // preheader and header should have the exact same new parent computed
  1790. // based on the set of exit blocks from the original loop as the preheader
  1791. // is a predecessor of the header and so reached in the reverse walk. And
  1792. // because the loops were all in simplified form the preheader of the
  1793. // hoisted loop can't be part of some *other* loop.
  1794. if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
  1795. NewParentL->addChildLoop(HoistedL);
  1796. else
  1797. LI.addTopLevelLoop(HoistedL);
  1798. }
  1799. SubLoops.erase(SubLoopsSplitI, SubLoops.end());
  1800. // Actually delete the loop if nothing remained within it.
  1801. if (Blocks.empty()) {
  1802. assert(SubLoops.empty() &&
  1803. "Failed to remove all subloops from the original loop!");
  1804. if (Loop *ParentL = L.getParentLoop())
  1805. ParentL->removeChildLoop(llvm::find(*ParentL, &L));
  1806. else
  1807. LI.removeLoop(llvm::find(LI, &L));
  1808. // markLoopAsDeleted for L should be triggered by the caller (it is typically
  1809. // done by using the UnswitchCB callback).
  1810. if (SE)
  1811. SE->forgetBlockAndLoopDispositions();
  1812. LI.destroy(&L);
  1813. return false;
  1814. }
  1815. return true;
  1816. }
  1817. /// Helper to visit a dominator subtree, invoking a callable on each node.
  1818. ///
  1819. /// Returning false at any point will stop walking past that node of the tree.
  1820. template <typename CallableT>
  1821. void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
  1822. SmallVector<DomTreeNode *, 4> DomWorklist;
  1823. DomWorklist.push_back(DT[BB]);
  1824. #ifndef NDEBUG
  1825. SmallPtrSet<DomTreeNode *, 4> Visited;
  1826. Visited.insert(DT[BB]);
  1827. #endif
  1828. do {
  1829. DomTreeNode *N = DomWorklist.pop_back_val();
  1830. // Visit this node.
  1831. if (!Callable(N->getBlock()))
  1832. continue;
  1833. // Accumulate the child nodes.
  1834. for (DomTreeNode *ChildN : *N) {
  1835. assert(Visited.insert(ChildN).second &&
  1836. "Cannot visit a node twice when walking a tree!");
  1837. DomWorklist.push_back(ChildN);
  1838. }
  1839. } while (!DomWorklist.empty());
  1840. }
  1841. static void unswitchNontrivialInvariants(
  1842. Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
  1843. IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
  1844. AssumptionCache &AC,
  1845. function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
  1846. ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
  1847. function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
  1848. auto *ParentBB = TI.getParent();
  1849. BranchInst *BI = dyn_cast<BranchInst>(&TI);
  1850. SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
  1851. // We can only unswitch switches, conditional branches with an invariant
  1852. // condition, or combining invariant conditions with an instruction or
  1853. // partially invariant instructions.
  1854. assert((SI || (BI && BI->isConditional())) &&
  1855. "Can only unswitch switches and conditional branch!");
  1856. bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
  1857. bool FullUnswitch =
  1858. SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
  1859. !PartiallyInvariant);
  1860. if (FullUnswitch)
  1861. assert(Invariants.size() == 1 &&
  1862. "Cannot have other invariants with full unswitching!");
  1863. else
  1864. assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
  1865. "Partial unswitching requires an instruction as the condition!");
  1866. if (MSSAU && VerifyMemorySSA)
  1867. MSSAU->getMemorySSA()->verifyMemorySSA();
  1868. // Constant and BBs tracking the cloned and continuing successor. When we are
  1869. // unswitching the entire condition, this can just be trivially chosen to
  1870. // unswitch towards `true`. However, when we are unswitching a set of
  1871. // invariants combined with `and` or `or` or partially invariant instructions,
  1872. // the combining operation determines the best direction to unswitch: we want
  1873. // to unswitch the direction that will collapse the branch.
  1874. bool Direction = true;
  1875. int ClonedSucc = 0;
  1876. if (!FullUnswitch) {
  1877. Value *Cond = skipTrivialSelect(BI->getCondition());
  1878. (void)Cond;
  1879. assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
  1880. PartiallyInvariant) &&
  1881. "Only `or`, `and`, an `select`, partially invariant instructions "
  1882. "can combine invariants being unswitched.");
  1883. if (!match(Cond, m_LogicalOr())) {
  1884. if (match(Cond, m_LogicalAnd()) ||
  1885. (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
  1886. Direction = false;
  1887. ClonedSucc = 1;
  1888. }
  1889. }
  1890. }
  1891. BasicBlock *RetainedSuccBB =
  1892. BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
  1893. SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
  1894. if (BI)
  1895. UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
  1896. else
  1897. for (auto Case : SI->cases())
  1898. if (Case.getCaseSuccessor() != RetainedSuccBB)
  1899. UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
  1900. assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
  1901. "Should not unswitch the same successor we are retaining!");
  1902. // The branch should be in this exact loop. Any inner loop's invariant branch
  1903. // should be handled by unswitching that inner loop. The caller of this
  1904. // routine should filter out any candidates that remain (but were skipped for
  1905. // whatever reason).
  1906. assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
  1907. // Compute the parent loop now before we start hacking on things.
  1908. Loop *ParentL = L.getParentLoop();
  1909. // Get blocks in RPO order for MSSA update, before changing the CFG.
  1910. LoopBlocksRPO LBRPO(&L);
  1911. if (MSSAU)
  1912. LBRPO.perform(&LI);
  1913. // Compute the outer-most loop containing one of our exit blocks. This is the
  1914. // furthest up our loopnest which can be mutated, which we will use below to
  1915. // update things.
  1916. Loop *OuterExitL = &L;
  1917. SmallVector<BasicBlock *, 4> ExitBlocks;
  1918. L.getUniqueExitBlocks(ExitBlocks);
  1919. for (auto *ExitBB : ExitBlocks) {
  1920. Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
  1921. if (!NewOuterExitL) {
  1922. // We exited the entire nest with this block, so we're done.
  1923. OuterExitL = nullptr;
  1924. break;
  1925. }
  1926. if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
  1927. OuterExitL = NewOuterExitL;
  1928. }
  1929. // At this point, we're definitely going to unswitch something so invalidate
  1930. // any cached information in ScalarEvolution for the outer most loop
  1931. // containing an exit block and all nested loops.
  1932. if (SE) {
  1933. if (OuterExitL)
  1934. SE->forgetLoop(OuterExitL);
  1935. else
  1936. SE->forgetTopmostLoop(&L);
  1937. SE->forgetBlockAndLoopDispositions();
  1938. }
  1939. bool InsertFreeze = false;
  1940. if (FreezeLoopUnswitchCond) {
  1941. ICFLoopSafetyInfo SafetyInfo;
  1942. SafetyInfo.computeLoopSafetyInfo(&L);
  1943. InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
  1944. }
  1945. // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
  1946. // otherwise the branch instruction will have been moved outside the loop
  1947. // already, and may imply that a poison condition is always UB.
  1948. Value *FullUnswitchCond = nullptr;
  1949. if (FullUnswitch) {
  1950. FullUnswitchCond =
  1951. BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
  1952. if (InsertFreeze)
  1953. InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
  1954. FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
  1955. }
  1956. // If the edge from this terminator to a successor dominates that successor,
  1957. // store a map from each block in its dominator subtree to it. This lets us
  1958. // tell when cloning for a particular successor if a block is dominated by
  1959. // some *other* successor with a single data structure. We use this to
  1960. // significantly reduce cloning.
  1961. SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
  1962. for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
  1963. UnswitchedSuccBBs))
  1964. if (SuccBB->getUniquePredecessor() ||
  1965. llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
  1966. return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
  1967. }))
  1968. visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
  1969. DominatingSucc[BB] = SuccBB;
  1970. return true;
  1971. });
  1972. // Split the preheader, so that we know that there is a safe place to insert
  1973. // the conditional branch. We will change the preheader to have a conditional
  1974. // branch on LoopCond. The original preheader will become the split point
  1975. // between the unswitched versions, and we will have a new preheader for the
  1976. // original loop.
  1977. BasicBlock *SplitBB = L.getLoopPreheader();
  1978. BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
  1979. // Keep track of the dominator tree updates needed.
  1980. SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
  1981. // Clone the loop for each unswitched successor.
  1982. SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
  1983. VMaps.reserve(UnswitchedSuccBBs.size());
  1984. SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
  1985. for (auto *SuccBB : UnswitchedSuccBBs) {
  1986. VMaps.emplace_back(new ValueToValueMapTy());
  1987. ClonedPHs[SuccBB] = buildClonedLoopBlocks(
  1988. L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
  1989. DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
  1990. }
  1991. // Drop metadata if we may break its semantics by moving this instr into the
  1992. // split block.
  1993. if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
  1994. if (DropNonTrivialImplicitNullChecks)
  1995. // Do not spend time trying to understand if we can keep it, just drop it
  1996. // to save compile time.
  1997. TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
  1998. else {
  1999. // It is only legal to preserve make.implicit metadata if we are
  2000. // guaranteed no reach implicit null check after following this branch.
  2001. ICFLoopSafetyInfo SafetyInfo;
  2002. SafetyInfo.computeLoopSafetyInfo(&L);
  2003. if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
  2004. TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
  2005. }
  2006. }
  2007. // The stitching of the branched code back together depends on whether we're
  2008. // doing full unswitching or not with the exception that we always want to
  2009. // nuke the initial terminator placed in the split block.
  2010. SplitBB->getTerminator()->eraseFromParent();
  2011. if (FullUnswitch) {
  2012. // Splice the terminator from the original loop and rewrite its
  2013. // successors.
  2014. SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator());
  2015. // Keep a clone of the terminator for MSSA updates.
  2016. Instruction *NewTI = TI.clone();
  2017. NewTI->insertInto(ParentBB, ParentBB->end());
  2018. // First wire up the moved terminator to the preheaders.
  2019. if (BI) {
  2020. BasicBlock *ClonedPH = ClonedPHs.begin()->second;
  2021. BI->setSuccessor(ClonedSucc, ClonedPH);
  2022. BI->setSuccessor(1 - ClonedSucc, LoopPH);
  2023. if (InsertFreeze)
  2024. FullUnswitchCond = new FreezeInst(
  2025. FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
  2026. BI->setCondition(FullUnswitchCond);
  2027. DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
  2028. } else {
  2029. assert(SI && "Must either be a branch or switch!");
  2030. // Walk the cases and directly update their successors.
  2031. assert(SI->getDefaultDest() == RetainedSuccBB &&
  2032. "Not retaining default successor!");
  2033. SI->setDefaultDest(LoopPH);
  2034. for (const auto &Case : SI->cases())
  2035. if (Case.getCaseSuccessor() == RetainedSuccBB)
  2036. Case.setSuccessor(LoopPH);
  2037. else
  2038. Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
  2039. if (InsertFreeze)
  2040. SI->setCondition(new FreezeInst(
  2041. FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
  2042. // We need to use the set to populate domtree updates as even when there
  2043. // are multiple cases pointing at the same successor we only want to
  2044. // remove and insert one edge in the domtree.
  2045. for (BasicBlock *SuccBB : UnswitchedSuccBBs)
  2046. DTUpdates.push_back(
  2047. {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
  2048. }
  2049. if (MSSAU) {
  2050. DT.applyUpdates(DTUpdates);
  2051. DTUpdates.clear();
  2052. // Remove all but one edge to the retained block and all unswitched
  2053. // blocks. This is to avoid having duplicate entries in the cloned Phis,
  2054. // when we know we only keep a single edge for each case.
  2055. MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
  2056. for (BasicBlock *SuccBB : UnswitchedSuccBBs)
  2057. MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
  2058. for (auto &VMap : VMaps)
  2059. MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
  2060. /*IgnoreIncomingWithNoClones=*/true);
  2061. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
  2062. // Remove all edges to unswitched blocks.
  2063. for (BasicBlock *SuccBB : UnswitchedSuccBBs)
  2064. MSSAU->removeEdge(ParentBB, SuccBB);
  2065. }
  2066. // Now unhook the successor relationship as we'll be replacing
  2067. // the terminator with a direct branch. This is much simpler for branches
  2068. // than switches so we handle those first.
  2069. if (BI) {
  2070. // Remove the parent as a predecessor of the unswitched successor.
  2071. assert(UnswitchedSuccBBs.size() == 1 &&
  2072. "Only one possible unswitched block for a branch!");
  2073. BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
  2074. UnswitchedSuccBB->removePredecessor(ParentBB,
  2075. /*KeepOneInputPHIs*/ true);
  2076. DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
  2077. } else {
  2078. // Note that we actually want to remove the parent block as a predecessor
  2079. // of *every* case successor. The case successor is either unswitched,
  2080. // completely eliminating an edge from the parent to that successor, or it
  2081. // is a duplicate edge to the retained successor as the retained successor
  2082. // is always the default successor and as we'll replace this with a direct
  2083. // branch we no longer need the duplicate entries in the PHI nodes.
  2084. SwitchInst *NewSI = cast<SwitchInst>(NewTI);
  2085. assert(NewSI->getDefaultDest() == RetainedSuccBB &&
  2086. "Not retaining default successor!");
  2087. for (const auto &Case : NewSI->cases())
  2088. Case.getCaseSuccessor()->removePredecessor(
  2089. ParentBB,
  2090. /*KeepOneInputPHIs*/ true);
  2091. // We need to use the set to populate domtree updates as even when there
  2092. // are multiple cases pointing at the same successor we only want to
  2093. // remove and insert one edge in the domtree.
  2094. for (BasicBlock *SuccBB : UnswitchedSuccBBs)
  2095. DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
  2096. }
  2097. // After MSSAU update, remove the cloned terminator instruction NewTI.
  2098. ParentBB->getTerminator()->eraseFromParent();
  2099. // Create a new unconditional branch to the continuing block (as opposed to
  2100. // the one cloned).
  2101. BranchInst::Create(RetainedSuccBB, ParentBB);
  2102. } else {
  2103. assert(BI && "Only branches have partial unswitching.");
  2104. assert(UnswitchedSuccBBs.size() == 1 &&
  2105. "Only one possible unswitched block for a branch!");
  2106. BasicBlock *ClonedPH = ClonedPHs.begin()->second;
  2107. // When doing a partial unswitch, we have to do a bit more work to build up
  2108. // the branch in the split block.
  2109. if (PartiallyInvariant)
  2110. buildPartialInvariantUnswitchConditionalBranch(
  2111. *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
  2112. else {
  2113. buildPartialUnswitchConditionalBranch(
  2114. *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
  2115. FreezeLoopUnswitchCond, BI, &AC, DT);
  2116. }
  2117. DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
  2118. if (MSSAU) {
  2119. DT.applyUpdates(DTUpdates);
  2120. DTUpdates.clear();
  2121. // Perform MSSA cloning updates.
  2122. for (auto &VMap : VMaps)
  2123. MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
  2124. /*IgnoreIncomingWithNoClones=*/true);
  2125. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
  2126. }
  2127. }
  2128. // Apply the updates accumulated above to get an up-to-date dominator tree.
  2129. DT.applyUpdates(DTUpdates);
  2130. // Now that we have an accurate dominator tree, first delete the dead cloned
  2131. // blocks so that we can accurately build any cloned loops. It is important to
  2132. // not delete the blocks from the original loop yet because we still want to
  2133. // reference the original loop to understand the cloned loop's structure.
  2134. deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
  2135. // Build the cloned loop structure itself. This may be substantially
  2136. // different from the original structure due to the simplified CFG. This also
  2137. // handles inserting all the cloned blocks into the correct loops.
  2138. SmallVector<Loop *, 4> NonChildClonedLoops;
  2139. for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
  2140. buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
  2141. // Now that our cloned loops have been built, we can update the original loop.
  2142. // First we delete the dead blocks from it and then we rebuild the loop
  2143. // structure taking these deletions into account.
  2144. deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
  2145. if (MSSAU && VerifyMemorySSA)
  2146. MSSAU->getMemorySSA()->verifyMemorySSA();
  2147. SmallVector<Loop *, 4> HoistedLoops;
  2148. bool IsStillLoop =
  2149. rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
  2150. if (MSSAU && VerifyMemorySSA)
  2151. MSSAU->getMemorySSA()->verifyMemorySSA();
  2152. // This transformation has a high risk of corrupting the dominator tree, and
  2153. // the below steps to rebuild loop structures will result in hard to debug
  2154. // errors in that case so verify that the dominator tree is sane first.
  2155. // FIXME: Remove this when the bugs stop showing up and rely on existing
  2156. // verification steps.
  2157. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  2158. if (BI && !PartiallyInvariant) {
  2159. // If we unswitched a branch which collapses the condition to a known
  2160. // constant we want to replace all the uses of the invariants within both
  2161. // the original and cloned blocks. We do this here so that we can use the
  2162. // now updated dominator tree to identify which side the users are on.
  2163. assert(UnswitchedSuccBBs.size() == 1 &&
  2164. "Only one possible unswitched block for a branch!");
  2165. BasicBlock *ClonedPH = ClonedPHs.begin()->second;
  2166. // When considering multiple partially-unswitched invariants
  2167. // we cant just go replace them with constants in both branches.
  2168. //
  2169. // For 'AND' we infer that true branch ("continue") means true
  2170. // for each invariant operand.
  2171. // For 'OR' we can infer that false branch ("continue") means false
  2172. // for each invariant operand.
  2173. // So it happens that for multiple-partial case we dont replace
  2174. // in the unswitched branch.
  2175. bool ReplaceUnswitched =
  2176. FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
  2177. ConstantInt *UnswitchedReplacement =
  2178. Direction ? ConstantInt::getTrue(BI->getContext())
  2179. : ConstantInt::getFalse(BI->getContext());
  2180. ConstantInt *ContinueReplacement =
  2181. Direction ? ConstantInt::getFalse(BI->getContext())
  2182. : ConstantInt::getTrue(BI->getContext());
  2183. for (Value *Invariant : Invariants) {
  2184. assert(!isa<Constant>(Invariant) &&
  2185. "Should not be replacing constant values!");
  2186. // Use make_early_inc_range here as set invalidates the iterator.
  2187. for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
  2188. Instruction *UserI = dyn_cast<Instruction>(U.getUser());
  2189. if (!UserI)
  2190. continue;
  2191. // Replace it with the 'continue' side if in the main loop body, and the
  2192. // unswitched if in the cloned blocks.
  2193. if (DT.dominates(LoopPH, UserI->getParent()))
  2194. U.set(ContinueReplacement);
  2195. else if (ReplaceUnswitched &&
  2196. DT.dominates(ClonedPH, UserI->getParent()))
  2197. U.set(UnswitchedReplacement);
  2198. }
  2199. }
  2200. }
  2201. // We can change which blocks are exit blocks of all the cloned sibling
  2202. // loops, the current loop, and any parent loops which shared exit blocks
  2203. // with the current loop. As a consequence, we need to re-form LCSSA for
  2204. // them. But we shouldn't need to re-form LCSSA for any child loops.
  2205. // FIXME: This could be made more efficient by tracking which exit blocks are
  2206. // new, and focusing on them, but that isn't likely to be necessary.
  2207. //
  2208. // In order to reasonably rebuild LCSSA we need to walk inside-out across the
  2209. // loop nest and update every loop that could have had its exits changed. We
  2210. // also need to cover any intervening loops. We add all of these loops to
  2211. // a list and sort them by loop depth to achieve this without updating
  2212. // unnecessary loops.
  2213. auto UpdateLoop = [&](Loop &UpdateL) {
  2214. #ifndef NDEBUG
  2215. UpdateL.verifyLoop();
  2216. for (Loop *ChildL : UpdateL) {
  2217. ChildL->verifyLoop();
  2218. assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
  2219. "Perturbed a child loop's LCSSA form!");
  2220. }
  2221. #endif
  2222. // First build LCSSA for this loop so that we can preserve it when
  2223. // forming dedicated exits. We don't want to perturb some other loop's
  2224. // LCSSA while doing that CFG edit.
  2225. formLCSSA(UpdateL, DT, &LI, SE);
  2226. // For loops reached by this loop's original exit blocks we may
  2227. // introduced new, non-dedicated exits. At least try to re-form dedicated
  2228. // exits for these loops. This may fail if they couldn't have dedicated
  2229. // exits to start with.
  2230. formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
  2231. };
  2232. // For non-child cloned loops and hoisted loops, we just need to update LCSSA
  2233. // and we can do it in any order as they don't nest relative to each other.
  2234. //
  2235. // Also check if any of the loops we have updated have become top-level loops
  2236. // as that will necessitate widening the outer loop scope.
  2237. for (Loop *UpdatedL :
  2238. llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
  2239. UpdateLoop(*UpdatedL);
  2240. if (UpdatedL->isOutermost())
  2241. OuterExitL = nullptr;
  2242. }
  2243. if (IsStillLoop) {
  2244. UpdateLoop(L);
  2245. if (L.isOutermost())
  2246. OuterExitL = nullptr;
  2247. }
  2248. // If the original loop had exit blocks, walk up through the outer most loop
  2249. // of those exit blocks to update LCSSA and form updated dedicated exits.
  2250. if (OuterExitL != &L)
  2251. for (Loop *OuterL = ParentL; OuterL != OuterExitL;
  2252. OuterL = OuterL->getParentLoop())
  2253. UpdateLoop(*OuterL);
  2254. #ifndef NDEBUG
  2255. // Verify the entire loop structure to catch any incorrect updates before we
  2256. // progress in the pass pipeline.
  2257. LI.verify(DT);
  2258. #endif
  2259. // Now that we've unswitched something, make callbacks to report the changes.
  2260. // For that we need to merge together the updated loops and the cloned loops
  2261. // and check whether the original loop survived.
  2262. SmallVector<Loop *, 4> SibLoops;
  2263. for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
  2264. if (UpdatedL->getParentLoop() == ParentL)
  2265. SibLoops.push_back(UpdatedL);
  2266. UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
  2267. if (MSSAU && VerifyMemorySSA)
  2268. MSSAU->getMemorySSA()->verifyMemorySSA();
  2269. if (BI)
  2270. ++NumBranches;
  2271. else
  2272. ++NumSwitches;
  2273. }
  2274. /// Recursively compute the cost of a dominator subtree based on the per-block
  2275. /// cost map provided.
  2276. ///
  2277. /// The recursive computation is memozied into the provided DT-indexed cost map
  2278. /// to allow querying it for most nodes in the domtree without it becoming
  2279. /// quadratic.
  2280. static InstructionCost computeDomSubtreeCost(
  2281. DomTreeNode &N,
  2282. const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
  2283. SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
  2284. // Don't accumulate cost (or recurse through) blocks not in our block cost
  2285. // map and thus not part of the duplication cost being considered.
  2286. auto BBCostIt = BBCostMap.find(N.getBlock());
  2287. if (BBCostIt == BBCostMap.end())
  2288. return 0;
  2289. // Lookup this node to see if we already computed its cost.
  2290. auto DTCostIt = DTCostMap.find(&N);
  2291. if (DTCostIt != DTCostMap.end())
  2292. return DTCostIt->second;
  2293. // If not, we have to compute it. We can't use insert above and update
  2294. // because computing the cost may insert more things into the map.
  2295. InstructionCost Cost = std::accumulate(
  2296. N.begin(), N.end(), BBCostIt->second,
  2297. [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
  2298. return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
  2299. });
  2300. bool Inserted = DTCostMap.insert({&N, Cost}).second;
  2301. (void)Inserted;
  2302. assert(Inserted && "Should not insert a node while visiting children!");
  2303. return Cost;
  2304. }
  2305. /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
  2306. /// making the following replacement:
  2307. ///
  2308. /// --code before guard--
  2309. /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
  2310. /// --code after guard--
  2311. ///
  2312. /// into
  2313. ///
  2314. /// --code before guard--
  2315. /// br i1 %cond, label %guarded, label %deopt
  2316. ///
  2317. /// guarded:
  2318. /// --code after guard--
  2319. ///
  2320. /// deopt:
  2321. /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
  2322. /// unreachable
  2323. ///
  2324. /// It also makes all relevant DT and LI updates, so that all structures are in
  2325. /// valid state after this transform.
  2326. static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
  2327. DominatorTree &DT, LoopInfo &LI,
  2328. MemorySSAUpdater *MSSAU) {
  2329. SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
  2330. LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
  2331. BasicBlock *CheckBB = GI->getParent();
  2332. if (MSSAU && VerifyMemorySSA)
  2333. MSSAU->getMemorySSA()->verifyMemorySSA();
  2334. // Remove all CheckBB's successors from DomTree. A block can be seen among
  2335. // successors more than once, but for DomTree it should be added only once.
  2336. SmallPtrSet<BasicBlock *, 4> Successors;
  2337. for (auto *Succ : successors(CheckBB))
  2338. if (Successors.insert(Succ).second)
  2339. DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
  2340. Instruction *DeoptBlockTerm =
  2341. SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
  2342. BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
  2343. // SplitBlockAndInsertIfThen inserts control flow that branches to
  2344. // DeoptBlockTerm if the condition is true. We want the opposite.
  2345. CheckBI->swapSuccessors();
  2346. BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
  2347. GuardedBlock->setName("guarded");
  2348. CheckBI->getSuccessor(1)->setName("deopt");
  2349. BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
  2350. if (MSSAU)
  2351. MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
  2352. GI->moveBefore(DeoptBlockTerm);
  2353. GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
  2354. // Add new successors of CheckBB into DomTree.
  2355. for (auto *Succ : successors(CheckBB))
  2356. DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
  2357. // Now the blocks that used to be CheckBB's successors are GuardedBlock's
  2358. // successors.
  2359. for (auto *Succ : Successors)
  2360. DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
  2361. // Make proper changes to DT.
  2362. DT.applyUpdates(DTUpdates);
  2363. // Inform LI of a new loop block.
  2364. L.addBasicBlockToLoop(GuardedBlock, LI);
  2365. if (MSSAU) {
  2366. MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
  2367. MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
  2368. if (VerifyMemorySSA)
  2369. MSSAU->getMemorySSA()->verifyMemorySSA();
  2370. }
  2371. ++NumGuards;
  2372. return CheckBI;
  2373. }
  2374. /// Cost multiplier is a way to limit potentially exponential behavior
  2375. /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
  2376. /// candidates available. Also accounting for the number of "sibling" loops with
  2377. /// the idea to account for previous unswitches that already happened on this
  2378. /// cluster of loops. There was an attempt to keep this formula simple,
  2379. /// just enough to limit the worst case behavior. Even if it is not that simple
  2380. /// now it is still not an attempt to provide a detailed heuristic size
  2381. /// prediction.
  2382. ///
  2383. /// TODO: Make a proper accounting of "explosion" effect for all kinds of
  2384. /// unswitch candidates, making adequate predictions instead of wild guesses.
  2385. /// That requires knowing not just the number of "remaining" candidates but
  2386. /// also costs of unswitching for each of these candidates.
  2387. static int CalculateUnswitchCostMultiplier(
  2388. const Instruction &TI, const Loop &L, const LoopInfo &LI,
  2389. const DominatorTree &DT,
  2390. ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
  2391. // Guards and other exiting conditions do not contribute to exponential
  2392. // explosion as soon as they dominate the latch (otherwise there might be
  2393. // another path to the latch remaining that does not allow to eliminate the
  2394. // loop copy on unswitch).
  2395. const BasicBlock *Latch = L.getLoopLatch();
  2396. const BasicBlock *CondBlock = TI.getParent();
  2397. if (DT.dominates(CondBlock, Latch) &&
  2398. (isGuard(&TI) ||
  2399. llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
  2400. return L.contains(SuccBB);
  2401. }) <= 1)) {
  2402. NumCostMultiplierSkipped++;
  2403. return 1;
  2404. }
  2405. auto *ParentL = L.getParentLoop();
  2406. int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
  2407. : std::distance(LI.begin(), LI.end()));
  2408. // Count amount of clones that all the candidates might cause during
  2409. // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
  2410. int UnswitchedClones = 0;
  2411. for (auto Candidate : UnswitchCandidates) {
  2412. const Instruction *CI = Candidate.TI;
  2413. const BasicBlock *CondBlock = CI->getParent();
  2414. bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
  2415. if (isGuard(CI)) {
  2416. if (!SkipExitingSuccessors)
  2417. UnswitchedClones++;
  2418. continue;
  2419. }
  2420. int NonExitingSuccessors =
  2421. llvm::count_if(successors(CondBlock),
  2422. [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
  2423. return !SkipExitingSuccessors || L.contains(SuccBB);
  2424. });
  2425. UnswitchedClones += Log2_32(NonExitingSuccessors);
  2426. }
  2427. // Ignore up to the "unscaled candidates" number of unswitch candidates
  2428. // when calculating the power-of-two scaling of the cost. The main idea
  2429. // with this control is to allow a small number of unswitches to happen
  2430. // and rely more on siblings multiplier (see below) when the number
  2431. // of candidates is small.
  2432. unsigned ClonesPower =
  2433. std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
  2434. // Allowing top-level loops to spread a bit more than nested ones.
  2435. int SiblingsMultiplier =
  2436. std::max((ParentL ? SiblingsCount
  2437. : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
  2438. 1);
  2439. // Compute the cost multiplier in a way that won't overflow by saturating
  2440. // at an upper bound.
  2441. int CostMultiplier;
  2442. if (ClonesPower > Log2_32(UnswitchThreshold) ||
  2443. SiblingsMultiplier > UnswitchThreshold)
  2444. CostMultiplier = UnswitchThreshold;
  2445. else
  2446. CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
  2447. (int)UnswitchThreshold);
  2448. LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
  2449. << " (siblings " << SiblingsMultiplier << " * clones "
  2450. << (1 << ClonesPower) << ")"
  2451. << " for unswitch candidate: " << TI << "\n");
  2452. return CostMultiplier;
  2453. }
  2454. static bool collectUnswitchCandidates(
  2455. SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
  2456. IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
  2457. const Loop &L, const LoopInfo &LI, AAResults &AA,
  2458. const MemorySSAUpdater *MSSAU) {
  2459. assert(UnswitchCandidates.empty() && "Should be!");
  2460. // Whether or not we should also collect guards in the loop.
  2461. bool CollectGuards = false;
  2462. if (UnswitchGuards) {
  2463. auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
  2464. Intrinsic::getName(Intrinsic::experimental_guard));
  2465. if (GuardDecl && !GuardDecl->use_empty())
  2466. CollectGuards = true;
  2467. }
  2468. for (auto *BB : L.blocks()) {
  2469. if (LI.getLoopFor(BB) != &L)
  2470. continue;
  2471. if (CollectGuards)
  2472. for (auto &I : *BB)
  2473. if (isGuard(&I)) {
  2474. auto *Cond =
  2475. skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
  2476. // TODO: Support AND, OR conditions and partial unswitching.
  2477. if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
  2478. UnswitchCandidates.push_back({&I, {Cond}});
  2479. }
  2480. if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
  2481. // We can only consider fully loop-invariant switch conditions as we need
  2482. // to completely eliminate the switch after unswitching.
  2483. if (!isa<Constant>(SI->getCondition()) &&
  2484. L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
  2485. UnswitchCandidates.push_back({SI, {SI->getCondition()}});
  2486. continue;
  2487. }
  2488. auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  2489. if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
  2490. BI->getSuccessor(0) == BI->getSuccessor(1))
  2491. continue;
  2492. Value *Cond = skipTrivialSelect(BI->getCondition());
  2493. if (isa<Constant>(Cond))
  2494. continue;
  2495. if (L.isLoopInvariant(Cond)) {
  2496. UnswitchCandidates.push_back({BI, {Cond}});
  2497. continue;
  2498. }
  2499. Instruction &CondI = *cast<Instruction>(Cond);
  2500. if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
  2501. TinyPtrVector<Value *> Invariants =
  2502. collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
  2503. if (Invariants.empty())
  2504. continue;
  2505. UnswitchCandidates.push_back({BI, std::move(Invariants)});
  2506. continue;
  2507. }
  2508. }
  2509. if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
  2510. !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
  2511. return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
  2512. })) {
  2513. MemorySSA *MSSA = MSSAU->getMemorySSA();
  2514. if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
  2515. LLVM_DEBUG(
  2516. dbgs() << "simple-loop-unswitch: Found partially invariant condition "
  2517. << *Info->InstToDuplicate[0] << "\n");
  2518. PartialIVInfo = *Info;
  2519. PartialIVCondBranch = L.getHeader()->getTerminator();
  2520. TinyPtrVector<Value *> ValsToDuplicate;
  2521. llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
  2522. UnswitchCandidates.push_back(
  2523. {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
  2524. }
  2525. }
  2526. return !UnswitchCandidates.empty();
  2527. }
  2528. static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
  2529. if (!L.isSafeToClone())
  2530. return false;
  2531. for (auto *BB : L.blocks())
  2532. for (auto &I : *BB) {
  2533. if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
  2534. return false;
  2535. if (auto *CB = dyn_cast<CallBase>(&I)) {
  2536. assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
  2537. if (CB->isConvergent())
  2538. return false;
  2539. }
  2540. }
  2541. // Check if there are irreducible CFG cycles in this loop. If so, we cannot
  2542. // easily unswitch non-trivial edges out of the loop. Doing so might turn the
  2543. // irreducible control flow into reducible control flow and introduce new
  2544. // loops "out of thin air". If we ever discover important use cases for doing
  2545. // this, we can add support to loop unswitch, but it is a lot of complexity
  2546. // for what seems little or no real world benefit.
  2547. LoopBlocksRPO RPOT(&L);
  2548. RPOT.perform(&LI);
  2549. if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
  2550. return false;
  2551. SmallVector<BasicBlock *, 4> ExitBlocks;
  2552. L.getUniqueExitBlocks(ExitBlocks);
  2553. // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
  2554. // instruction as we don't know how to split those exit blocks.
  2555. // FIXME: We should teach SplitBlock to handle this and remove this
  2556. // restriction.
  2557. for (auto *ExitBB : ExitBlocks) {
  2558. auto *I = ExitBB->getFirstNonPHI();
  2559. if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
  2560. LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
  2561. "in exit block\n");
  2562. return false;
  2563. }
  2564. }
  2565. return true;
  2566. }
  2567. static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
  2568. ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
  2569. const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
  2570. const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
  2571. // Given that unswitching these terminators will require duplicating parts of
  2572. // the loop, so we need to be able to model that cost. Compute the ephemeral
  2573. // values and set up a data structure to hold per-BB costs. We cache each
  2574. // block's cost so that we don't recompute this when considering different
  2575. // subsets of the loop for duplication during unswitching.
  2576. SmallPtrSet<const Value *, 4> EphValues;
  2577. CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
  2578. SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
  2579. // Compute the cost of each block, as well as the total loop cost. Also, bail
  2580. // out if we see instructions which are incompatible with loop unswitching
  2581. // (convergent, noduplicate, or cross-basic-block tokens).
  2582. // FIXME: We might be able to safely handle some of these in non-duplicated
  2583. // regions.
  2584. TargetTransformInfo::TargetCostKind CostKind =
  2585. L.getHeader()->getParent()->hasMinSize()
  2586. ? TargetTransformInfo::TCK_CodeSize
  2587. : TargetTransformInfo::TCK_SizeAndLatency;
  2588. InstructionCost LoopCost = 0;
  2589. for (auto *BB : L.blocks()) {
  2590. InstructionCost Cost = 0;
  2591. for (auto &I : *BB) {
  2592. if (EphValues.count(&I))
  2593. continue;
  2594. Cost += TTI.getInstructionCost(&I, CostKind);
  2595. }
  2596. assert(Cost >= 0 && "Must not have negative costs!");
  2597. LoopCost += Cost;
  2598. assert(LoopCost >= 0 && "Must not have negative loop costs!");
  2599. BBCostMap[BB] = Cost;
  2600. }
  2601. LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
  2602. // Now we find the best candidate by searching for the one with the following
  2603. // properties in order:
  2604. //
  2605. // 1) An unswitching cost below the threshold
  2606. // 2) The smallest number of duplicated unswitch candidates (to avoid
  2607. // creating redundant subsequent unswitching)
  2608. // 3) The smallest cost after unswitching.
  2609. //
  2610. // We prioritize reducing fanout of unswitch candidates provided the cost
  2611. // remains below the threshold because this has a multiplicative effect.
  2612. //
  2613. // This requires memoizing each dominator subtree to avoid redundant work.
  2614. //
  2615. // FIXME: Need to actually do the number of candidates part above.
  2616. SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
  2617. // Given a terminator which might be unswitched, computes the non-duplicated
  2618. // cost for that terminator.
  2619. auto ComputeUnswitchedCost = [&](Instruction &TI,
  2620. bool FullUnswitch) -> InstructionCost {
  2621. BasicBlock &BB = *TI.getParent();
  2622. SmallPtrSet<BasicBlock *, 4> Visited;
  2623. InstructionCost Cost = 0;
  2624. for (BasicBlock *SuccBB : successors(&BB)) {
  2625. // Don't count successors more than once.
  2626. if (!Visited.insert(SuccBB).second)
  2627. continue;
  2628. // If this is a partial unswitch candidate, then it must be a conditional
  2629. // branch with a condition of either `or`, `and`, their corresponding
  2630. // select forms or partially invariant instructions. In that case, one of
  2631. // the successors is necessarily duplicated, so don't even try to remove
  2632. // its cost.
  2633. if (!FullUnswitch) {
  2634. auto &BI = cast<BranchInst>(TI);
  2635. Value *Cond = skipTrivialSelect(BI.getCondition());
  2636. if (match(Cond, m_LogicalAnd())) {
  2637. if (SuccBB == BI.getSuccessor(1))
  2638. continue;
  2639. } else if (match(Cond, m_LogicalOr())) {
  2640. if (SuccBB == BI.getSuccessor(0))
  2641. continue;
  2642. } else if ((PartialIVInfo.KnownValue->isOneValue() &&
  2643. SuccBB == BI.getSuccessor(0)) ||
  2644. (!PartialIVInfo.KnownValue->isOneValue() &&
  2645. SuccBB == BI.getSuccessor(1)))
  2646. continue;
  2647. }
  2648. // This successor's domtree will not need to be duplicated after
  2649. // unswitching if the edge to the successor dominates it (and thus the
  2650. // entire tree). This essentially means there is no other path into this
  2651. // subtree and so it will end up live in only one clone of the loop.
  2652. if (SuccBB->getUniquePredecessor() ||
  2653. llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
  2654. return PredBB == &BB || DT.dominates(SuccBB, PredBB);
  2655. })) {
  2656. Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
  2657. assert(Cost <= LoopCost &&
  2658. "Non-duplicated cost should never exceed total loop cost!");
  2659. }
  2660. }
  2661. // Now scale the cost by the number of unique successors minus one. We
  2662. // subtract one because there is already at least one copy of the entire
  2663. // loop. This is computing the new cost of unswitching a condition.
  2664. // Note that guards always have 2 unique successors that are implicit and
  2665. // will be materialized if we decide to unswitch it.
  2666. int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
  2667. assert(SuccessorsCount > 1 &&
  2668. "Cannot unswitch a condition without multiple distinct successors!");
  2669. return (LoopCost - Cost) * (SuccessorsCount - 1);
  2670. };
  2671. std::optional<NonTrivialUnswitchCandidate> Best;
  2672. for (auto &Candidate : UnswitchCandidates) {
  2673. Instruction &TI = *Candidate.TI;
  2674. ArrayRef<Value *> Invariants = Candidate.Invariants;
  2675. BranchInst *BI = dyn_cast<BranchInst>(&TI);
  2676. InstructionCost CandidateCost = ComputeUnswitchedCost(
  2677. TI, /*FullUnswitch*/ !BI ||
  2678. (Invariants.size() == 1 &&
  2679. Invariants[0] == skipTrivialSelect(BI->getCondition())));
  2680. // Calculate cost multiplier which is a tool to limit potentially
  2681. // exponential behavior of loop-unswitch.
  2682. if (EnableUnswitchCostMultiplier) {
  2683. int CostMultiplier =
  2684. CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
  2685. assert(
  2686. (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
  2687. "cost multiplier needs to be in the range of 1..UnswitchThreshold");
  2688. CandidateCost *= CostMultiplier;
  2689. LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
  2690. << " (multiplier: " << CostMultiplier << ")"
  2691. << " for unswitch candidate: " << TI << "\n");
  2692. } else {
  2693. LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
  2694. << " for unswitch candidate: " << TI << "\n");
  2695. }
  2696. if (!Best || CandidateCost < Best->Cost) {
  2697. Best = Candidate;
  2698. Best->Cost = CandidateCost;
  2699. }
  2700. }
  2701. assert(Best && "Must be!");
  2702. return *Best;
  2703. }
  2704. static bool unswitchBestCondition(
  2705. Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
  2706. AAResults &AA, TargetTransformInfo &TTI,
  2707. function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
  2708. ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
  2709. function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
  2710. // Collect all invariant conditions within this loop (as opposed to an inner
  2711. // loop which would be handled when visiting that inner loop).
  2712. SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
  2713. IVConditionInfo PartialIVInfo;
  2714. Instruction *PartialIVCondBranch = nullptr;
  2715. // If we didn't find any candidates, we're done.
  2716. if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
  2717. PartialIVCondBranch, L, LI, AA, MSSAU))
  2718. return false;
  2719. LLVM_DEBUG(
  2720. dbgs() << "Considering " << UnswitchCandidates.size()
  2721. << " non-trivial loop invariant conditions for unswitching.\n");
  2722. NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
  2723. UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
  2724. assert(Best.TI && "Failed to find loop unswitch candidate");
  2725. assert(Best.Cost && "Failed to compute cost");
  2726. if (*Best.Cost >= UnswitchThreshold) {
  2727. LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
  2728. << "\n");
  2729. return false;
  2730. }
  2731. if (Best.TI != PartialIVCondBranch)
  2732. PartialIVInfo.InstToDuplicate.clear();
  2733. // If the best candidate is a guard, turn it into a branch.
  2734. if (isGuard(Best.TI))
  2735. Best.TI =
  2736. turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
  2737. LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
  2738. << ") terminator: " << *Best.TI << "\n");
  2739. unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
  2740. LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
  2741. return true;
  2742. }
  2743. /// Unswitch control flow predicated on loop invariant conditions.
  2744. ///
  2745. /// This first hoists all branches or switches which are trivial (IE, do not
  2746. /// require duplicating any part of the loop) out of the loop body. It then
  2747. /// looks at other loop invariant control flows and tries to unswitch those as
  2748. /// well by cloning the loop if the result is small enough.
  2749. ///
  2750. /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
  2751. /// also updated based on the unswitch. The `MSSA` analysis is also updated if
  2752. /// valid (i.e. its use is enabled).
  2753. ///
  2754. /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
  2755. /// true, we will attempt to do non-trivial unswitching as well as trivial
  2756. /// unswitching.
  2757. ///
  2758. /// The `UnswitchCB` callback provided will be run after unswitching is
  2759. /// complete, with the first parameter set to `true` if the provided loop
  2760. /// remains a loop, and a list of new sibling loops created.
  2761. ///
  2762. /// If `SE` is non-null, we will update that analysis based on the unswitching
  2763. /// done.
  2764. static bool
  2765. unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
  2766. AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
  2767. bool NonTrivial,
  2768. function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
  2769. ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
  2770. ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
  2771. function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
  2772. assert(L.isRecursivelyLCSSAForm(DT, LI) &&
  2773. "Loops must be in LCSSA form before unswitching.");
  2774. // Must be in loop simplified form: we need a preheader and dedicated exits.
  2775. if (!L.isLoopSimplifyForm())
  2776. return false;
  2777. // Try trivial unswitch first before loop over other basic blocks in the loop.
  2778. if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
  2779. // If we unswitched successfully we will want to clean up the loop before
  2780. // processing it further so just mark it as unswitched and return.
  2781. UnswitchCB(/*CurrentLoopValid*/ true, false, {});
  2782. return true;
  2783. }
  2784. // Check whether we should continue with non-trivial conditions.
  2785. // EnableNonTrivialUnswitch: Global variable that forces non-trivial
  2786. // unswitching for testing and debugging.
  2787. // NonTrivial: Parameter that enables non-trivial unswitching for this
  2788. // invocation of the transform. But this should be allowed only
  2789. // for targets without branch divergence.
  2790. //
  2791. // FIXME: If divergence analysis becomes available to a loop
  2792. // transform, we should allow unswitching for non-trivial uniform
  2793. // branches even on targets that have divergence.
  2794. // https://bugs.llvm.org/show_bug.cgi?id=48819
  2795. bool ContinueWithNonTrivial =
  2796. EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
  2797. if (!ContinueWithNonTrivial)
  2798. return false;
  2799. // Skip non-trivial unswitching for optsize functions.
  2800. if (L.getHeader()->getParent()->hasOptSize())
  2801. return false;
  2802. // Skip cold loops, as unswitching them brings little benefit
  2803. // but increases the code size
  2804. if (PSI && PSI->hasProfileSummary() && BFI &&
  2805. PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
  2806. LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
  2807. return false;
  2808. }
  2809. // Perform legality checks.
  2810. if (!isSafeForNoNTrivialUnswitching(L, LI))
  2811. return false;
  2812. // For non-trivial unswitching, because it often creates new loops, we rely on
  2813. // the pass manager to iterate on the loops rather than trying to immediately
  2814. // reach a fixed point. There is no substantial advantage to iterating
  2815. // internally, and if any of the new loops are simplified enough to contain
  2816. // trivial unswitching we want to prefer those.
  2817. // Try to unswitch the best invariant condition. We prefer this full unswitch to
  2818. // a partial unswitch when possible below the threshold.
  2819. if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
  2820. DestroyLoopCB))
  2821. return true;
  2822. // No other opportunities to unswitch.
  2823. return false;
  2824. }
  2825. PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
  2826. LoopStandardAnalysisResults &AR,
  2827. LPMUpdater &U) {
  2828. Function &F = *L.getHeader()->getParent();
  2829. (void)F;
  2830. ProfileSummaryInfo *PSI = nullptr;
  2831. if (auto OuterProxy =
  2832. AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
  2833. .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
  2834. PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  2835. LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
  2836. << "\n");
  2837. // Save the current loop name in a variable so that we can report it even
  2838. // after it has been deleted.
  2839. std::string LoopName = std::string(L.getName());
  2840. auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
  2841. bool PartiallyInvariant,
  2842. ArrayRef<Loop *> NewLoops) {
  2843. // If we did a non-trivial unswitch, we have added new (cloned) loops.
  2844. if (!NewLoops.empty())
  2845. U.addSiblingLoops(NewLoops);
  2846. // If the current loop remains valid, we should revisit it to catch any
  2847. // other unswitch opportunities. Otherwise, we need to mark it as deleted.
  2848. if (CurrentLoopValid) {
  2849. if (PartiallyInvariant) {
  2850. // Mark the new loop as partially unswitched, to avoid unswitching on
  2851. // the same condition again.
  2852. auto &Context = L.getHeader()->getContext();
  2853. MDNode *DisableUnswitchMD = MDNode::get(
  2854. Context,
  2855. MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
  2856. MDNode *NewLoopID = makePostTransformationMetadata(
  2857. Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
  2858. {DisableUnswitchMD});
  2859. L.setLoopID(NewLoopID);
  2860. } else
  2861. U.revisitCurrentLoop();
  2862. } else
  2863. U.markLoopAsDeleted(L, LoopName);
  2864. };
  2865. auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
  2866. U.markLoopAsDeleted(L, Name);
  2867. };
  2868. std::optional<MemorySSAUpdater> MSSAU;
  2869. if (AR.MSSA) {
  2870. MSSAU = MemorySSAUpdater(AR.MSSA);
  2871. if (VerifyMemorySSA)
  2872. AR.MSSA->verifyMemorySSA();
  2873. }
  2874. if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
  2875. UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
  2876. DestroyLoopCB))
  2877. return PreservedAnalyses::all();
  2878. if (AR.MSSA && VerifyMemorySSA)
  2879. AR.MSSA->verifyMemorySSA();
  2880. // Historically this pass has had issues with the dominator tree so verify it
  2881. // in asserts builds.
  2882. assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
  2883. auto PA = getLoopPassPreservedAnalyses();
  2884. if (AR.MSSA)
  2885. PA.preserve<MemorySSAAnalysis>();
  2886. return PA;
  2887. }
  2888. void SimpleLoopUnswitchPass::printPipeline(
  2889. raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  2890. static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
  2891. OS, MapClassName2PassName);
  2892. OS << "<";
  2893. OS << (NonTrivial ? "" : "no-") << "nontrivial;";
  2894. OS << (Trivial ? "" : "no-") << "trivial";
  2895. OS << ">";
  2896. }
  2897. namespace {
  2898. class SimpleLoopUnswitchLegacyPass : public LoopPass {
  2899. bool NonTrivial;
  2900. public:
  2901. static char ID; // Pass ID, replacement for typeid
  2902. explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
  2903. : LoopPass(ID), NonTrivial(NonTrivial) {
  2904. initializeSimpleLoopUnswitchLegacyPassPass(
  2905. *PassRegistry::getPassRegistry());
  2906. }
  2907. bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  2908. void getAnalysisUsage(AnalysisUsage &AU) const override {
  2909. AU.addRequired<AssumptionCacheTracker>();
  2910. AU.addRequired<TargetTransformInfoWrapperPass>();
  2911. AU.addRequired<MemorySSAWrapperPass>();
  2912. AU.addPreserved<MemorySSAWrapperPass>();
  2913. getLoopAnalysisUsage(AU);
  2914. }
  2915. };
  2916. } // end anonymous namespace
  2917. bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
  2918. if (skipLoop(L))
  2919. return false;
  2920. Function &F = *L->getHeader()->getParent();
  2921. LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
  2922. << "\n");
  2923. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  2924. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  2925. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  2926. auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  2927. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  2928. MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  2929. MemorySSAUpdater MSSAU(MSSA);
  2930. auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
  2931. auto *SE = SEWP ? &SEWP->getSE() : nullptr;
  2932. auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
  2933. ArrayRef<Loop *> NewLoops) {
  2934. // If we did a non-trivial unswitch, we have added new (cloned) loops.
  2935. for (auto *NewL : NewLoops)
  2936. LPM.addLoop(*NewL);
  2937. // If the current loop remains valid, re-add it to the queue. This is
  2938. // a little wasteful as we'll finish processing the current loop as well,
  2939. // but it is the best we can do in the old PM.
  2940. if (CurrentLoopValid) {
  2941. // If the current loop has been unswitched using a partially invariant
  2942. // condition, we should not re-add the current loop to avoid unswitching
  2943. // on the same condition again.
  2944. if (!PartiallyInvariant)
  2945. LPM.addLoop(*L);
  2946. } else
  2947. LPM.markLoopAsDeleted(*L);
  2948. };
  2949. auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
  2950. LPM.markLoopAsDeleted(L);
  2951. };
  2952. if (VerifyMemorySSA)
  2953. MSSA->verifyMemorySSA();
  2954. bool Changed =
  2955. unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
  2956. &MSSAU, nullptr, nullptr, DestroyLoopCB);
  2957. if (VerifyMemorySSA)
  2958. MSSA->verifyMemorySSA();
  2959. // Historically this pass has had issues with the dominator tree so verify it
  2960. // in asserts builds.
  2961. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  2962. return Changed;
  2963. }
  2964. char SimpleLoopUnswitchLegacyPass::ID = 0;
  2965. INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
  2966. "Simple unswitch loops", false, false)
  2967. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  2968. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  2969. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  2970. INITIALIZE_PASS_DEPENDENCY(LoopPass)
  2971. INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
  2972. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  2973. INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
  2974. "Simple unswitch loops", false, false)
  2975. Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
  2976. return new SimpleLoopUnswitchLegacyPass(NonTrivial);
  2977. }