LoopUnswitch.cpp 69 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774
  1. //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass transforms loops that contain branches on loop-invariant conditions
  10. // to multiple loops. For example, it turns the left into the right code:
  11. //
  12. // for (...) if (lic)
  13. // A for (...)
  14. // if (lic) A; B; C
  15. // B else
  16. // C for (...)
  17. // A; C
  18. //
  19. // This can increase the size of the code exponentially (doubling it every time
  20. // a loop is unswitched) so we only unswitch if the resultant code will be
  21. // smaller than a threshold.
  22. //
  23. // This pass expects LICM to be run before it to hoist invariant conditions out
  24. // of the loop, to make the unswitching opportunity obvious.
  25. //
  26. //===----------------------------------------------------------------------===//
  27. #include "llvm/ADT/DenseMap.h"
  28. #include "llvm/ADT/STLExtras.h"
  29. #include "llvm/ADT/SmallPtrSet.h"
  30. #include "llvm/ADT/SmallVector.h"
  31. #include "llvm/ADT/Statistic.h"
  32. #include "llvm/Analysis/AssumptionCache.h"
  33. #include "llvm/Analysis/CodeMetrics.h"
  34. #include "llvm/Analysis/InstructionSimplify.h"
  35. #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
  36. #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  37. #include "llvm/Analysis/LoopInfo.h"
  38. #include "llvm/Analysis/LoopIterator.h"
  39. #include "llvm/Analysis/LoopPass.h"
  40. #include "llvm/Analysis/MemorySSA.h"
  41. #include "llvm/Analysis/MemorySSAUpdater.h"
  42. #include "llvm/Analysis/MustExecute.h"
  43. #include "llvm/Analysis/ScalarEvolution.h"
  44. #include "llvm/Analysis/TargetTransformInfo.h"
  45. #include "llvm/IR/Attributes.h"
  46. #include "llvm/IR/BasicBlock.h"
  47. #include "llvm/IR/Constant.h"
  48. #include "llvm/IR/Constants.h"
  49. #include "llvm/IR/DerivedTypes.h"
  50. #include "llvm/IR/Dominators.h"
  51. #include "llvm/IR/Function.h"
  52. #include "llvm/IR/IRBuilder.h"
  53. #include "llvm/IR/InstrTypes.h"
  54. #include "llvm/IR/Instruction.h"
  55. #include "llvm/IR/Instructions.h"
  56. #include "llvm/IR/IntrinsicInst.h"
  57. #include "llvm/IR/Intrinsics.h"
  58. #include "llvm/IR/Module.h"
  59. #include "llvm/IR/Type.h"
  60. #include "llvm/IR/User.h"
  61. #include "llvm/IR/Value.h"
  62. #include "llvm/IR/ValueHandle.h"
  63. #include "llvm/InitializePasses.h"
  64. #include "llvm/Pass.h"
  65. #include "llvm/Support/Casting.h"
  66. #include "llvm/Support/CommandLine.h"
  67. #include "llvm/Support/Debug.h"
  68. #include "llvm/Support/raw_ostream.h"
  69. #include "llvm/Transforms/Scalar.h"
  70. #include "llvm/Transforms/Scalar/LoopPassManager.h"
  71. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  72. #include "llvm/Transforms/Utils/Cloning.h"
  73. #include "llvm/Transforms/Utils/Local.h"
  74. #include "llvm/Transforms/Utils/LoopUtils.h"
  75. #include "llvm/Transforms/Utils/ValueMapper.h"
  76. #include <algorithm>
  77. #include <cassert>
  78. #include <map>
  79. #include <set>
  80. #include <tuple>
  81. #include <utility>
  82. #include <vector>
  83. using namespace llvm;
  84. #define DEBUG_TYPE "loop-unswitch"
  85. STATISTIC(NumBranches, "Number of branches unswitched");
  86. STATISTIC(NumSwitches, "Number of switches unswitched");
  87. STATISTIC(NumGuards, "Number of guards unswitched");
  88. STATISTIC(NumSelects , "Number of selects unswitched");
  89. STATISTIC(NumTrivial , "Number of unswitches that are trivial");
  90. STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
  91. STATISTIC(TotalInsts, "Total number of instructions analyzed");
  92. // The specific value of 100 here was chosen based only on intuition and a
  93. // few specific examples.
  94. static cl::opt<unsigned>
  95. Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
  96. cl::init(100), cl::Hidden);
  97. static cl::opt<unsigned>
  98. MSSAThreshold("loop-unswitch-memoryssa-threshold",
  99. cl::desc("Max number of memory uses to explore during "
  100. "partial unswitching analysis"),
  101. cl::init(100), cl::Hidden);
  102. namespace {
  103. class LUAnalysisCache {
  104. using UnswitchedValsMap =
  105. DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>;
  106. using UnswitchedValsIt = UnswitchedValsMap::iterator;
  107. struct LoopProperties {
  108. unsigned CanBeUnswitchedCount;
  109. unsigned WasUnswitchedCount;
  110. unsigned SizeEstimation;
  111. UnswitchedValsMap UnswitchedVals;
  112. };
  113. // Here we use std::map instead of DenseMap, since we need to keep valid
  114. // LoopProperties pointer for current loop for better performance.
  115. using LoopPropsMap = std::map<const Loop *, LoopProperties>;
  116. using LoopPropsMapIt = LoopPropsMap::iterator;
  117. LoopPropsMap LoopsProperties;
  118. UnswitchedValsMap *CurLoopInstructions = nullptr;
  119. LoopProperties *CurrentLoopProperties = nullptr;
  120. // A loop unswitching with an estimated cost above this threshold
  121. // is not performed. MaxSize is turned into unswitching quota for
  122. // the current loop, and reduced correspondingly, though note that
  123. // the quota is returned by releaseMemory() when the loop has been
  124. // processed, so that MaxSize will return to its previous
  125. // value. So in most cases MaxSize will equal the Threshold flag
  126. // when a new loop is processed. An exception to that is that
  127. // MaxSize will have a smaller value while processing nested loops
  128. // that were introduced due to loop unswitching of an outer loop.
  129. //
  130. // FIXME: The way that MaxSize works is subtle and depends on the
  131. // pass manager processing loops and calling releaseMemory() in a
  132. // specific order. It would be good to find a more straightforward
  133. // way of doing what MaxSize does.
  134. unsigned MaxSize;
  135. public:
  136. LUAnalysisCache() : MaxSize(Threshold) {}
  137. // Analyze loop. Check its size, calculate is it possible to unswitch
  138. // it. Returns true if we can unswitch this loop.
  139. bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
  140. AssumptionCache *AC);
  141. // Clean all data related to given loop.
  142. void forgetLoop(const Loop *L);
  143. // Mark case value as unswitched.
  144. // Since SI instruction can be partly unswitched, in order to avoid
  145. // extra unswitching in cloned loops keep track all unswitched values.
  146. void setUnswitched(const SwitchInst *SI, const Value *V);
  147. // Check was this case value unswitched before or not.
  148. bool isUnswitched(const SwitchInst *SI, const Value *V);
  149. // Returns true if another unswitching could be done within the cost
  150. // threshold.
  151. bool costAllowsUnswitching();
  152. // Clone all loop-unswitch related loop properties.
  153. // Redistribute unswitching quotas.
  154. // Note, that new loop data is stored inside the VMap.
  155. void cloneData(const Loop *NewLoop, const Loop *OldLoop,
  156. const ValueToValueMapTy &VMap);
  157. };
  158. class LoopUnswitch : public LoopPass {
  159. LoopInfo *LI; // Loop information
  160. LPPassManager *LPM;
  161. AssumptionCache *AC;
  162. // Used to check if second loop needs processing after
  163. // rewriteLoopBodyWithConditionConstant rewrites first loop.
  164. std::vector<Loop*> LoopProcessWorklist;
  165. LUAnalysisCache BranchesInfo;
  166. bool OptimizeForSize;
  167. bool RedoLoop = false;
  168. Loop *CurrentLoop = nullptr;
  169. DominatorTree *DT = nullptr;
  170. MemorySSA *MSSA = nullptr;
  171. AAResults *AA = nullptr;
  172. std::unique_ptr<MemorySSAUpdater> MSSAU;
  173. BasicBlock *LoopHeader = nullptr;
  174. BasicBlock *LoopPreheader = nullptr;
  175. bool SanitizeMemory;
  176. SimpleLoopSafetyInfo SafetyInfo;
  177. // LoopBlocks contains all of the basic blocks of the loop, including the
  178. // preheader of the loop, the body of the loop, and the exit blocks of the
  179. // loop, in that order.
  180. std::vector<BasicBlock*> LoopBlocks;
  181. // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
  182. std::vector<BasicBlock*> NewBlocks;
  183. bool HasBranchDivergence;
  184. public:
  185. static char ID; // Pass ID, replacement for typeid
  186. explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false)
  187. : LoopPass(ID), OptimizeForSize(Os),
  188. HasBranchDivergence(HasBranchDivergence) {
  189. initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
  190. }
  191. bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  192. bool processCurrentLoop();
  193. bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
  194. /// This transformation requires natural loop information & requires that
  195. /// loop preheaders be inserted into the CFG.
  196. ///
  197. void getAnalysisUsage(AnalysisUsage &AU) const override {
  198. // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
  199. // can remain part of the same loop pass as LICM
  200. AU.addPreserved<LazyBlockFrequencyInfoPass>();
  201. AU.addPreserved<LazyBranchProbabilityInfoPass>();
  202. AU.addRequired<AssumptionCacheTracker>();
  203. AU.addRequired<TargetTransformInfoWrapperPass>();
  204. AU.addRequired<MemorySSAWrapperPass>();
  205. AU.addPreserved<MemorySSAWrapperPass>();
  206. if (HasBranchDivergence)
  207. AU.addRequired<LegacyDivergenceAnalysis>();
  208. getLoopAnalysisUsage(AU);
  209. }
  210. private:
  211. void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); }
  212. void initLoopData() {
  213. LoopHeader = CurrentLoop->getHeader();
  214. LoopPreheader = CurrentLoop->getLoopPreheader();
  215. }
  216. /// Split all of the edges from inside the loop to their exit blocks.
  217. /// Update the appropriate Phi nodes as we do so.
  218. void splitExitEdges(Loop *L,
  219. const SmallVectorImpl<BasicBlock *> &ExitBlocks);
  220. bool tryTrivialLoopUnswitch(bool &Changed);
  221. bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
  222. Instruction *TI = nullptr,
  223. ArrayRef<Instruction *> ToDuplicate = {});
  224. void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
  225. BasicBlock *ExitBlock, Instruction *TI);
  226. void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
  227. Instruction *TI,
  228. ArrayRef<Instruction *> ToDuplicate = {});
  229. void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
  230. Constant *Val, bool IsEqual);
  231. void
  232. emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
  233. BasicBlock *TrueDest, BasicBlock *FalseDest,
  234. BranchInst *OldBranch, Instruction *TI,
  235. ArrayRef<Instruction *> ToDuplicate = {});
  236. void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
  237. /// Given that the Invariant is not equal to Val. Simplify instructions
  238. /// in the loop.
  239. Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
  240. Constant *Val);
  241. };
  242. } // end anonymous namespace
  243. // Analyze loop. Check its size, calculate is it possible to unswitch
  244. // it. Returns true if we can unswitch this loop.
  245. bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
  246. AssumptionCache *AC) {
  247. LoopPropsMapIt PropsIt;
  248. bool Inserted;
  249. std::tie(PropsIt, Inserted) =
  250. LoopsProperties.insert(std::make_pair(L, LoopProperties()));
  251. LoopProperties &Props = PropsIt->second;
  252. if (Inserted) {
  253. // New loop.
  254. // Limit the number of instructions to avoid causing significant code
  255. // expansion, and the number of basic blocks, to avoid loops with
  256. // large numbers of branches which cause loop unswitching to go crazy.
  257. // This is a very ad-hoc heuristic.
  258. SmallPtrSet<const Value *, 32> EphValues;
  259. CodeMetrics::collectEphemeralValues(L, AC, EphValues);
  260. // FIXME: This is overly conservative because it does not take into
  261. // consideration code simplification opportunities and code that can
  262. // be shared by the resultant unswitched loops.
  263. CodeMetrics Metrics;
  264. for (BasicBlock *BB : L->blocks())
  265. Metrics.analyzeBasicBlock(BB, TTI, EphValues);
  266. Props.SizeEstimation = Metrics.NumInsts;
  267. Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
  268. Props.WasUnswitchedCount = 0;
  269. MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
  270. if (Metrics.notDuplicatable) {
  271. LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()
  272. << ", contents cannot be "
  273. << "duplicated!\n");
  274. return false;
  275. }
  276. }
  277. // Be careful. This links are good only before new loop addition.
  278. CurrentLoopProperties = &Props;
  279. CurLoopInstructions = &Props.UnswitchedVals;
  280. return true;
  281. }
  282. // Clean all data related to given loop.
  283. void LUAnalysisCache::forgetLoop(const Loop *L) {
  284. LoopPropsMapIt LIt = LoopsProperties.find(L);
  285. if (LIt != LoopsProperties.end()) {
  286. LoopProperties &Props = LIt->second;
  287. MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
  288. Props.SizeEstimation;
  289. LoopsProperties.erase(LIt);
  290. }
  291. CurrentLoopProperties = nullptr;
  292. CurLoopInstructions = nullptr;
  293. }
  294. // Mark case value as unswitched.
  295. // Since SI instruction can be partly unswitched, in order to avoid
  296. // extra unswitching in cloned loops keep track all unswitched values.
  297. void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
  298. (*CurLoopInstructions)[SI].insert(V);
  299. }
  300. // Check was this case value unswitched before or not.
  301. bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
  302. return (*CurLoopInstructions)[SI].count(V);
  303. }
  304. bool LUAnalysisCache::costAllowsUnswitching() {
  305. return CurrentLoopProperties->CanBeUnswitchedCount > 0;
  306. }
  307. // Clone all loop-unswitch related loop properties.
  308. // Redistribute unswitching quotas.
  309. // Note, that new loop data is stored inside the VMap.
  310. void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
  311. const ValueToValueMapTy &VMap) {
  312. LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
  313. LoopProperties &OldLoopProps = *CurrentLoopProperties;
  314. UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
  315. // Reallocate "can-be-unswitched quota"
  316. --OldLoopProps.CanBeUnswitchedCount;
  317. ++OldLoopProps.WasUnswitchedCount;
  318. NewLoopProps.WasUnswitchedCount = 0;
  319. unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
  320. NewLoopProps.CanBeUnswitchedCount = Quota / 2;
  321. OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
  322. NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
  323. // Clone unswitched values info:
  324. // for new loop switches we clone info about values that was
  325. // already unswitched and has redundant successors.
  326. for (const auto &I : Insts) {
  327. const SwitchInst *OldInst = I.first;
  328. Value *NewI = VMap.lookup(OldInst);
  329. const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
  330. assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
  331. NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
  332. }
  333. }
  334. char LoopUnswitch::ID = 0;
  335. INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
  336. false, false)
  337. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  338. INITIALIZE_PASS_DEPENDENCY(LoopPass)
  339. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  340. INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
  341. INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
  342. INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
  343. false, false)
  344. Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) {
  345. return new LoopUnswitch(Os, HasBranchDivergence);
  346. }
  347. /// Operator chain lattice.
  348. enum OperatorChain {
  349. OC_OpChainNone, ///< There is no operator.
  350. OC_OpChainOr, ///< There are only ORs.
  351. OC_OpChainAnd, ///< There are only ANDs.
  352. OC_OpChainMixed ///< There are ANDs and ORs.
  353. };
  354. /// Cond is a condition that occurs in L. If it is invariant in the loop, or has
  355. /// an invariant piece, return the invariant. Otherwise, return null.
  356. //
  357. /// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a
  358. /// mixed operator chain, as we can not reliably find a value which will
  359. /// simplify the operator chain. If the chain is AND-only or OR-only, we can use
  360. /// 0 or ~0 to simplify the chain.
  361. ///
  362. /// NOTE: In case a partial LIV and a mixed operator chain, we may be able to
  363. /// simplify the condition itself to a loop variant condition, but at the
  364. /// cost of creating an entirely new loop.
  365. static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
  366. OperatorChain &ParentChain,
  367. DenseMap<Value *, Value *> &Cache,
  368. MemorySSAUpdater *MSSAU) {
  369. auto CacheIt = Cache.find(Cond);
  370. if (CacheIt != Cache.end())
  371. return CacheIt->second;
  372. // We started analyze new instruction, increment scanned instructions counter.
  373. ++TotalInsts;
  374. // We can never unswitch on vector conditions.
  375. if (Cond->getType()->isVectorTy())
  376. return nullptr;
  377. // Constants should be folded, not unswitched on!
  378. if (isa<Constant>(Cond)) return nullptr;
  379. // TODO: Handle: br (VARIANT|INVARIANT).
  380. // Hoist simple values out.
  381. if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) {
  382. Cache[Cond] = Cond;
  383. return Cond;
  384. }
  385. // Walk up the operator chain to find partial invariant conditions.
  386. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
  387. if (BO->getOpcode() == Instruction::And ||
  388. BO->getOpcode() == Instruction::Or) {
  389. // Given the previous operator, compute the current operator chain status.
  390. OperatorChain NewChain;
  391. switch (ParentChain) {
  392. case OC_OpChainNone:
  393. NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
  394. OC_OpChainOr;
  395. break;
  396. case OC_OpChainOr:
  397. NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
  398. OC_OpChainMixed;
  399. break;
  400. case OC_OpChainAnd:
  401. NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
  402. OC_OpChainMixed;
  403. break;
  404. case OC_OpChainMixed:
  405. NewChain = OC_OpChainMixed;
  406. break;
  407. }
  408. // If we reach a Mixed state, we do not want to keep walking up as we can not
  409. // reliably find a value that will simplify the chain. With this check, we
  410. // will return null on the first sight of mixed chain and the caller will
  411. // either backtrack to find partial LIV in other operand or return null.
  412. if (NewChain != OC_OpChainMixed) {
  413. // Update the current operator chain type before we search up the chain.
  414. ParentChain = NewChain;
  415. // If either the left or right side is invariant, we can unswitch on this,
  416. // which will cause the branch to go away in one loop and the condition to
  417. // simplify in the other one.
  418. if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed,
  419. ParentChain, Cache, MSSAU)) {
  420. Cache[Cond] = LHS;
  421. return LHS;
  422. }
  423. // We did not manage to find a partial LIV in operand(0). Backtrack and try
  424. // operand(1).
  425. ParentChain = NewChain;
  426. if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed,
  427. ParentChain, Cache, MSSAU)) {
  428. Cache[Cond] = RHS;
  429. return RHS;
  430. }
  431. }
  432. }
  433. Cache[Cond] = nullptr;
  434. return nullptr;
  435. }
  436. /// Cond is a condition that occurs in L. If it is invariant in the loop, or has
  437. /// an invariant piece, return the invariant along with the operator chain type.
  438. /// Otherwise, return null.
  439. static std::pair<Value *, OperatorChain>
  440. findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
  441. MemorySSAUpdater *MSSAU) {
  442. DenseMap<Value *, Value *> Cache;
  443. OperatorChain OpChain = OC_OpChainNone;
  444. Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU);
  445. // In case we do find a LIV, it can not be obtained by walking up a mixed
  446. // operator chain.
  447. assert((!FCond || OpChain != OC_OpChainMixed) &&
  448. "Do not expect a partial LIV with mixed operator chain");
  449. return {FCond, OpChain};
  450. }
  451. bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
  452. if (skipLoop(L))
  453. return false;
  454. AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
  455. *L->getHeader()->getParent());
  456. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  457. LPM = &LPMRef;
  458. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  459. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  460. MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  461. MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  462. CurrentLoop = L;
  463. Function *F = CurrentLoop->getHeader()->getParent();
  464. SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
  465. if (SanitizeMemory)
  466. SafetyInfo.computeLoopSafetyInfo(L);
  467. if (VerifyMemorySSA)
  468. MSSA->verifyMemorySSA();
  469. bool Changed = false;
  470. do {
  471. assert(CurrentLoop->isLCSSAForm(*DT));
  472. if (VerifyMemorySSA)
  473. MSSA->verifyMemorySSA();
  474. RedoLoop = false;
  475. Changed |= processCurrentLoop();
  476. } while (RedoLoop);
  477. if (VerifyMemorySSA)
  478. MSSA->verifyMemorySSA();
  479. return Changed;
  480. }
  481. // Return true if the BasicBlock BB is unreachable from the loop header.
  482. // Return false, otherwise.
  483. bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
  484. auto *Node = DT->getNode(BB)->getIDom();
  485. BasicBlock *DomBB = Node->getBlock();
  486. while (CurrentLoop->contains(DomBB)) {
  487. BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
  488. Node = DT->getNode(DomBB)->getIDom();
  489. DomBB = Node->getBlock();
  490. if (!BInst || !BInst->isConditional())
  491. continue;
  492. Value *Cond = BInst->getCondition();
  493. if (!isa<ConstantInt>(Cond))
  494. continue;
  495. BasicBlock *UnreachableSucc =
  496. Cond == ConstantInt::getTrue(Cond->getContext())
  497. ? BInst->getSuccessor(1)
  498. : BInst->getSuccessor(0);
  499. if (DT->dominates(UnreachableSucc, BB))
  500. return true;
  501. }
  502. return false;
  503. }
  504. /// FIXME: Remove this workaround when freeze related patches are done.
  505. /// LoopUnswitch and Equality propagation in GVN have discrepancy about
  506. /// whether branch on undef/poison has undefine behavior. Here it is to
  507. /// rule out some common cases that we found such discrepancy already
  508. /// causing problems. Detail could be found in PR31652. Note if the
  509. /// func returns true, it is unsafe. But if it is false, it doesn't mean
  510. /// it is necessarily safe.
  511. static bool equalityPropUnSafe(Value &LoopCond) {
  512. ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
  513. if (!CI || !CI->isEquality())
  514. return false;
  515. Value *LHS = CI->getOperand(0);
  516. Value *RHS = CI->getOperand(1);
  517. if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
  518. return true;
  519. auto HasUndefInPHI = [](PHINode &PN) {
  520. for (Value *Opd : PN.incoming_values()) {
  521. if (isa<UndefValue>(Opd))
  522. return true;
  523. }
  524. return false;
  525. };
  526. PHINode *LPHI = dyn_cast<PHINode>(LHS);
  527. PHINode *RPHI = dyn_cast<PHINode>(RHS);
  528. if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI)))
  529. return true;
  530. auto HasUndefInSelect = [](SelectInst &SI) {
  531. if (isa<UndefValue>(SI.getTrueValue()) ||
  532. isa<UndefValue>(SI.getFalseValue()))
  533. return true;
  534. return false;
  535. };
  536. SelectInst *LSI = dyn_cast<SelectInst>(LHS);
  537. SelectInst *RSI = dyn_cast<SelectInst>(RHS);
  538. if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI)))
  539. return true;
  540. return false;
  541. }
  542. /// Do actual work and unswitch loop if possible and profitable.
  543. bool LoopUnswitch::processCurrentLoop() {
  544. bool Changed = false;
  545. initLoopData();
  546. // If LoopSimplify was unable to form a preheader, don't do any unswitching.
  547. if (!LoopPreheader)
  548. return false;
  549. // Loops with indirectbr cannot be cloned.
  550. if (!CurrentLoop->isSafeToClone())
  551. return false;
  552. // Without dedicated exits, splitting the exit edge may fail.
  553. if (!CurrentLoop->hasDedicatedExits())
  554. return false;
  555. LLVMContext &Context = LoopHeader->getContext();
  556. // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
  557. if (!BranchesInfo.countLoop(
  558. CurrentLoop,
  559. getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
  560. *CurrentLoop->getHeader()->getParent()),
  561. AC))
  562. return false;
  563. // Try trivial unswitch first before loop over other basic blocks in the loop.
  564. if (tryTrivialLoopUnswitch(Changed)) {
  565. return true;
  566. }
  567. // Do not do non-trivial unswitch while optimizing for size.
  568. // FIXME: Use Function::hasOptSize().
  569. if (OptimizeForSize ||
  570. LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
  571. return Changed;
  572. // Run through the instructions in the loop, keeping track of three things:
  573. //
  574. // - That we do not unswitch loops containing convergent operations, as we
  575. // might be making them control dependent on the unswitch value when they
  576. // were not before.
  577. // FIXME: This could be refined to only bail if the convergent operation is
  578. // not already control-dependent on the unswitch value.
  579. //
  580. // - That basic blocks in the loop contain invokes whose predecessor edges we
  581. // cannot split.
  582. //
  583. // - The set of guard intrinsics encountered (these are non terminator
  584. // instructions that are also profitable to be unswitched).
  585. SmallVector<IntrinsicInst *, 4> Guards;
  586. for (const auto BB : CurrentLoop->blocks()) {
  587. for (auto &I : *BB) {
  588. auto *CB = dyn_cast<CallBase>(&I);
  589. if (!CB)
  590. continue;
  591. if (CB->isConvergent())
  592. return Changed;
  593. if (auto *II = dyn_cast<InvokeInst>(&I))
  594. if (!II->getUnwindDest()->canSplitPredecessors())
  595. return Changed;
  596. if (auto *II = dyn_cast<IntrinsicInst>(&I))
  597. if (II->getIntrinsicID() == Intrinsic::experimental_guard)
  598. Guards.push_back(II);
  599. }
  600. }
  601. for (IntrinsicInst *Guard : Guards) {
  602. Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop,
  603. Changed, MSSAU.get())
  604. .first;
  605. if (LoopCond &&
  606. unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
  607. // NB! Unswitching (if successful) could have erased some of the
  608. // instructions in Guards leaving dangling pointers there. This is fine
  609. // because we're returning now, and won't look at Guards again.
  610. ++NumGuards;
  611. return true;
  612. }
  613. }
  614. // Loop over all of the basic blocks in the loop. If we find an interior
  615. // block that is branching on a loop-invariant condition, we can unswitch this
  616. // loop.
  617. for (Loop::block_iterator I = CurrentLoop->block_begin(),
  618. E = CurrentLoop->block_end();
  619. I != E; ++I) {
  620. Instruction *TI = (*I)->getTerminator();
  621. // Unswitching on a potentially uninitialized predicate is not
  622. // MSan-friendly. Limit this to the cases when the original predicate is
  623. // guaranteed to execute, to avoid creating a use-of-uninitialized-value
  624. // in the code that did not have one.
  625. // This is a workaround for the discrepancy between LLVM IR and MSan
  626. // semantics. See PR28054 for more details.
  627. if (SanitizeMemory &&
  628. !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop))
  629. continue;
  630. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
  631. // Some branches may be rendered unreachable because of previous
  632. // unswitching.
  633. // Unswitch only those branches that are reachable.
  634. if (isUnreachableDueToPreviousUnswitching(*I))
  635. continue;
  636. // If this isn't branching on an invariant condition, we can't unswitch
  637. // it.
  638. if (BI->isConditional()) {
  639. // See if this, or some part of it, is loop invariant. If so, we can
  640. // unswitch on it if we desire.
  641. Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
  642. Changed, MSSAU.get())
  643. .first;
  644. if (LoopCond && !equalityPropUnSafe(*LoopCond) &&
  645. unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
  646. ++NumBranches;
  647. return true;
  648. }
  649. }
  650. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
  651. Value *SC = SI->getCondition();
  652. Value *LoopCond;
  653. OperatorChain OpChain;
  654. std::tie(LoopCond, OpChain) =
  655. findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get());
  656. unsigned NumCases = SI->getNumCases();
  657. if (LoopCond && NumCases) {
  658. // Find a value to unswitch on:
  659. // FIXME: this should chose the most expensive case!
  660. // FIXME: scan for a case with a non-critical edge?
  661. Constant *UnswitchVal = nullptr;
  662. // Find a case value such that at least one case value is unswitched
  663. // out.
  664. if (OpChain == OC_OpChainAnd) {
  665. // If the chain only has ANDs and the switch has a case value of 0.
  666. // Dropping in a 0 to the chain will unswitch out the 0-casevalue.
  667. auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
  668. if (BranchesInfo.isUnswitched(SI, AllZero))
  669. continue;
  670. // We are unswitching 0 out.
  671. UnswitchVal = AllZero;
  672. } else if (OpChain == OC_OpChainOr) {
  673. // If the chain only has ORs and the switch has a case value of ~0.
  674. // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue.
  675. auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
  676. if (BranchesInfo.isUnswitched(SI, AllOne))
  677. continue;
  678. // We are unswitching ~0 out.
  679. UnswitchVal = AllOne;
  680. } else {
  681. assert(OpChain == OC_OpChainNone &&
  682. "Expect to unswitch on trivial chain");
  683. // Do not process same value again and again.
  684. // At this point we have some cases already unswitched and
  685. // some not yet unswitched. Let's find the first not yet unswitched one.
  686. for (auto Case : SI->cases()) {
  687. Constant *UnswitchValCandidate = Case.getCaseValue();
  688. if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
  689. UnswitchVal = UnswitchValCandidate;
  690. break;
  691. }
  692. }
  693. }
  694. if (!UnswitchVal)
  695. continue;
  696. if (unswitchIfProfitable(LoopCond, UnswitchVal)) {
  697. ++NumSwitches;
  698. // In case of a full LIV, UnswitchVal is the value we unswitched out.
  699. // In case of a partial LIV, we only unswitch when its an AND-chain
  700. // or OR-chain. In both cases switch input value simplifies to
  701. // UnswitchVal.
  702. BranchesInfo.setUnswitched(SI, UnswitchVal);
  703. return true;
  704. }
  705. }
  706. }
  707. // Scan the instructions to check for unswitchable values.
  708. for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
  709. BBI != E; ++BBI)
  710. if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
  711. Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
  712. Changed, MSSAU.get())
  713. .first;
  714. if (LoopCond &&
  715. unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
  716. ++NumSelects;
  717. return true;
  718. }
  719. }
  720. }
  721. // Check if there is a header condition that is invariant along the patch from
  722. // either the true or false successors to the header. This allows unswitching
  723. // conditions depending on memory accesses, if there's a path not clobbering
  724. // the memory locations. Check if this transform has been disabled using
  725. // metadata, to avoid unswitching the same loop multiple times.
  726. if (MSSA &&
  727. !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
  728. if (auto Info =
  729. hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) {
  730. assert(!Info->InstToDuplicate.empty() &&
  731. "need at least a partially invariant condition");
  732. LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
  733. << *Info->InstToDuplicate[0] << "\n");
  734. Instruction *TI = CurrentLoop->getHeader()->getTerminator();
  735. Value *LoopCond = Info->InstToDuplicate[0];
  736. // If the partially unswitched path is a no-op and has a single exit
  737. // block, we do not need to do full unswitching. Instead, we can directly
  738. // branch to the exit.
  739. // TODO: Instead of duplicating the checks, we could also just directly
  740. // branch to the exit from the conditional branch in the loop.
  741. if (Info->PathIsNoop) {
  742. if (HasBranchDivergence &&
  743. getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
  744. LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
  745. << CurrentLoop->getHeader()->getName()
  746. << " at non-trivial condition '"
  747. << *Info->KnownValue << "' == " << *LoopCond << "\n"
  748. << ". Condition is divergent.\n");
  749. return false;
  750. }
  751. ++NumBranches;
  752. BasicBlock *TrueDest = LoopHeader;
  753. BasicBlock *FalseDest = Info->ExitForPath;
  754. if (Info->KnownValue->isOneValue())
  755. std::swap(TrueDest, FalseDest);
  756. auto *OldBr =
  757. cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
  758. emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
  759. FalseDest, OldBr, TI,
  760. Info->InstToDuplicate);
  761. delete OldBr;
  762. RedoLoop = false;
  763. return true;
  764. }
  765. // Otherwise, the path is not a no-op. Run regular unswitching.
  766. if (unswitchIfProfitable(LoopCond, Info->KnownValue,
  767. CurrentLoop->getHeader()->getTerminator(),
  768. Info->InstToDuplicate)) {
  769. ++NumBranches;
  770. RedoLoop = false;
  771. return true;
  772. }
  773. }
  774. }
  775. return Changed;
  776. }
  777. /// Check to see if all paths from BB exit the loop with no side effects
  778. /// (including infinite loops).
  779. ///
  780. /// If true, we return true and set ExitBB to the block we
  781. /// exit through.
  782. ///
  783. static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
  784. BasicBlock *&ExitBB,
  785. std::set<BasicBlock*> &Visited) {
  786. if (!Visited.insert(BB).second) {
  787. // Already visited. Without more analysis, this could indicate an infinite
  788. // loop.
  789. return false;
  790. }
  791. if (!L->contains(BB)) {
  792. // Otherwise, this is a loop exit, this is fine so long as this is the
  793. // first exit.
  794. if (ExitBB) return false;
  795. ExitBB = BB;
  796. return true;
  797. }
  798. // Otherwise, this is an unvisited intra-loop node. Check all successors.
  799. for (BasicBlock *Succ : successors(BB)) {
  800. // Check to see if the successor is a trivial loop exit.
  801. if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited))
  802. return false;
  803. }
  804. // Okay, everything after this looks good, check to make sure that this block
  805. // doesn't include any side effects.
  806. for (Instruction &I : *BB)
  807. if (I.mayHaveSideEffects())
  808. return false;
  809. return true;
  810. }
  811. /// Return true if the specified block unconditionally leads to an exit from
  812. /// the specified loop, and has no side-effects in the process. If so, return
  813. /// the block that is exited to, otherwise return null.
  814. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
  815. std::set<BasicBlock*> Visited;
  816. Visited.insert(L->getHeader()); // Branches to header make infinite loops.
  817. BasicBlock *ExitBB = nullptr;
  818. if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
  819. return ExitBB;
  820. return nullptr;
  821. }
  822. /// We have found that we can unswitch CurrentLoop when LoopCond == Val to
  823. /// simplify the loop. If we decide that this is profitable,
  824. /// unswitch the loop, reprocess the pieces, then return true.
  825. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
  826. Instruction *TI,
  827. ArrayRef<Instruction *> ToDuplicate) {
  828. // Check to see if it would be profitable to unswitch current loop.
  829. if (!BranchesInfo.costAllowsUnswitching()) {
  830. LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
  831. << CurrentLoop->getHeader()->getName()
  832. << " at non-trivial condition '" << *Val
  833. << "' == " << *LoopCond << "\n"
  834. << ". Cost too high.\n");
  835. return false;
  836. }
  837. if (HasBranchDivergence &&
  838. getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
  839. LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
  840. << CurrentLoop->getHeader()->getName()
  841. << " at non-trivial condition '" << *Val
  842. << "' == " << *LoopCond << "\n"
  843. << ". Condition is divergent.\n");
  844. return false;
  845. }
  846. unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
  847. return true;
  848. }
  849. /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
  850. /// otherwise branch to FalseDest. Insert the code immediately before OldBranch
  851. /// and remove (but not erase!) it from the function.
  852. void LoopUnswitch::emitPreheaderBranchOnCondition(
  853. Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
  854. BranchInst *OldBranch, Instruction *TI,
  855. ArrayRef<Instruction *> ToDuplicate) {
  856. assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
  857. assert(TrueDest != FalseDest && "Branch targets should be different");
  858. // Insert a conditional branch on LIC to the two preheaders. The original
  859. // code is the true version and the new code is the false version.
  860. Value *BranchVal = LIC;
  861. bool Swapped = false;
  862. if (!ToDuplicate.empty()) {
  863. ValueToValueMapTy Old2New;
  864. for (Instruction *I : reverse(ToDuplicate)) {
  865. auto *New = I->clone();
  866. New->insertBefore(OldBranch);
  867. RemapInstruction(New, Old2New,
  868. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  869. Old2New[I] = New;
  870. if (MSSAU) {
  871. MemorySSA *MSSA = MSSAU->getMemorySSA();
  872. auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
  873. if (!MemA)
  874. continue;
  875. Loop *L = LI->getLoopFor(I->getParent());
  876. auto *DefiningAccess = MemA->getDefiningAccess();
  877. // Get the first defining access before the loop.
  878. while (L->contains(DefiningAccess->getBlock())) {
  879. // If the defining access is a MemoryPhi, get the incoming
  880. // value for the pre-header as defining access.
  881. if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
  882. DefiningAccess =
  883. MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
  884. } else {
  885. DefiningAccess =
  886. cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
  887. }
  888. }
  889. MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
  890. MemorySSA::BeforeTerminator);
  891. }
  892. }
  893. BranchVal = Old2New[ToDuplicate[0]];
  894. } else {
  895. if (!isa<ConstantInt>(Val) ||
  896. Val->getType() != Type::getInt1Ty(LIC->getContext()))
  897. BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
  898. else if (Val != ConstantInt::getTrue(Val->getContext())) {
  899. // We want to enter the new loop when the condition is true.
  900. std::swap(TrueDest, FalseDest);
  901. Swapped = true;
  902. }
  903. }
  904. // Old branch will be removed, so save its parent and successor to update the
  905. // DomTree.
  906. auto *OldBranchSucc = OldBranch->getSuccessor(0);
  907. auto *OldBranchParent = OldBranch->getParent();
  908. // Insert the new branch.
  909. BranchInst *BI =
  910. IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
  911. if (Swapped)
  912. BI->swapProfMetadata();
  913. // Remove the old branch so there is only one branch at the end. This is
  914. // needed to perform DomTree's internal DFS walk on the function's CFG.
  915. OldBranch->removeFromParent();
  916. // Inform the DT about the new branch.
  917. if (DT) {
  918. // First, add both successors.
  919. SmallVector<DominatorTree::UpdateType, 3> Updates;
  920. if (TrueDest != OldBranchSucc)
  921. Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
  922. if (FalseDest != OldBranchSucc)
  923. Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
  924. // If both of the new successors are different from the old one, inform the
  925. // DT that the edge was deleted.
  926. if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
  927. Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
  928. }
  929. if (MSSAU)
  930. MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
  931. else
  932. DT->applyUpdates(Updates);
  933. }
  934. // If either edge is critical, split it. This helps preserve LoopSimplify
  935. // form for enclosing loops.
  936. auto Options =
  937. CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
  938. SplitCriticalEdge(BI, 0, Options);
  939. SplitCriticalEdge(BI, 1, Options);
  940. }
  941. /// Given a loop that has a trivial unswitchable condition in it (a cond branch
  942. /// from its header block to its latch block, where the path through the loop
  943. /// that doesn't execute its body has no side-effects), unswitch it. This
  944. /// doesn't involve any code duplication, just moving the conditional branch
  945. /// outside of the loop and updating loop info.
  946. void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
  947. BasicBlock *ExitBlock,
  948. Instruction *TI) {
  949. LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
  950. << LoopHeader->getName() << " [" << L->getBlocks().size()
  951. << " blocks] in Function "
  952. << L->getHeader()->getParent()->getName()
  953. << " on cond: " << *Val << " == " << *Cond << "\n");
  954. // We are going to make essential changes to CFG. This may invalidate cached
  955. // information for L or one of its parent loops in SCEV.
  956. if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
  957. SEWP->getSE().forgetTopmostLoop(L);
  958. // First step, split the preheader, so that we know that there is a safe place
  959. // to insert the conditional branch. We will change LoopPreheader to have a
  960. // conditional branch on Cond.
  961. BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
  962. // Now that we have a place to insert the conditional branch, create a place
  963. // to branch to: this is the exit block out of the loop that we should
  964. // short-circuit to.
  965. // Split this block now, so that the loop maintains its exit block, and so
  966. // that the jump from the preheader can execute the contents of the exit block
  967. // without actually branching to it (the exit block should be dominated by the
  968. // loop header, not the preheader).
  969. assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
  970. BasicBlock *NewExit =
  971. SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
  972. // Okay, now we have a position to branch from and a position to branch to,
  973. // insert the new conditional branch.
  974. auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator());
  975. assert(OldBranch && "Failed to split the preheader");
  976. emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
  977. // emitPreheaderBranchOnCondition removed the OldBranch from the function.
  978. // Delete it, as it is no longer needed.
  979. delete OldBranch;
  980. // We need to reprocess this loop, it could be unswitched again.
  981. RedoLoop = true;
  982. // Now that we know that the loop is never entered when this condition is a
  983. // particular value, rewrite the loop with this info. We know that this will
  984. // at least eliminate the old branch.
  985. rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false);
  986. ++NumTrivial;
  987. }
  988. /// Check if the first non-constant condition starting from the loop header is
  989. /// a trivial unswitch condition: that is, a condition controls whether or not
  990. /// the loop does anything at all. If it is a trivial condition, unswitching
  991. /// produces no code duplications (equivalently, it produces a simpler loop and
  992. /// a new empty loop, which gets deleted). Therefore always unswitch trivial
  993. /// condition.
  994. bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) {
  995. BasicBlock *CurrentBB = CurrentLoop->getHeader();
  996. Instruction *CurrentTerm = CurrentBB->getTerminator();
  997. LLVMContext &Context = CurrentBB->getContext();
  998. // If loop header has only one reachable successor (currently via an
  999. // unconditional branch or constant foldable conditional branch, but
  1000. // should also consider adding constant foldable switch instruction in
  1001. // future), we should keep looking for trivial condition candidates in
  1002. // the successor as well. An alternative is to constant fold conditions
  1003. // and merge successors into loop header (then we only need to check header's
  1004. // terminator). The reason for not doing this in LoopUnswitch pass is that
  1005. // it could potentially break LoopPassManager's invariants. Folding dead
  1006. // branches could either eliminate the current loop or make other loops
  1007. // unreachable. LCSSA form might also not be preserved after deleting
  1008. // branches. The following code keeps traversing loop header's successors
  1009. // until it finds the trivial condition candidate (condition that is not a
  1010. // constant). Since unswitching generates branches with constant conditions,
  1011. // this scenario could be very common in practice.
  1012. SmallPtrSet<BasicBlock*, 8> Visited;
  1013. while (true) {
  1014. // If we exit loop or reach a previous visited block, then
  1015. // we can not reach any trivial condition candidates (unfoldable
  1016. // branch instructions or switch instructions) and no unswitch
  1017. // can happen. Exit and return false.
  1018. if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
  1019. return false;
  1020. // Check if this loop will execute any side-effecting instructions (e.g.
  1021. // stores, calls, volatile loads) in the part of the loop that the code
  1022. // *would* execute. Check the header first.
  1023. for (Instruction &I : *CurrentBB)
  1024. if (I.mayHaveSideEffects())
  1025. return false;
  1026. if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
  1027. if (BI->isUnconditional()) {
  1028. CurrentBB = BI->getSuccessor(0);
  1029. } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
  1030. CurrentBB = BI->getSuccessor(0);
  1031. } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
  1032. CurrentBB = BI->getSuccessor(1);
  1033. } else {
  1034. // Found a trivial condition candidate: non-foldable conditional branch.
  1035. break;
  1036. }
  1037. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
  1038. // At this point, any constant-foldable instructions should have probably
  1039. // been folded.
  1040. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
  1041. if (!Cond)
  1042. break;
  1043. // Find the target block we are definitely going to.
  1044. CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
  1045. } else {
  1046. // We do not understand these terminator instructions.
  1047. break;
  1048. }
  1049. CurrentTerm = CurrentBB->getTerminator();
  1050. }
  1051. // CondVal is the condition that controls the trivial condition.
  1052. // LoopExitBB is the BasicBlock that loop exits when meets trivial condition.
  1053. Constant *CondVal = nullptr;
  1054. BasicBlock *LoopExitBB = nullptr;
  1055. if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
  1056. // If this isn't branching on an invariant condition, we can't unswitch it.
  1057. if (!BI->isConditional())
  1058. return false;
  1059. Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
  1060. Changed, MSSAU.get())
  1061. .first;
  1062. // Unswitch only if the trivial condition itself is an LIV (not
  1063. // partial LIV which could occur in and/or)
  1064. if (!LoopCond || LoopCond != BI->getCondition())
  1065. return false;
  1066. // Check to see if a successor of the branch is guaranteed to
  1067. // exit through a unique exit block without having any
  1068. // side-effects. If so, determine the value of Cond that causes
  1069. // it to do this.
  1070. if ((LoopExitBB =
  1071. isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) {
  1072. CondVal = ConstantInt::getTrue(Context);
  1073. } else if ((LoopExitBB =
  1074. isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) {
  1075. CondVal = ConstantInt::getFalse(Context);
  1076. }
  1077. // If we didn't find a single unique LoopExit block, or if the loop exit
  1078. // block contains phi nodes, this isn't trivial.
  1079. if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
  1080. return false; // Can't handle this.
  1081. if (equalityPropUnSafe(*LoopCond))
  1082. return false;
  1083. unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
  1084. CurrentTerm);
  1085. ++NumBranches;
  1086. return true;
  1087. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
  1088. // If this isn't switching on an invariant condition, we can't unswitch it.
  1089. Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
  1090. Changed, MSSAU.get())
  1091. .first;
  1092. // Unswitch only if the trivial condition itself is an LIV (not
  1093. // partial LIV which could occur in and/or)
  1094. if (!LoopCond || LoopCond != SI->getCondition())
  1095. return false;
  1096. // Check to see if a successor of the switch is guaranteed to go to the
  1097. // latch block or exit through a one exit block without having any
  1098. // side-effects. If so, determine the value of Cond that causes it to do
  1099. // this.
  1100. // Note that we can't trivially unswitch on the default case or
  1101. // on already unswitched cases.
  1102. for (auto Case : SI->cases()) {
  1103. BasicBlock *LoopExitCandidate;
  1104. if ((LoopExitCandidate =
  1105. isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) {
  1106. // Okay, we found a trivial case, remember the value that is trivial.
  1107. ConstantInt *CaseVal = Case.getCaseValue();
  1108. // Check that it was not unswitched before, since already unswitched
  1109. // trivial vals are looks trivial too.
  1110. if (BranchesInfo.isUnswitched(SI, CaseVal))
  1111. continue;
  1112. LoopExitBB = LoopExitCandidate;
  1113. CondVal = CaseVal;
  1114. break;
  1115. }
  1116. }
  1117. // If we didn't find a single unique LoopExit block, or if the loop exit
  1118. // block contains phi nodes, this isn't trivial.
  1119. if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
  1120. return false; // Can't handle this.
  1121. unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
  1122. nullptr);
  1123. // We are only unswitching full LIV.
  1124. BranchesInfo.setUnswitched(SI, CondVal);
  1125. ++NumSwitches;
  1126. return true;
  1127. }
  1128. return false;
  1129. }
  1130. /// Split all of the edges from inside the loop to their exit blocks.
  1131. /// Update the appropriate Phi nodes as we do so.
  1132. void LoopUnswitch::splitExitEdges(
  1133. Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  1134. for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
  1135. BasicBlock *ExitBlock = ExitBlocks[I];
  1136. SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
  1137. // Although SplitBlockPredecessors doesn't preserve loop-simplify in
  1138. // general, if we call it on all predecessors of all exits then it does.
  1139. SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
  1140. /*PreserveLCSSA*/ true);
  1141. }
  1142. }
  1143. /// We determined that the loop is profitable to unswitch when LIC equal Val.
  1144. /// Split it into loop versions and test the condition outside of either loop.
  1145. /// Return the loops created as Out1/Out2.
  1146. void LoopUnswitch::unswitchNontrivialCondition(
  1147. Value *LIC, Constant *Val, Loop *L, Instruction *TI,
  1148. ArrayRef<Instruction *> ToDuplicate) {
  1149. Function *F = LoopHeader->getParent();
  1150. LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
  1151. << LoopHeader->getName() << " [" << L->getBlocks().size()
  1152. << " blocks] in Function " << F->getName() << " when '"
  1153. << *Val << "' == " << *LIC << "\n");
  1154. // We are going to make essential changes to CFG. This may invalidate cached
  1155. // information for L or one of its parent loops in SCEV.
  1156. if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
  1157. SEWP->getSE().forgetTopmostLoop(L);
  1158. LoopBlocks.clear();
  1159. NewBlocks.clear();
  1160. if (MSSAU && VerifyMemorySSA)
  1161. MSSA->verifyMemorySSA();
  1162. // First step, split the preheader and exit blocks, and add these blocks to
  1163. // the LoopBlocks list.
  1164. BasicBlock *NewPreheader =
  1165. SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
  1166. LoopBlocks.push_back(NewPreheader);
  1167. // We want the loop to come after the preheader, but before the exit blocks.
  1168. llvm::append_range(LoopBlocks, L->blocks());
  1169. SmallVector<BasicBlock*, 8> ExitBlocks;
  1170. L->getUniqueExitBlocks(ExitBlocks);
  1171. // Split all of the edges from inside the loop to their exit blocks. Update
  1172. // the appropriate Phi nodes as we do so.
  1173. splitExitEdges(L, ExitBlocks);
  1174. // The exit blocks may have been changed due to edge splitting, recompute.
  1175. ExitBlocks.clear();
  1176. L->getUniqueExitBlocks(ExitBlocks);
  1177. // Add exit blocks to the loop blocks.
  1178. llvm::append_range(LoopBlocks, ExitBlocks);
  1179. // Next step, clone all of the basic blocks that make up the loop (including
  1180. // the loop preheader and exit blocks), keeping track of the mapping between
  1181. // the instructions and blocks.
  1182. NewBlocks.reserve(LoopBlocks.size());
  1183. ValueToValueMapTy VMap;
  1184. for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) {
  1185. BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F);
  1186. NewBlocks.push_back(NewBB);
  1187. VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping.
  1188. }
  1189. // Splice the newly inserted blocks into the function right before the
  1190. // original preheader.
  1191. F->getBasicBlockList().splice(NewPreheader->getIterator(),
  1192. F->getBasicBlockList(),
  1193. NewBlocks[0]->getIterator(), F->end());
  1194. // Now we create the new Loop object for the versioned loop.
  1195. Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
  1196. // Recalculate unswitching quota, inherit simplified switches info for NewBB,
  1197. // Probably clone more loop-unswitch related loop properties.
  1198. BranchesInfo.cloneData(NewLoop, L, VMap);
  1199. Loop *ParentLoop = L->getParentLoop();
  1200. if (ParentLoop) {
  1201. // Make sure to add the cloned preheader and exit blocks to the parent loop
  1202. // as well.
  1203. ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
  1204. }
  1205. for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) {
  1206. BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]);
  1207. // The new exit block should be in the same loop as the old one.
  1208. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI]))
  1209. ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
  1210. assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
  1211. "Exit block should have been split to have one successor!");
  1212. BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
  1213. // If the successor of the exit block had PHI nodes, add an entry for
  1214. // NewExit.
  1215. for (PHINode &PN : ExitSucc->phis()) {
  1216. Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]);
  1217. ValueToValueMapTy::iterator It = VMap.find(V);
  1218. if (It != VMap.end()) V = It->second;
  1219. PN.addIncoming(V, NewExit);
  1220. }
  1221. if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
  1222. PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
  1223. &*ExitSucc->getFirstInsertionPt());
  1224. for (BasicBlock *BB : predecessors(ExitSucc)) {
  1225. LandingPadInst *LPI = BB->getLandingPadInst();
  1226. LPI->replaceAllUsesWith(PN);
  1227. PN->addIncoming(LPI, BB);
  1228. }
  1229. }
  1230. }
  1231. // Rewrite the code to refer to itself.
  1232. for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) {
  1233. for (Instruction &I : *NewBlocks[NBI]) {
  1234. RemapInstruction(&I, VMap,
  1235. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  1236. if (auto *II = dyn_cast<AssumeInst>(&I))
  1237. AC->registerAssumption(II);
  1238. }
  1239. }
  1240. // Rewrite the original preheader to select between versions of the loop.
  1241. BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator());
  1242. assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
  1243. "Preheader splitting did not work correctly!");
  1244. if (MSSAU) {
  1245. // Update MemorySSA after cloning, and before splitting to unreachables,
  1246. // since that invalidates the 1:1 mapping of clones in VMap.
  1247. LoopBlocksRPO LBRPO(L);
  1248. LBRPO.perform(LI);
  1249. MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
  1250. }
  1251. // Emit the new branch that selects between the two versions of this loop.
  1252. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
  1253. TI, ToDuplicate);
  1254. if (MSSAU) {
  1255. // Update MemoryPhis in Exit blocks.
  1256. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
  1257. if (VerifyMemorySSA)
  1258. MSSA->verifyMemorySSA();
  1259. }
  1260. // The OldBr was replaced by a new one and removed (but not erased) by
  1261. // emitPreheaderBranchOnCondition. It is no longer needed, so delete it.
  1262. delete OldBR;
  1263. LoopProcessWorklist.push_back(NewLoop);
  1264. RedoLoop = true;
  1265. // Keep a WeakTrackingVH holding onto LIC. If the first call to
  1266. // RewriteLoopBody
  1267. // deletes the instruction (for example by simplifying a PHI that feeds into
  1268. // the condition that we're unswitching on), we don't rewrite the second
  1269. // iteration.
  1270. WeakTrackingVH LICHandle(LIC);
  1271. if (ToDuplicate.empty()) {
  1272. // Now we rewrite the original code to know that the condition is true and
  1273. // the new code to know that the condition is false.
  1274. rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
  1275. // It's possible that simplifying one loop could cause the other to be
  1276. // changed to another value or a constant. If its a constant, don't
  1277. // simplify it.
  1278. if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
  1279. LICHandle && !isa<Constant>(LICHandle))
  1280. rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
  1281. /*IsEqual=*/true);
  1282. } else {
  1283. // Partial unswitching. Update the condition in the right loop with the
  1284. // constant.
  1285. auto *CC = cast<ConstantInt>(Val);
  1286. if (CC->isOneValue()) {
  1287. rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
  1288. /*IsEqual=*/true);
  1289. } else
  1290. rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
  1291. // Mark the new loop as partially unswitched, to avoid unswitching on the
  1292. // same condition again.
  1293. auto &Context = NewLoop->getHeader()->getContext();
  1294. MDNode *DisableUnswitchMD = MDNode::get(
  1295. Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
  1296. MDNode *NewLoopID = makePostTransformationMetadata(
  1297. Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
  1298. {DisableUnswitchMD});
  1299. NewLoop->setLoopID(NewLoopID);
  1300. }
  1301. if (MSSA && VerifyMemorySSA)
  1302. MSSA->verifyMemorySSA();
  1303. }
  1304. /// Remove all instances of I from the worklist vector specified.
  1305. static void removeFromWorklist(Instruction *I,
  1306. std::vector<Instruction *> &Worklist) {
  1307. llvm::erase_value(Worklist, I);
  1308. }
  1309. /// When we find that I really equals V, remove I from the
  1310. /// program, replacing all uses with V and update the worklist.
  1311. static void replaceUsesOfWith(Instruction *I, Value *V,
  1312. std::vector<Instruction *> &Worklist, Loop *L,
  1313. LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
  1314. LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
  1315. // Add uses to the worklist, which may be dead now.
  1316. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
  1317. if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
  1318. Worklist.push_back(Use);
  1319. // Add users to the worklist which may be simplified now.
  1320. for (User *U : I->users())
  1321. Worklist.push_back(cast<Instruction>(U));
  1322. removeFromWorklist(I, Worklist);
  1323. I->replaceAllUsesWith(V);
  1324. if (!I->mayHaveSideEffects()) {
  1325. if (MSSAU)
  1326. MSSAU->removeMemoryAccess(I);
  1327. I->eraseFromParent();
  1328. }
  1329. ++NumSimplify;
  1330. }
  1331. /// We know either that the value LIC has the value specified by Val in the
  1332. /// specified loop, or we know it does NOT have that value.
  1333. /// Rewrite any uses of LIC or of properties correlated to it.
  1334. void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
  1335. Constant *Val,
  1336. bool IsEqual) {
  1337. assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
  1338. // FIXME: Support correlated properties, like:
  1339. // for (...)
  1340. // if (li1 < li2)
  1341. // ...
  1342. // if (li1 > li2)
  1343. // ...
  1344. // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
  1345. // selects, switches.
  1346. std::vector<Instruction*> Worklist;
  1347. LLVMContext &Context = Val->getContext();
  1348. // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
  1349. // in the loop with the appropriate one directly.
  1350. if (IsEqual || (isa<ConstantInt>(Val) &&
  1351. Val->getType()->isIntegerTy(1))) {
  1352. Value *Replacement;
  1353. if (IsEqual)
  1354. Replacement = Val;
  1355. else
  1356. Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
  1357. !cast<ConstantInt>(Val)->getZExtValue());
  1358. for (User *U : LIC->users()) {
  1359. Instruction *UI = dyn_cast<Instruction>(U);
  1360. if (!UI || !L->contains(UI))
  1361. continue;
  1362. Worklist.push_back(UI);
  1363. }
  1364. for (Instruction *UI : Worklist)
  1365. UI->replaceUsesOfWith(LIC, Replacement);
  1366. simplifyCode(Worklist, L);
  1367. return;
  1368. }
  1369. // Otherwise, we don't know the precise value of LIC, but we do know that it
  1370. // is certainly NOT "Val". As such, simplify any uses in the loop that we
  1371. // can. This case occurs when we unswitch switch statements.
  1372. for (User *U : LIC->users()) {
  1373. Instruction *UI = dyn_cast<Instruction>(U);
  1374. if (!UI || !L->contains(UI))
  1375. continue;
  1376. // At this point, we know LIC is definitely not Val. Try to use some simple
  1377. // logic to simplify the user w.r.t. to the context.
  1378. if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) {
  1379. if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
  1380. // This in-loop instruction has been simplified w.r.t. its context,
  1381. // i.e. LIC != Val, make sure we propagate its replacement value to
  1382. // all its users.
  1383. //
  1384. // We can not yet delete UI, the LIC user, yet, because that would invalidate
  1385. // the LIC->users() iterator !. However, we can make this instruction
  1386. // dead by replacing all its users and push it onto the worklist so that
  1387. // it can be properly deleted and its operands simplified.
  1388. UI->replaceAllUsesWith(Replacement);
  1389. }
  1390. }
  1391. // This is a LIC user, push it into the worklist so that simplifyCode can
  1392. // attempt to simplify it.
  1393. Worklist.push_back(UI);
  1394. // If we know that LIC is not Val, use this info to simplify code.
  1395. SwitchInst *SI = dyn_cast<SwitchInst>(UI);
  1396. if (!SI || !isa<ConstantInt>(Val)) continue;
  1397. // NOTE: if a case value for the switch is unswitched out, we record it
  1398. // after the unswitch finishes. We can not record it here as the switch
  1399. // is not a direct user of the partial LIV.
  1400. SwitchInst::CaseHandle DeadCase =
  1401. *SI->findCaseValue(cast<ConstantInt>(Val));
  1402. // Default case is live for multiple values.
  1403. if (DeadCase == *SI->case_default())
  1404. continue;
  1405. // Found a dead case value. Don't remove PHI nodes in the
  1406. // successor if they become single-entry, those PHI nodes may
  1407. // be in the Users list.
  1408. BasicBlock *Switch = SI->getParent();
  1409. BasicBlock *SISucc = DeadCase.getCaseSuccessor();
  1410. BasicBlock *Latch = L->getLoopLatch();
  1411. if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
  1412. // If the DeadCase successor dominates the loop latch, then the
  1413. // transformation isn't safe since it will delete the sole predecessor edge
  1414. // to the latch.
  1415. if (Latch && DT->dominates(SISucc, Latch))
  1416. continue;
  1417. // FIXME: This is a hack. We need to keep the successor around
  1418. // and hooked up so as to preserve the loop structure, because
  1419. // trying to update it is complicated. So instead we preserve the
  1420. // loop structure and put the block on a dead code path.
  1421. SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
  1422. // Compute the successors instead of relying on the return value
  1423. // of SplitEdge, since it may have split the switch successor
  1424. // after PHI nodes.
  1425. BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
  1426. BasicBlock *OldSISucc = *succ_begin(NewSISucc);
  1427. // Create an "unreachable" destination.
  1428. BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
  1429. Switch->getParent(),
  1430. OldSISucc);
  1431. new UnreachableInst(Context, Abort);
  1432. // Force the new case destination to branch to the "unreachable"
  1433. // block while maintaining a (dead) CFG edge to the old block.
  1434. NewSISucc->getTerminator()->eraseFromParent();
  1435. BranchInst::Create(Abort, OldSISucc,
  1436. ConstantInt::getTrue(Context), NewSISucc);
  1437. // Release the PHI operands for this edge.
  1438. for (PHINode &PN : NewSISucc->phis())
  1439. PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
  1440. // Tell the domtree about the new block. We don't fully update the
  1441. // domtree here -- instead we force it to do a full recomputation
  1442. // after the pass is complete -- but we do need to inform it of
  1443. // new blocks.
  1444. DT->addNewBlock(Abort, NewSISucc);
  1445. }
  1446. simplifyCode(Worklist, L);
  1447. }
  1448. /// Now that we have simplified some instructions in the loop, walk over it and
  1449. /// constant prop, dce, and fold control flow where possible. Note that this is
  1450. /// effectively a very simple loop-structure-aware optimizer. During processing
  1451. /// of this loop, L could very well be deleted, so it must not be used.
  1452. ///
  1453. /// FIXME: When the loop optimizer is more mature, separate this out to a new
  1454. /// pass.
  1455. ///
  1456. void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) {
  1457. const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
  1458. while (!Worklist.empty()) {
  1459. Instruction *I = Worklist.back();
  1460. Worklist.pop_back();
  1461. // Simple DCE.
  1462. if (isInstructionTriviallyDead(I)) {
  1463. LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
  1464. // Add uses to the worklist, which may be dead now.
  1465. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
  1466. if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
  1467. Worklist.push_back(Use);
  1468. removeFromWorklist(I, Worklist);
  1469. if (MSSAU)
  1470. MSSAU->removeMemoryAccess(I);
  1471. I->eraseFromParent();
  1472. ++NumSimplify;
  1473. continue;
  1474. }
  1475. // See if instruction simplification can hack this up. This is common for
  1476. // things like "select false, X, Y" after unswitching made the condition be
  1477. // 'false'. TODO: update the domtree properly so we can pass it here.
  1478. if (Value *V = SimplifyInstruction(I, DL))
  1479. if (LI->replacementPreservesLCSSAForm(I, V)) {
  1480. replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
  1481. continue;
  1482. }
  1483. // Special case hacks that appear commonly in unswitched code.
  1484. if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
  1485. if (BI->isUnconditional()) {
  1486. // If BI's parent is the only pred of the successor, fold the two blocks
  1487. // together.
  1488. BasicBlock *Pred = BI->getParent();
  1489. (void)Pred;
  1490. BasicBlock *Succ = BI->getSuccessor(0);
  1491. BasicBlock *SinglePred = Succ->getSinglePredecessor();
  1492. if (!SinglePred) continue; // Nothing to do.
  1493. assert(SinglePred == Pred && "CFG broken");
  1494. // Make the LPM and Worklist updates specific to LoopUnswitch.
  1495. removeFromWorklist(BI, Worklist);
  1496. auto SuccIt = Succ->begin();
  1497. while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) {
  1498. for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It)
  1499. if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It)))
  1500. Worklist.push_back(Use);
  1501. for (User *U : PN->users())
  1502. Worklist.push_back(cast<Instruction>(U));
  1503. removeFromWorklist(PN, Worklist);
  1504. ++NumSimplify;
  1505. }
  1506. // Merge the block and make the remaining analyses updates.
  1507. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  1508. MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get());
  1509. ++NumSimplify;
  1510. continue;
  1511. }
  1512. continue;
  1513. }
  1514. }
  1515. }
  1516. /// Simple simplifications we can do given the information that Cond is
  1517. /// definitely not equal to Val.
  1518. Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst,
  1519. Value *Invariant,
  1520. Constant *Val) {
  1521. // icmp eq cond, val -> false
  1522. ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
  1523. if (CI && CI->isEquality()) {
  1524. Value *Op0 = CI->getOperand(0);
  1525. Value *Op1 = CI->getOperand(1);
  1526. if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
  1527. LLVMContext &Ctx = Inst->getContext();
  1528. if (CI->getPredicate() == CmpInst::ICMP_EQ)
  1529. return ConstantInt::getFalse(Ctx);
  1530. else
  1531. return ConstantInt::getTrue(Ctx);
  1532. }
  1533. }
  1534. // FIXME: there may be other opportunities, e.g. comparison with floating
  1535. // point, or Invariant - Val != 0, etc.
  1536. return nullptr;
  1537. }