//===-- LoopPredication.cpp - Guard based loop predication pass ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The LoopPredication pass tries to convert loop variant range checks to loop
// invariant by widening checks across loop iterations. For example, it will
// convert
//
//   for (i = 0; i < n; i++) {
//     guard(i < len);
//     ...
//   }
//
// to
//
//   for (i = 0; i < n; i++) {
//     guard(n - 1 < len);
//     ...
//   }
//
// After this transformation the condition of the guard is loop invariant, so
// loop-unswitch can later unswitch the loop by this condition which basically
// predicates the loop by the widened condition:
//
//   if (n - 1 < len)
//     for (i = 0; i < n; i++) {
//       ...
//     }
//   else
//     deoptimize
//
// It's tempting to rely on SCEV here, but it has proven to be problematic.
// Generally the facts SCEV provides about the increment step of add
// recurrences are true if the backedge of the loop is taken, which implicitly
// assumes that the guard doesn't fail. Using these facts to optimize the
// guard results in circular logic where the guard is optimized under the
// assumption that it never fails.
//
// For example, in the loop below the induction variable will be marked as nuw
// based on the guard. Based on nuw the guard predicate will be considered
// monotonic. Given a monotonic condition it's tempting to replace the
// induction variable in the condition with its value on the last iteration.
// But this transformation is not correct, e.g. e = 4, b = 5 breaks the loop:
// i must wrap all the way around the unsigned range before reaching e, so the
// guard is evaluated on values of i far above len along the way.
//
//   for (int i = b; i != e; i++)
//     guard(i u< len)
//
// One of the ways to reason about this problem is to use an inductive proof
// approach. Given the loop:
//
//   if (B(0)) {
//     do {
//       I = PHI(0, I.INC)
//       I.INC = I + Step
//       guard(G(I));
//     } while (B(I));
//   }
//
// where B(x) and G(x) are predicates that map integers to booleans, we want a
// loop invariant expression M such that the following program has the same
// semantics as the above:
//
//   if (B(0)) {
//     do {
//       I = PHI(0, I.INC)
//       I.INC = I + Step
//       guard(G(0) && M);
//     } while (B(I));
//   }
//
// One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
//
// Informal proof that the transformation above is correct:
//
//   By the definition of guards we can rewrite the guard condition to:
//     G(I) && G(0) && M
//
//   Let's prove that for each iteration of the loop:
//     G(0) && M => G(I)
//   And the condition above can be simplified to G(Start) && M.
//
//   Induction base.
//     G(0) && M => G(0)
//
//   Induction step. Assuming G(0) && M => G(I) on the current iteration,
//   prove it for the next one, i.e. that G(0) && M => G(I + Step):
//
//     B(I) is true because it's the backedge condition.
//     G(I) is true because the backedge is guarded by this condition.
//
//   So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step).
//
// Note that we can use anything stronger than M, i.e. any condition which
// implies M.
//
// When Step = 1 (i.e. forward iterating loop), the transformation is
// supported when:
//   * The loop has a single latch with the condition of the form:
//     B(X) = latchStart + X <pred> latchLimit,
//     where <pred> is u<, u<=, s<, or s<=.
//   * The guard condition is of the form
//     G(X) = guardStart + X u< guardLimit
//
// For the ult latch comparison case M is:
//   forall X . guardStart + X u< guardLimit && latchStart + X u< latchLimit =>
//     guardStart + X + 1 u< guardLimit
//
// The only way the antecedent can be true and the consequent can be false is
// if
//   X == guardLimit - 1 - guardStart
// (and guardLimit is non-zero, but we won't use this latter fact).
// If X == guardLimit - 1 - guardStart then the second half of the antecedent is
//   latchStart + guardLimit - 1 - guardStart u< latchLimit
// and its negation is
//   latchStart + guardLimit - 1 - guardStart u>= latchLimit
//
// In other words, if
//   latchLimit u<= latchStart + guardLimit - 1 - guardStart
// then:
// (the ranges below are written in ConstantRange notation, where [A, B) is the
// set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
//
//    forall X . guardStart + X u< guardLimit &&
//               latchStart + X u< latchLimit =>
//      guardStart + X + 1 u< guardLimit
// == forall X . guardStart + X u< guardLimit &&
//               latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
//      guardStart + X + 1 u< guardLimit
// == forall X . (guardStart + X) in [0, guardLimit) &&
//               (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
//      (guardStart + X + 1) in [0, guardLimit)
// == forall X . X in [-guardStart, guardLimit - guardStart) &&
//               X in [-latchStart, guardLimit - 1 - guardStart) =>
//      X in [-guardStart - 1, guardLimit - guardStart - 1)
// == true
//
// So the widened condition is:
//   guardStart u< guardLimit &&
//   latchStart + guardLimit - 1 - guardStart u>= latchLimit
// Similarly for ule condition the widened condition is:
//   guardStart u< guardLimit &&
//   latchStart + guardLimit - 1 - guardStart u> latchLimit
// For slt condition the widened condition is:
//   guardStart u< guardLimit &&
//   latchStart + guardLimit - 1 - guardStart s>= latchLimit
// For sle condition the widened condition is:
//   guardStart u< guardLimit &&
//   latchStart + guardLimit - 1 - guardStart s> latchLimit
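//
// As a concrete sanity check of the ult case (illustrative numbers, not taken
// from any particular test): take guardStart = 0, guardLimit = 8,
// latchStart = 0, latchLimit = 5. The body runs for I = 0 .. 5 and the guard
// I u< 8 never fails; the widened condition 0 u< 8 && 7 u>= 5 is true.
// Shrinking guardLimit to 4 makes the guard fail at I = 4, and the widened
// condition 0 u< 4 && 3 u>= 5 is correspondingly false.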
//
// When Step = -1 (i.e. reverse iterating loop), the transformation is
// supported when:
//   * The loop has a single latch with the condition of the form:
//     B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
//   * The guard condition is of the form
//     G(X) = X - 1 u< guardLimit
//
// For the ugt latch comparison case M is:
//   forall X . X - 1 u< guardLimit and X u> latchLimit => X - 2 u< guardLimit
//
// The only way the antecedent can be true and the consequent can be false is
// if
//   X == 1.
// If X == 1 then the second half of the antecedent is
//   1 u> latchLimit, and its negation is latchLimit u>= 1.
//
// So the widened condition is:
//   guardStart u< guardLimit && latchLimit u>= 1.
// Similarly for sgt condition the widened condition is:
//   guardStart u< guardLimit && latchLimit s>= 1.
// For uge condition the widened condition is:
//   guardStart u< guardLimit && latchLimit u> 1.
// For sge condition the widened condition is:
//   guardStart u< guardLimit && latchLimit s> 1.
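//
// Intuitively (an informal note, not part of the original proof): the
// latchLimit u>= 1 clause guarantees the loop exits before X reaches 0, the
// one iteration on which X - 1 would wrap around to the maximum unsigned
// value and fail the guard.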
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#define DEBUG_TYPE "loop-predication"

STATISTIC(TotalConsidered, "Number of guards considered");
STATISTIC(TotalWidened, "Number of checks widened");

using namespace llvm;

static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
                                        cl::Hidden, cl::init(true));

static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
                                         cl::Hidden, cl::init(true));

static cl::opt<bool>
    SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
                            cl::Hidden, cl::init(false));

// This is the scale factor for the latch probability. We use this during
// profitability analysis to find other exiting blocks that have a much higher
// probability of exiting the loop than the latch does. This value should be
// greater than 1 for a sane profitability check.
static cl::opt<float> LatchExitProbabilityScale(
    "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
    cl::desc("scale factor for the latch probability. Value should be greater "
             "than 1. Lower values are ignored"));

static cl::opt<bool> PredicateWidenableBranchGuards(
    "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
    cl::desc("Whether or not we should predicate guards "
             "expressed as widenable branches to deoptimize blocks"),
    cl::init(true));

namespace {
/// Represents an induction variable check:
///   icmp Pred, <induction variable>, <loop invariant limit>
struct LoopICmp {
  ICmpInst::Predicate Pred;
  const SCEVAddRecExpr *IV;
  const SCEV *Limit;
  LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
           const SCEV *Limit)
      : Pred(Pred), IV(IV), Limit(Limit) {}
  LoopICmp() {}
  void dump() {
    dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
           << ", Limit = " << *Limit << "\n";
  }
};

class LoopPredication {
  AliasAnalysis *AA;
  DominatorTree *DT;
  ScalarEvolution *SE;
  LoopInfo *LI;
  MemorySSAUpdater *MSSAU;

  Loop *L;
  const DataLayout *DL;
  BasicBlock *Preheader;
  LoopICmp LatchCheck;

  bool isSupportedStep(const SCEV *Step);
  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
  Optional<LoopICmp> parseLoopLatchICmp();

  /// Return an insertion point suitable for inserting a safe to speculate
  /// instruction whose only user will be 'User' which has operands 'Ops'. A
  /// trivial result would be the User itself, but we try to return a loop
  /// invariant location if possible.
  Instruction *findInsertPt(Instruction *User, ArrayRef<Value *> Ops);
  /// Same as above, *except* that this uses the SCEV definition of invariant
  /// which is that an expression *can be made* invariant via SCEVExpander.
  /// Thus, this version is only suitable for finding an insert point to be
  /// passed to SCEVExpander!
  Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV *> Ops);

  /// Return true if the value is known to produce a single fixed value across
  /// all iterations on which it executes. Note that this does not imply
  /// speculation safety. That must be established separately.
  bool isLoopInvariantValue(const SCEV *S);

  Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
                     ICmpInst::Predicate Pred, const SCEV *LHS,
                     const SCEV *RHS);

  Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
                                        Instruction *Guard);
  Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
                                                        LoopICmp RangeCheck,
                                                        SCEVExpander &Expander,
                                                        Instruction *Guard);
  Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
                                                        LoopICmp RangeCheck,
                                                        SCEVExpander &Expander,
                                                        Instruction *Guard);
  unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
                         SCEVExpander &Expander, Instruction *Guard);

  bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
  bool widenWidenableBranchGuardConditions(BranchInst *Guard,
                                           SCEVExpander &Expander);
  // If the loop always exits through another block in the loop, we should not
  // predicate based on the latch check. For example, the latch check can be a
  // very coarse grained check and there can be more fine grained exit checks
  // within the loop.
  bool isLoopProfitableToPredicate();

  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

public:
  LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE,
                  LoopInfo *LI, MemorySSAUpdater *MSSAU)
      : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU) {}

  bool runOnLoop(Loop *L);
};

class LoopPredicationLegacyPass : public LoopPass {
public:
  static char ID;
  LoopPredicationLegacyPass() : LoopPass(ID) {
    initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<BranchProbabilityInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
    AU.addPreserved<MemorySSAWrapperPass>();
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAWP)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
    return LP.runOnLoop(L);
  }
};

char LoopPredicationLegacyPass::ID = 0;
} // end namespace

INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
                      "Loop predication", false, false)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
                    "Loop predication", false, false)

Pass *llvm::createLoopPredicationPass() {
  return new LoopPredicationLegacyPass();
}

PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
                                           LoopStandardAnalysisResults &AR,
                                           LPMUpdater &U) {
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
                     MSSAU ? MSSAU.get() : nullptr);
  if (!LP.runOnLoop(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
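
// An illustrative example of what parseLoopICmp below accepts (a sketch, not
// an exhaustive contract): for `icmp ult i32 %iv, %len`, where %iv is the add
// recurrence {0,+,1}<%loop> and %len is loop invariant, the result is
//   Pred = ICMP_ULT, IV = {0,+,1}<%loop>, Limit = %len.
// Operands are swapped (and the predicate flipped) as needed so the add
// recurrence always ends up on the LHS.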
Optional<LoopICmp>
LoopPredication::parseLoopICmp(ICmpInst *ICI) {
  auto Pred = ICI->getPredicate();
  auto *LHS = ICI->getOperand(0);
  auto *RHS = ICI->getOperand(1);

  const SCEV *LHSS = SE->getSCEV(LHS);
  if (isa<SCEVCouldNotCompute>(LHSS))
    return None;
  const SCEV *RHSS = SE->getSCEV(RHS);
  if (isa<SCEVCouldNotCompute>(RHSS))
    return None;

  // Canonicalize RHS to be the loop invariant bound and LHS the loop
  // computable IV.
  if (SE->isLoopInvariant(LHSS, L)) {
    std::swap(LHS, RHS);
    std::swap(LHSS, RHSS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
  if (!AR || AR->getLoop() != L)
    return None;

  return LoopICmp(Pred, AR, RHSS);
}

Value *LoopPredication::expandCheck(SCEVExpander &Expander,
                                    Instruction *Guard,
                                    ICmpInst::Predicate Pred, const SCEV *LHS,
                                    const SCEV *RHS) {
  Type *Ty = LHS->getType();
  assert(Ty == RHS->getType() && "expandCheck operands have different types?");

  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
    IRBuilder<> Builder(Guard);
    if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
      return Builder.getTrue();
    if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
                                     LHS, RHS))
      return Builder.getFalse();
  }

  Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
  Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
  IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
  return Builder.CreateICmp(Pred, LHSV, RHSV);
}

// Returns true if it's safe to truncate the IV to RangeCheckType.
// When the IV type is wider than the range operand type, we can still do loop
// predication, by generating SCEVs for the range and latch that are of the
// same type. We achieve this by generating a SCEV truncate expression for the
// latch IV. This is done iff truncation of the IV is a safe operation,
// without loss of information.
// Another way to achieve this is by generating a wider type SCEV for the
// range check operand; however, this needs a more involved check that the
// operands do not overflow. This can lead to loss of information when the
// range operand is of the form: add i32 %offset, %iv. We need to prove that
// sext(x + y) is the same as sext(x) + sext(y).
// This function returns true if we can safely represent the IV type in
// the RangeCheckType without loss of information.
static bool isSafeToTruncateWideIVType(const DataLayout &DL,
                                       ScalarEvolution &SE,
                                       const LoopICmp LatchCheck,
                                       Type *RangeCheckType) {
  if (!EnableIVTruncation)
    return false;
  assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
             DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
         "Expected latch check IV type to be larger than range check operand "
         "type!");
  // The start and end values of the IV should be known. This is to guarantee
  // that truncating the wide type will not lose information.
  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
  if (!Limit || !Start)
    return false;
  // This check makes sure that the IV does not change sign during loop
  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
  // IV wraps around, and the truncation of the IV would lose the range of
  // iterations between 2^32 and 2^64.
  if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
    return false;
  // The active bits should be less than the bits in the RangeCheckType. This
  // guarantees that truncating the latch check to RangeCheckType is a safe
  // operation.
  auto RangeCheckTypeBitSize =
      DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
}

// Return a LoopICmp describing a latch check equivalent to LatchCheck but with
// the requested type if safe to do so. May involve the use of a new IV.
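// For example (illustrative numbers, not taken from a specific test): a latch
// IV {0,+,1}<i64> with constant limit 100 and a range check operating on i32
// truncates to {0,+,1}<i32> with limit 100, since both constants have well
// under 32 active bits and the latch predicate is monotonic.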
static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
                                                 ScalarEvolution &SE,
                                                 const LoopICmp LatchCheck,
                                                 Type *RangeCheckType) {
  auto *LatchType = LatchCheck.IV->getType();
  if (RangeCheckType == LatchType)
    return LatchCheck;
  // For now, bail out if latch type is narrower than range type.
  if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
      DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
    return None;
  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
    return None;
  // We can now safely identify the truncated version of the IV and limit for
  // RangeCheckType.
  LoopICmp NewLatchCheck;
  NewLatchCheck.Pred = LatchCheck.Pred;
  NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
      SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
  if (!NewLatchCheck.IV)
    return None;
  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
  LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
                    << " can be represented as range check type: "
                    << *RangeCheckType << "\n");
  LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
  LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
  return NewLatchCheck;
}

bool LoopPredication::isSupportedStep(const SCEV *Step) {
  return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
}

Instruction *LoopPredication::findInsertPt(Instruction *Use,
                                           ArrayRef<Value *> Ops) {
  for (Value *Op : Ops)
    if (!L->isLoopInvariant(Op))
      return Use;
  return Preheader->getTerminator();
}

Instruction *LoopPredication::findInsertPt(Instruction *Use,
                                           ArrayRef<const SCEV *> Ops) {
  // Subtlety: SCEV considers things to be invariant if the value produced is
  // the same across iterations. This is not the same as being able to
  // evaluate outside the loop, which is what we actually need here.
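  // (For instance, a udiv SCEV is invariant when its operands are, but
  // expanding it in the preheader may speculate a possibly-trapping division
  // that was guarded inside the loop; the isSafeToExpandAt check below guards
  // against that.)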
  for (const SCEV *Op : Ops)
    if (!SE->isLoopInvariant(Op, L) ||
        !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
      return Use;
  return Preheader->getTerminator();
}

bool LoopPredication::isLoopInvariantValue(const SCEV *S) {
  // Handling expressions which produce invariant results, but *haven't* yet
  // been removed from the loop serves two important purposes.
  // 1) Most importantly, it resolves a pass ordering cycle which would
  // otherwise need us to iterate licm, loop-predication, and either
  // loop-unswitch or loop-peeling to make progress on examples with lots of
  // predicable range checks in a row. (Since, in the general case, we can't
  // hoist the length checks until the dominating checks have been discharged
  // as we can't prove doing so is safe.)
  // 2) As a nice side effect, this exposes the value of peeling or unswitching
  // much more obviously in the IR. Otherwise, the cost modeling for other
  // transforms would end up needing to duplicate all of this logic to model a
  // check which becomes predictable based on a modeled peel or unswitch.
  //
  // The cost of doing so in the worst case is an extra fill from the stack in
  // the loop to materialize the loop invariant test value instead of checking
  // against the original IV which is presumably in a register inside the loop.
  // Such cases are presumably rare, and hint at missing opportunities for
  // other passes.

  if (SE->isLoopInvariant(S, L))
    // Note: This is the SCEV variant, so the original Value* may be within the
    // loop even though SCEV has proven it is loop invariant.
    return true;

  // Handle a particularly important case which SCEV doesn't yet know about,
  // which shows up in range checks on arrays with immutable lengths.
  // TODO: This should be sunk inside SCEV.
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
    if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
      if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
        if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
            LI->hasMetadata(LLVMContext::MD_invariant_load))
          return true;
  return false;
}

Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
    LoopICmp LatchCheck, LoopICmp RangeCheck,
    SCEVExpander &Expander, Instruction *Guard) {
  auto *Ty = RangeCheck.IV->getType();
  // Generate the widened condition for the forward loop:
  //   guardStart u< guardLimit &&
  //   latchLimit <pred> guardLimit - 1 - guardStart + latchStart
  // where <pred> depends on the latch condition predicate. See the file
  // header comment for the reasoning.
  const SCEV *GuardStart = RangeCheck.IV->getStart();
  const SCEV *GuardLimit = RangeCheck.Limit;
  const SCEV *LatchStart = LatchCheck.IV->getStart();
  const SCEV *LatchLimit = LatchCheck.Limit;
  // Subtlety: We need all the values to be *invariant* across all iterations,
  // but we only need to check expansion safety for those which *aren't*
  // already guaranteed to dominate the guard.
  if (!isLoopInvariantValue(GuardStart) ||
      !isLoopInvariantValue(GuardLimit) ||
      !isLoopInvariantValue(LatchStart) ||
      !isLoopInvariantValue(LatchLimit)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }

  // guardLimit - guardStart + latchStart - 1
  const SCEV *RHS =
      SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
                     SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
  auto LimitCheckPred =
      ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);

  LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
  LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
  LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");

  auto *LimitCheck =
      expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
  auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
                                          GuardStart, GuardLimit);
  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}

Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
    LoopICmp LatchCheck, LoopICmp RangeCheck,
    SCEVExpander &Expander, Instruction *Guard) {
  auto *Ty = RangeCheck.IV->getType();
  const SCEV *GuardStart = RangeCheck.IV->getStart();
  const SCEV *GuardLimit = RangeCheck.Limit;
  const SCEV *LatchStart = LatchCheck.IV->getStart();
  const SCEV *LatchLimit = LatchCheck.Limit;
  // Subtlety: We need all the values to be *invariant* across all iterations,
  // but we only need to check expansion safety for those which *aren't*
  // already guaranteed to dominate the guard.
  if (!isLoopInvariantValue(GuardStart) ||
      !isLoopInvariantValue(GuardLimit) ||
      !isLoopInvariantValue(LatchStart) ||
      !isLoopInvariantValue(LatchLimit)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  // The decrement of the latch check IV should be the same as the
  // rangeCheckIV.
  auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);
  if (RangeCheck.IV != PostDecLatchCheckIV) {
    LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "
                      << *PostDecLatchCheckIV
                      << " and RangeCheckIV: " << *RangeCheck.IV << "\n");
    return None;
  }

  // Generate the widened condition for CountDownLoop:
  //   guardStart u< guardLimit &&
  //   latchLimit <pred> 1.
  // See the file header comment for the reasoning behind these checks.
  auto LimitCheckPred =
      ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
  auto *FirstIterationCheck = expandCheck(Expander, Guard,
                                          ICmpInst::ICMP_ULT,
                                          GuardStart, GuardLimit);
  auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
                                 SE->getOne(Ty));
  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
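
// An illustrative instance of the normalization below (a sketch, for
// exposition): with an IV {s,+,1} where SCEV knows s u<= n, the check
// `i != n` is equivalent to `i u< n` for every value the IV takes before
// reaching n (and `i == n` to `i u>= n`), so the equality predicates can be
// rewritten into the inequality forms the rest of the pass expects.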
static void normalizePredicate(ScalarEvolution *SE, Loop *L,
                               LoopICmp &RC) {
  // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
  // ULT/UGE form for ease of handling by our caller.
  if (ICmpInst::isEquality(RC.Pred) &&
      RC.IV->getStepRecurrence(*SE)->isOne() &&
      SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
    RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
              ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
}

/// If ICI can be widened to a loop invariant condition, emit the loop
/// invariant condition in the loop preheader and return it; otherwise
/// return None.
Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
                                                       SCEVExpander &Expander,
                                                       Instruction *Guard) {
  LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
  LLVM_DEBUG(ICI->dump());

  // parseLoopLatchICmp guarantees that the latch condition is:
  //   ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
  // We are looking for the range checks of the form:
  //   i u< guardLimit
  auto RangeCheck = parseLoopICmp(ICI);
  if (!RangeCheck) {
    LLVM_DEBUG(dbgs() << "Failed to parse the range check condition!\n");
    return None;
  }
  LLVM_DEBUG(dbgs() << "Guard check:\n");
  LLVM_DEBUG(RangeCheck->dump());
  if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
    LLVM_DEBUG(dbgs() << "Unsupported range check predicate("
                      << RangeCheck->Pred << ")!\n");
    return None;
  }
  auto *RangeCheckIV = RangeCheck->IV;
  if (!RangeCheckIV->isAffine()) {
    LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n");
    return None;
  }
  auto *Step = RangeCheckIV->getStepRecurrence(*SE);
  // We cannot just compare with latch IV step because the latch and range IVs
  // may have different types.
  if (!isSupportedStep(Step)) {
    LLVM_DEBUG(dbgs() << "Range check and latch have IVs with different "
                         "steps!\n");
    return None;
  }
  auto *Ty = RangeCheckIV->getType();
  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
  if (!CurrLatchCheckOpt) {
    LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
                         "corresponding to range type: "
                      << *Ty << "\n");
    return None;
  }

  LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
  // At this point, the range and latch step should have the same type, but
  // need not have the same value (we support both 1 and -1 steps).
  assert(Step->getType() ==
             CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&
         "Range and latch steps should be of same type!");
  if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {
    LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n");
    return None;
  }

  if (Step->isOne())
    return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
                                               Expander, Guard);
  else {
    assert(Step->isAllOnesValue() && "Step should be -1!");
    return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
                                               Expander, Guard);
  }
}

unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
                                        Value *Condition,
                                        SCEVExpander &Expander,
                                        Instruction *Guard) {
  unsigned NumWidened = 0;
  // The guard condition is expected to be in the form of:
  //   cond1 && cond2 && cond3 ...
  // Iterate over subconditions looking for icmp conditions which can be
  // widened across loop iterations. While widening these conditions, remember
  // the resulting list of subconditions in the Checks vector.
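  // For example (an illustrative shape, not a specific test): given
  //   %cond = and i1 %rc1, (and i1 %rc2, %wc)
  // where %wc is a widenable.condition call, the traversal below visits %rc1
  // and %rc2, widening each icmp it can, and records %wc exactly once.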
  SmallVector<Value *, 4> Worklist(1, Condition);
  SmallPtrSet<Value *, 4> Visited;
  Value *WideableCond = nullptr;
  do {
    Value *Condition = Worklist.pop_back_val();
    if (!Visited.insert(Condition).second)
      continue;

    Value *LHS, *RHS;
    using namespace llvm::PatternMatch;
    if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
      Worklist.push_back(LHS);
      Worklist.push_back(RHS);
      continue;
    }

    if (match(Condition,
              m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
      // Pick any, we don't care which
      WideableCond = Condition;
      continue;
    }

    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
      if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
                                                   Guard)) {
        Checks.push_back(NewRangeCheck.getValue());
        NumWidened++;
        continue;
      }
    }

    // Save the condition as is if we can't widen it
    Checks.push_back(Condition);
  } while (!Worklist.empty());

  // At the moment, our matching logic for widenable conditions implicitly
  // assumes we preserve the form: (br (and Cond, WC())). FIXME
  // Note that if there were multiple calls to widenable condition in the
  // traversal, we only need to keep one, and which one is arbitrary.
  if (WideableCond)
    Checks.push_back(WideableCond);
  return NumWidened;
}

bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
                                           SCEVExpander &Expander) {
  LLVM_DEBUG(dbgs() << "Processing guard:\n");
  LLVM_DEBUG(Guard->dump());

  TotalConsidered++;
  SmallVector<Value *, 4> Checks;
  unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
                                      Guard);
  if (NumWidened == 0)
    return false;

  TotalWidened += NumWidened;

  // Emit the new guard condition
  IRBuilder<> Builder(findInsertPt(Guard, Checks));
  Value *AllChecks = Builder.CreateAnd(Checks);
  auto *OldCond = Guard->getOperand(0);
  Guard->setOperand(0, AllChecks);
  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);

  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
  return true;
}

bool LoopPredication::widenWidenableBranchGuardConditions(
    BranchInst *BI, SCEVExpander &Expander) {
  assert(isGuardAsWidenableBranch(BI) && "Must be!");
  LLVM_DEBUG(dbgs() << "Processing guard:\n");
  LLVM_DEBUG(BI->dump());

  TotalConsidered++;
  SmallVector<Value *, 4> Checks;
  unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
                                      Expander, BI);
  if (NumWidened == 0)
    return false;

  TotalWidened += NumWidened;

  // Emit the new guard condition
  IRBuilder<> Builder(findInsertPt(BI, Checks));
  Value *AllChecks = Builder.CreateAnd(Checks);
  auto *OldCond = BI->getCondition();
  BI->setCondition(AllChecks);
  RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
  assert(isGuardAsWidenableBranch(BI) &&
         "Stopped being a guard after transform?");

  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
  return true;
}

Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
  using namespace PatternMatch;

  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
    return None;
  }

  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
  if (!BI || !BI->isConditional()) {
    LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
    return None;
  }
  BasicBlock *TrueDest = BI->getSuccessor(0);
  assert(
      (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
      "One of the latch's destinations must be the header");

  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
  if (!ICI) {
    LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
    return None;
  }
  auto Result = parseLoopICmp(ICI);
  if (!Result) {
    LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
    return None;
  }

  if (TrueDest != L->getHeader())
    Result->Pred = ICmpInst::getInversePredicate(Result->Pred);

  // Check affine first, so if it's not we don't try to compute the step
  // recurrence.
  if (!Result->IV->isAffine()) {
    LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n");
    return None;
  }

  auto *Step = Result->IV->getStepRecurrence(*SE);
  if (!isSupportedStep(Step)) {
    LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
    return None;
  }

  auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {
    if (Step->isOne()) {
      return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
             Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
    } else {
      assert(Step->isAllOnesValue() && "Step should be -1!");
      return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
             Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
    }
  };

  normalizePredicate(SE, L, *Result);
  if (IsUnsupportedPredicate(Step, Result->Pred)) {
    LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                      << ")!\n");
    return None;
  }

  return Result;
}

bool LoopPredication::isLoopProfitableToPredicate() {
  if (SkipProfitabilityChecks)
    return true;

  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
  L->getExitEdges(ExitEdges);
  // If there is only one exiting edge in the loop, it is always profitable to
  // predicate the loop.
  if (ExitEdges.size() == 1)
    return true;

  // Calculate the exiting probabilities of all exiting edges from the loop,
  // starting with the LatchExitProbability.
  // Heuristic for profitability: If any of the exiting blocks' probability of
  // exiting the loop is larger than that of exiting through the latch block,
  // it's not profitable to predicate the loop.
  auto *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Should have a single latch at this point!");
  auto *LatchTerm = LatchBlock->getTerminator();
  assert(LatchTerm->getNumSuccessors() == 2 &&
         "expected to be an exiting block with 2 succs!");
  unsigned LatchBrExitIdx =
      LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
  // We compute branch probabilities without BPI. We do not rely on BPI since
  // Loop predication is usually run in an LPM and BPI is only preserved
  // lossily within loop pass managers, while BPI has an inherent notion of
  // being complete for an entire function.

  // If the latch exits into a deoptimize or an unreachable block, do not
  // predicate on that latch check.
  auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
  if (isa<UnreachableInst>(LatchTerm) ||
      LatchExitBlock->getTerminatingDeoptimizeCall())
    return false;

  auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
    if (!ProfileData || !ProfileData->getOperand(0))
      return false;
    if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
      if (!MDS->getString().equals("branch_weights"))
        return false;
    if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
      return false;
    return true;
  };
  MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
  // Latch terminator has no valid profile data, so nothing to check
  // profitability on.
  if (!IsValidProfileData(LatchProfileData, LatchTerm))
    return true;

  auto ComputeBranchProbability =
      [&](const BasicBlock *ExitingBlock,
          const BasicBlock *ExitBlock) -> BranchProbability {
    auto *Term = ExitingBlock->getTerminator();
    MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
    unsigned NumSucc = Term->getNumSuccessors();
    if (IsValidProfileData(ProfileData, Term)) {
      uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
      for (unsigned i = 0; i < NumSucc; i++) {
        ConstantInt *CI =
            mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
        ProfVal = CI->getValue().getZExtValue();
        if (Term->getSuccessor(i) == ExitBlock)
          Numerator += ProfVal;
        Denominator += ProfVal;
      }
      return BranchProbability::getBranchProbability(Numerator, Denominator);
    } else {
      assert(LatchBlock != ExitingBlock &&
             "Latch term should always have profile data!");
      // No profile data, so we choose the weight as 1/num_of_succ(Src)
      return BranchProbability::getBranchProbability(1, NumSucc);
    }
  };
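
  // For example (illustrative weights): a latch terminator carrying
  //   !{!"branch_weights", i32 99, i32 1}
  // whose second successor is the exit block yields an exit probability of
  // 1/100 from the computation above.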
  BranchProbability LatchExitProbability =
      ComputeBranchProbability(LatchBlock, LatchExitBlock);

  // Protect against degenerate inputs provided by the user. Providing a value
  // less than one can invert the definition of profitable loop predication.
  float ScaleFactor = LatchExitProbabilityScale;
  if (ScaleFactor < 1) {
    LLVM_DEBUG(
        dbgs()
        << "Ignored user setting for loop-predication-latch-probability-scale: "
        << LatchExitProbabilityScale << "\n");
    LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
    ScaleFactor = 1.0;
  }
  const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;

  for (const auto &ExitEdge : ExitEdges) {
    BranchProbability ExitingBlockProbability =
        ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
    // Some exiting edge has higher probability than the latch exiting edge.
    // No longer profitable to predicate.
    if (ExitingBlockProbability > LatchProbabilityThreshold)
      return false;
  }

  // We have concluded that the most probable way to exit from the
  // loop is through the latch (or there's no profile information and all
  // exits are equally likely).
  return true;
}

/// If we can (cheaply) find a widenable branch which controls entry into the
/// loop, return it.
static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {
  // Walk back through any unconditionally executed blocks and see if we can
  // find a widenable condition which seems to control execution of this loop.
  // Note that we predict that maythrow calls are likely untaken and thus that
  // it's profitable to widen a branch before a maythrow call with a condition
  // afterwards even though that may cause the slow path to run in a case
  // where it wouldn't have otherwise.
  BasicBlock *BB = L->getLoopPreheader();
  if (!BB)
    return nullptr;
  do {
    if (BasicBlock *Pred = BB->getSinglePredecessor())
      if (BB == Pred->getSingleSuccessor()) {
        BB = Pred;
        continue;
      }
    break;
  } while (true);

  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *Term = Pred->getTerminator();

    Value *Cond, *WC;
    BasicBlock *IfTrueBB, *IfFalseBB;
    if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
        IfTrueBB == BB)
      return cast<BranchInst>(Term);
  }
  return nullptr;
}

/// Return the minimum of all analyzeable exit counts. This is an upper bound
/// on the actual exit count. If there are not at least two analyzeable exits,
/// returns SCEVCouldNotCompute.
static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
                                                       DominatorTree &DT,
                                                       Loop *L) {
  SmallVector<BasicBlock *, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  SmallVector<const SCEV *, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      continue;
    assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
           "We should only have known counts for exiting blocks that "
           "dominate latch!");
    ExitCounts.push_back(ExitCount);
  }

  if (ExitCounts.size() < 2)
    return SE.getCouldNotCompute();
  return SE.getUMinFromMismatchedTypes(ExitCounts);
}

/// This implements an analogous, but entirely distinct transform from the
/// main loop predication transform. This one is phrased in terms of using a
/// widenable branch *outside* the loop to allow us to simplify loop exits in
/// a following loop. This is close in spirit to the IndVarSimplify transform
/// of the same name, but is materially different: widening loosens legality
/// sharply.
bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  // The transformation performed here aims to widen a widenable condition
  // above the loop such that all analyzeable exits leading to deopt are dead.
  // It assumes that the latch is the dominant exit for profitability and that
  // exits branching to deoptimizing blocks are rarely taken. It relies on the
  // semantics of widenable expressions for legality. (i.e. being able to fall
  // down the widenable path spuriously allows us to ignore exit order,
  // unanalyzeable exits, side effects, exceptional exits, and other challenges
  // which restrict the applicability of the non-WC based version of this
  // transform in IndVarSimplify.)
  //
  // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
  // imply flags on the expression being hoisted and inserting new uses (flags
  // are only correct for current uses). The result is that we may be
  // inserting a branch on the value which can be either poison or undef. In
  // this case, the branch can legally go either way; we just need to avoid
  // introducing UB. This is achieved through the use of the freeze
  // instruction.

  SmallVector<BasicBlock *, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (ExitingBlocks.empty())
    return false; // Nothing to do.

  auto *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
  if (!WidenableBR)
    return false;

  const SCEV *LatchEC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(LatchEC))
    return false; // profitability - want hot exit in analyzeable set

  // At this point, we have found an analyzeable latch, and a widenable
  // condition above the loop. If we have a widenable exit within the loop
  // (for which we can't compute exit counts), drop the ability to further
  // widen so that we gain the ability to analyze its exit count and perform
  // this transform. TODO: It'd be nice to know for sure the exit became
  // analyzeable after dropping widenability.
  bool ChangedLoop = false;

  for (auto *ExitingBB : ExitingBlocks) {
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    Use *Cond, *WC;
    BasicBlock *IfTrueBB, *IfFalseBB;
    if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
        L->contains(IfTrueBB)) {
      WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
      ChangedLoop = true;
    }
  }
  if (ChangedLoop)
    SE->forgetLoop(L);

  // The use of umin(all analyzeable exits) instead of latch is subtle, but
  // important for profitability. We may have a loop which hasn't been fully
  // canonicalized just yet. If the exit we chose to widen is provably never
  // taken, we want the widened form to *also* be provably never taken. We
  // can't guarantee this as a current unanalyzeable exit may later become
  // analyzeable, but we can at least avoid the obvious cases.
  const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
      !SE->isLoopInvariant(MinEC, L) ||
      !isSafeToExpandAt(MinEC, WidenableBR, *SE))
    return ChangedLoop;

  // Subtlety: We need to avoid inserting additional uses of the WC. We know
  // that it can only have one transitive use at the moment, and thus moving
  // that use to just before the branch and inserting code before it and then
  // modifying the operand is legal.
  auto *IP = cast<Instruction>(WidenableBR->getCondition());

  // Here we unconditionally modify the IR, so after this point we should
  // return only `true`!
  IP->moveBefore(WidenableBR);
  if (MSSAU)
    if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
      MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
                         MemorySSA::BeforeTerminator);
  Rewriter.setInsertPoint(IP);
  IRBuilder<> B(IP);

  bool InvalidateLoop = false;
  Value *MinECV = nullptr; // lazily generated if needed
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // Can't rewrite non-branch yet.
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      continue;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        ExitCount->getType()->isPointerTy() ||
        !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
      continue;

    const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
    if (!ExitBB->getPostdominatingDeoptimizeCall())
      continue;

    /// Here we can be fairly sure that executing this exit will most likely
    /// lead to executing llvm.experimental.deoptimize.
    /// This is a profitability heuristic, not a legality constraint.

    // If we found a widenable exit condition, do two things:
    // 1) fold the widened exit test into the widenable condition
    // 2) fold the branch to untaken - avoids infinite looping

    Value *ECV = Rewriter.expandCodeFor(ExitCount);
    if (!MinECV)
      MinECV = Rewriter.expandCodeFor(MinEC);
    Value *RHS = MinECV;
    if (ECV->getType() != RHS->getType()) {
      Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
      ECV = B.CreateZExt(ECV, WiderTy);
      RHS = B.CreateZExt(RHS, WiderTy);
    }
    assert(!Latch || DT->dominates(ExitingBB, Latch));
    Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
    // Freeze poison or undef to an arbitrary bit pattern to ensure we can
    // branch without introducing UB. See NOTE ON POISON/UNDEF above for
    // context.
    NewCond = B.CreateFreeze(NewCond);
    widenWidenableBranch(WidenableBR, NewCond);

    Value *OldCond = BI->getCondition();
    BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
    InvalidateLoop = true;
  }

  if (InvalidateLoop)
    // We just mutated a bunch of loop exits changing their exit counts
    // widely. We need to force recomputation of the exit counts given these
    // changes. Note that all of the inserted exits are never taken, and
    // should be removed next time the CFG is modified.
    SE->forgetLoop(L);

  // Always return `true` since we have moved the WidenableBR's condition.
  return true;
}

bool LoopPredication::runOnLoop(Loop *Loop) {
  L = Loop;

  LLVM_DEBUG(dbgs() << "Analyzing ");
  LLVM_DEBUG(L->dump());

  Module *M = L->getHeader()->getModule();

  // There is nothing to do if the module doesn't use guards
  auto *GuardDecl =
      M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
  auto *WCDecl = M->getFunction(
      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
  bool HasWidenableConditions =
      PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
  if (!HasIntrinsicGuards && !HasWidenableConditions)
    return false;

  DL = &M->getDataLayout();

  Preheader = L->getLoopPreheader();
  if (!Preheader)
    return false;

  auto LatchCheckOpt = parseLoopLatchICmp();
  if (!LatchCheckOpt)
    return false;
  LatchCheck = *LatchCheckOpt;

  LLVM_DEBUG(dbgs() << "Latch check:\n");
  LLVM_DEBUG(LatchCheck.dump());

  if (!isLoopProfitableToPredicate()) {
    LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n");
    return false;
  }
  // Collect all the guards into a vector and process later, so as not
  // to invalidate the instruction iterator.
  SmallVector<IntrinsicInst *, 4> Guards;
  SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
  for (const auto BB : L->blocks()) {
    for (auto &I : *BB)
      if (isGuard(&I))
        Guards.push_back(cast<IntrinsicInst>(&I));
    if (PredicateWidenableBranchGuards &&
        isGuardAsWidenableBranch(BB->getTerminator()))
      GuardsAsWidenableBranches.push_back(
          cast<BranchInst>(BB->getTerminator()));
  }

  SCEVExpander Expander(*SE, *DL, "loop-predication");
  bool Changed = false;
  for (auto *Guard : Guards)
    Changed |= widenGuardConditions(Guard, Expander);
  for (auto *Guard : GuardsAsWidenableBranches)
    Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
  Changed |= predicateLoopExits(L, Expander);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  return Changed;
}