//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet. It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));
namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;
  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}
// Eliminates the guard instruction properly.
static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  // Drop the MemorySSA access before erasing the instruction; doing it in the
  // other order would touch GuardInst after it has been freed.
  if (MSSAU)
    MSSAU->removeMemoryAccess(GuardInst);
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}
class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
          GuardsPerBlock, bool InvertCondition = false);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };
  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
  /// inverted condition of the dominating guard.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *DominatingGuard,
                                     bool InvertCond);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return isAvailableAt(V, InsertPos, Visited);
  }

  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
                     SmallPtrSetImpl<const Instruction *> &Visited) const;

  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// isAvailableAt returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p Cond0 and (\p
  /// Cond1 XOR \p InvertCondition).
  /// Return true if the expression computing the AND is only as
  /// expensive as computing one of the two. If \p InsertPt is true then
  /// actually generate the resulting expression, make it available at \p
  /// InsertPt and return it in \p Result (else no change to the IR is made).
  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                       Value *&Result, bool InvertCondition);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };
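
  // For illustration (hypothetical IR): the check "icmp ult (add i32 %a, 4),
  // %len" would be represented as Base = %a, Offset = 4, Length = %len, with
  // CheckInst pointing at the icmp itself; see parseRangeChecks below.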
  /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks. Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
    SmallPtrSet<const Value *, 8> Visited;
    return parseRangeChecks(CheckCond, Checks, Visited);
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
                        SmallPtrSetImpl<const Value *> &Visited);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of checks
  /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
  /// and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
  /// computing only one of the two expressions?
  bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
    Value *ResultUnused;
    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
                           InvertCond);
  }

  /// If \p InvertCondition is false, widen \p ToWiden to fail if
  /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
  /// true (in addition to whatever it is already checking).
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;
    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }
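
  // For reference, a guard expressed as a widenable branch looks like this
  // (illustrative IR):
  //   %wc = call i1 @llvm.experimental.widenable.condition()
  //   %guard.chk = and i1 %cond, %wc
  //   br i1 %guard.chk, label %guarded, label %deopt
  // and setWidenableBranchCond above rewrites %cond while leaving the
  // widenable condition in place.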

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
} // end anonymous namespace

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}
bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}
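
// Note on the two-phase structure of run(): widening only rewrites conditions
// during the DFS; the instructions whose conditions became constant are erased
// in a second phase, because (as eliminateInstrViaWidening notes below) other
// guards may still be widened into them while the walk is in progress.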
bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
        GuardsInBlock, bool InvertCondition) {
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (isa<ConstantInt>(getCondition(Instr)))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                        << " and " << *getCondition(Candidate) << " is "
                        << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
  auto NewGuardCondition = InvertCondition
                               ? ConstantInt::getFalse(Instr->getContext())
                               : ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}
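
// After a successful widening above, the dominated instruction is left in a
// trivially-dead form, e.g. "call @llvm.experimental.guard(i1 true)", which
// the second phase of run() then cleans up.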
GuardWideningImpl::WideningScore
GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
                                        Instruction *DominatingGuard,
                                        bool InvertCond) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(getCondition(DominatedInstr),
                               getCondition(DominatingGuard), InvertCond))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // Returns true if we might be hoisting above explicit control flow. Note
  // that this completely ignores implicit control flow (guards, calls which
  // throw, etc...). That choice appears arbitrary.
  auto MaybeHoistingOutOfIf = [&]() {
    auto *DominatingBlock = DominatingGuard->getParent();
    auto *DominatedBlock = DominatedInstr->getParent();
    if (isGuardAsWidenableBranch(DominatingGuard))
      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);

    // Same Block?
    if (DominatedBlock == DominatingBlock)
      return false;
    // Obvious successor (common loop header/preheader case)
    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
      return false;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}
bool GuardWideningImpl::isAvailableAt(
    const Value *V, const Instruction *Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}
void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
         !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(Loc);
  // If we moved the instruction before the guard, we must drop its poison
  // generating flags: facts such as nuw/nsw may only have held under
  // conditions that the guard (or other dominating checks) established.
  Inst->dropPoisonGeneratingFlags();
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
                                        Instruction *InsertPt, Value *&Result,
                                        bool InvertCondition) {
  using namespace llvm::PatternMatch;

  {
    // L >u C0 && L >u C1  ->  L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      if (InvertCondition)
        Pred1 = ICmpInst::getInversePredicate(Pred1);

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(Cond0->getContext(), NewRHSAP);
            Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return true;
        }
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    // TODO: Support InvertCondition case?
    if (!InvertCondition &&
        parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        Result = nullptr;
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
      }
      return true;
    }
  }

  // Base case -- just logical-and the two conditions together.

  if (InsertPt) {
    makeAvailableAt(Cond0, InsertPt);
    makeAvailableAt(Cond1, InsertPt);
    if (InvertCondition)
      Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
  }

  // We were not able to compute Cond0 AND Cond1 for the price of one.
  return false;
}
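
// Example of widenCondCommon's constant-range fold (illustrative IR): widening
//   %a = icmp ult i32 %x, 10      ; Cond0
//   %b = icmp ult i32 %x, 20      ; Cond1
// intersects the exact regions [0,10) and [0,20) into [0,10), so the single
// emitted check "%wide.chk = icmp ult i32 %x, 10" covers both conditions for
// the price of one.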
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallPtrSetImpl<const Value *> &Visited) {
  if (!Visited.insert(CheckCond).second)
    return true;

  using namespace llvm::PatternMatch;

  {
    // Thread Visited through the recursion so a subexpression shared between
    // both sides of the conjunction is only parsed once.
    Value *AndLHS, *AndRHS;
    if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
      return parseRangeChecks(AndLHS, Checks, Visited) &&
             parseRangeChecks(AndRHS, Checks, Visited);
  }

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      // An "or" with a constant whose set bits are all known zero in OpLHS is
      // equivalent to an add, so fold it into the offset the same way.
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}
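
// Worked example (hypothetical IR): for the check
//   icmp ult (add i32 (add i32 %a, 2), 3), %len
// the loop above peels the two adds one at a time, yielding Base = %a,
// Offset = 5, Length = %len.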
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
    //          k_f != k_0                             ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    //   We will show that the integer range [I+k_0,I+k_f] does not
    //   unsigned-wrap (i.e. going from I+k_0 to I+k_f does not cross the -1,0
    //   boundary) and thus I+k_f is the greatest unsigned value in that range.
    //
    //   This combined with Chk_f shows that everything in that range is u< L.
    //   Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    //   lie in [I+k_0,I+k_f], thus proving our claim.
    //
    //   To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    //   two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be
    //   equal since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a
    //   wrapping range by definition, and the latter case is impossible:
    //
    //     0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //      xxxxxx         xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //   For Chk_0 to succeed, we'd need k_f-k_0 (the range highlighted with
    //   'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}
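
// Worked example (hypothetical offsets): the three checks
//   %a+1 u< %len,  %a+2 u< %len,  %a+3 u< %len
// share a base and length, satisfy the preconditions above (k_0 = 1, k_f = 3),
// and so collapse to just "%a+1 u< %len" and "%a+3 u< %len".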
#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif
PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSAA)
    MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                         DT.getRootNode(), [](BasicBlock *) { return true; })
           .run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}
PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC,
                         MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB),
                         BlockFilter)
           .run())
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
namespace {

struct GuardWideningLegacyPass : public FunctionPass {
  static char ID;

  GuardWideningLegacyPass() : FunctionPass(ID) {
    initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAWP)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
    return GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                             DT.getRootNode(),
                             [](BasicBlock *) { return true; })
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
};

/// Same as above, but restricted to a single loop at a time. Can be
/// scheduled with other loop passes w/o breaking out of LPM.
struct LoopGuardWideningLegacyPass : public LoopPass {
  static char ID;

  LoopGuardWideningLegacyPass() : LoopPass(ID) {
    initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAWP)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());

    BasicBlock *RootBB = L->getLoopPredecessor();
    if (!RootBB)
      RootBB = L->getHeader();
    auto BlockFilter = [&](BasicBlock *BB) {
      return BB == RootBB || L->contains(BB);
    };
    return GuardWideningImpl(DT, PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                             DT.getNode(RootBB), BlockFilter)
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
    AU.addPreserved<PostDominatorTreeWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
};
} // end anonymous namespace
char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                    false, false)

INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
                      "Widen guards (within a single loop, as a loop pass)",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
                    "Widen guards (within a single loop, as a loop pass)",
                    false, false)

FunctionPass *llvm::createGuardWideningPass() {
  return new GuardWideningLegacyPass();
}

Pass *llvm::createLoopGuardWideningPass() {
  return new LoopGuardWideningLegacyPass();
}