//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet. It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));
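
// For reference, a guard expressed as a widenable branch (the form handled by
// parseWidenableBranch below) looks roughly like this in IR:
//
//   %wc = call i1 @llvm.experimental.widenable.condition()
//   %guard.cond = and i1 %explicit.cond, %wc
//   br i1 %guard.cond, label %guarded, label %deopt
//
// This is an illustrative sketch (the value names are invented); see the
// GuardUtils helpers for the exact pattern that is matched.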
namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;
  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Eliminates the guard instruction properly.
static void eliminateGuard(Instruction *GuardInst) {
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and
  /// preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Instr, and \p
  /// GuardsPerBlock maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
          GuardsPerBlock, bool InvertCondition = false);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
  /// inverted condition of the dominating guard.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *DominatingGuard,
                                     bool InvertCond);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return isAvailableAt(V, InsertPos, Visited);
  }

  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
                     SmallPtrSetImpl<const Instruction *> &Visited) const;

  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// isAvailableAt returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p Cond0 and (\p
  /// Cond1 XOR \p InvertCondition).
  /// Return true if the expression computing the AND is only as
  /// expensive as computing one of the two. If \p InsertPt is non-null then
  /// actually generate the resulting expression, make it available at \p
  /// InsertPt and return it in \p Result (else no change to the IR is made).
  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                       Value *&Result, bool InvertCondition);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this
  /// range check.
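  ///
  /// For example (illustrative, with invented names), an IR check like
  ///   %idx.off = add i32 %idx, 4
  ///   %chk = icmp ult i32 %idx.off, %len
  /// is represented, after the offset extraction done in \c parseRangeChecks,
  /// as Base = %idx, Offset = 4, Length = %len, CheckInst = %chk.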
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks. Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
    SmallPtrSet<const Value *, 8> Visited;
    return parseRangeChecks(CheckCond, Checks, Visited);
  }
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
                        SmallPtrSetImpl<const Value *> &Visited);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks). Clobbers
  /// \p Checks and \p CombinedChecks on success and on failure.
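  ///
  /// For example (an illustrative sketch): given the parsed checks
  ///   {%i u< %len, %i+1 u< %len, %i+2 u< %len}
  /// the middle check is implied by the outer two, so \p CombinedChecks would
  /// contain just {%i u< %len, %i+2 u< %len}.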
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
  /// computing only one of the two expressions?
  bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
    Value *ResultUnused;
    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
                           InvertCond);
  }

  /// If \p InvertCondition is false, widen \p ToWiden to fail if
  /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
  /// true (in addition to whatever it is already checking).
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;
    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, DomTreeNode *Root,
                             std::function<bool(BasicBlock*)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
}

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root); DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
        GuardsInBlock, bool InvertCondition) {
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (isa<ConstantInt>(getCondition(Instr)))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge Instr with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                        << " and " << *getCondition(Candidate) << " is "
                        << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
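  // Instr is now subsumed by BestSoFar; replace its condition with a constant
  // so that the cleanup loop in run() can eliminate it.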
  auto NewGuardCondition = InvertCondition
                               ? ConstantInt::getFalse(Instr->getContext())
                               : ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore
GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
                                        Instruction *DominatingGuard,
                                        bool InvertCond) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need,
  // and 2) we have the extra cost of computing the guard condition in the
  // common case. At the moment, we really only consider the second in our
  // heuristic here. TODO: evaluate cost model for spurious deopt.
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(getCondition(DominatedInstr),
                               getCondition(DominatingGuard), InvertCond))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // Returns true if we might be hoisting above explicit control flow. Note
  // that this completely ignores implicit control flow (guards, calls which
  // throw, etc...). That choice appears arbitrary.
  auto MaybeHoistingOutOfIf = [&]() {
    auto *DominatingBlock = DominatingGuard->getParent();
    auto *DominatedBlock = DominatedInstr->getParent();
    if (isGuardAsWidenableBranch(DominatingGuard))
      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);

    // Same block?
    if (DominatedBlock == DominatingBlock)
      return false;
    // Obvious successor (common loop header/preheader case)
    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
      return false;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}

bool GuardWideningImpl::isAvailableAt(
    const Value *V, const Instruction *Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;
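  // Note that instructions which may read memory are rejected even when
  // speculatable: hoisting them to an earlier point could observe a different
  // memory state and so change the value they produce.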
  if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
         !Inst->mayReadFromMemory() &&
         "Should've checked with isAvailableAt!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(Loc);
}

bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
                                        Instruction *InsertPt, Value *&Result,
                                        bool InvertCondition) {
  using namespace llvm::PatternMatch;

  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
      if (InvertCondition)
        Pred1 = ICmpInst::getInversePredicate(Pred1);

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // SubsetIntersect is a subset of the actual mathematical intersection of
      // CR0 and CR1, while SupersetIntersect is a superset of the actual
      // mathematical intersection. If these two ConstantRanges are equal, then
      // we know we were able to represent the actual mathematical intersection
      // of CR0 and CR1, and can use the same to generate an icmp instruction.
      //
      // Given what we're doing here and the semantics of guards, it would
      // actually be correct to just use SubsetIntersect, but that may be too
      // aggressive in cases we care about.
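      //
      // For example (illustrative): if Cond0 is "%x u< 7" (region [0, 7)) and
      // Cond1 is "%x u< 9" (region [0, 9)), both intersections equal [0, 7),
      // so the two checks can be widened into the single check "%x u< 7".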
      auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
      auto SupersetIntersect = CR0.intersectWith(CR1);

      APInt NewRHSAP;
      CmpInst::Predicate Pred;
      if (SubsetIntersect == SupersetIntersect &&
          SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
        if (InsertPt) {
          ConstantInt *NewRHS =
              ConstantInt::get(Cond0->getContext(), NewRHSAP);
          Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
        }
        return true;
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    // TODO: Support InvertCondition case?
    if (!InvertCondition &&
        parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        Result = nullptr;
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
      }
      return true;
    }
  }

  // Base case -- just logical-and the two conditions together.
  if (InsertPt) {
    makeAvailableAt(Cond0, InsertPt);
    makeAvailableAt(Cond1, InsertPt);
    if (InvertCondition)
      Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
  }

  // We were not able to compute Cond0 AND Cond1 for the price of one.
  return false;
}

bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallPtrSetImpl<const Value *> &Visited) {
  if (!Visited.insert(CheckCond).second)
    return true;

  using namespace llvm::PatternMatch;

  {
    Value *AndLHS, *AndRHS;
    if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
      return parseRangeChecks(AndLHS, Checks, Visited) &&
             parseRangeChecks(AndRHS, Checks, Visited);
  }

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.
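  //
  // For example (illustrative): a check on "(%a + 4) u< %len" is rewritten to
  // Base = %a, Offset = 4, so that checks against the same %a and %len end up
  // with a common base and can be merged by combineRangeChecks.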
  bool Changed;
  auto &Ctx = CheckCond->getContext();
  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}

bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try
    // to merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has
    // been no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: the sort does not invalidate CurrentChecks; after it, front() has
    // the minimum offset and back() the maximum.
    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //   with forall i in [0,f]: k_f-k_i u< k_f-k_0   ... Precond_0
    //        k_f-k_0 u< INT_MIN+k_f                  ... Precond_1
    //        k_f != k_0                              ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    // thus I+k_f is the greatest unsigned value in that range.
    //
    // This combined with Chk_f shows that everything in that range is u< L.
    // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    // lie in [I+k_0,I+k_f], thus proving our claim.
    //
    // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
    // range by definition, and the latter case is impossible:
    //
    //   0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //   xxxxxx        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    // with 'x' above) be at least >u INT_MIN.
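    //
    // Concretely (illustrative): for the checks I u< L, I+1 u< L, I+2 u< L we
    // have k_0 = 0 and k_f = 2; keeping only Chk_0 and Chk_f (I u< L and
    // I+2 u< L) implies the middle check, so three checks become two.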
    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }
  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
                         [](BasicBlock *) { return true; }).run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
                         BlockFilter).run())
    return PreservedAnalyses::all();

  return getLoopPassPreservedAnalyses();
}

namespace {
struct GuardWideningLegacyPass : public FunctionPass {
  static char ID;

  GuardWideningLegacyPass() : FunctionPass(ID) {
    initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
                             [](BasicBlock *) { return true; }).run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
  }
};

/// Same as above, but restricted to a single loop at a time. Can be
/// scheduled with other loop passes w/o breaking out of LPM.
struct LoopGuardWideningLegacyPass : public LoopPass {
  static char ID;

  LoopGuardWideningLegacyPass() : LoopPass(ID) {
    initializeLoopGuardWideningLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
    BasicBlock *RootBB = L->getLoopPredecessor();
    if (!RootBB)
      RootBB = L->getHeader();
    auto BlockFilter = [&](BasicBlock *BB) {
      return BB == RootBB || L->contains(BB);
    };
    return GuardWideningImpl(DT, PDT, LI, DT.getNode(RootBB),
                             BlockFilter).run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
    AU.addPreserved<PostDominatorTreeWrapperPass>();
  }
};
}

char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                    false, false)

INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
                      "Widen guards (within a single loop, as a loop pass)",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
                    "Widen guards (within a single loop, as a loop pass)",
                    false, false)

FunctionPass *llvm::createGuardWideningPass() {
  return new GuardWideningLegacyPass();
}

Pass *llvm::createLoopGuardWideningPass() {
  return new LoopGuardWideningLegacyPass();
}