GuardWidening.cpp 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903
  1. //===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the guard widening pass. The semantics of the
  10. // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
  11. // more often than it did before the transform. This optimization is called
  12. // "widening" and can be used to hoist and common runtime checks in situations like
  13. // these:
  14. //
  15. // %cmp0 = 7 u< Length
  16. // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
  17. // call @unknown_side_effects()
  18. // %cmp1 = 9 u< Length
  19. // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
  20. // ...
  21. //
  22. // =>
  23. //
  24. // %cmp0 = 9 u< Length
  25. // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
  26. // call @unknown_side_effects()
  27. // ...
  28. //
  29. // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
  30. // generic implementation of the same function, which will have the correct
  31. // semantics from that point onward. It is always _legal_ to deoptimize (so
  32. // replacing %cmp0 with false is "correct"), though it may not always be
  33. // profitable to do so.
  34. //
  35. // NB! This pass is a work in progress. It hasn't been tuned to be "production
  36. // ready" yet. It is known to have quadratic running time and will not scale
  37. // to large numbers of guards
  38. //
  39. //===----------------------------------------------------------------------===//
  40. #include "llvm/Transforms/Scalar/GuardWidening.h"
  41. #include "llvm/ADT/DenseMap.h"
  42. #include "llvm/ADT/DepthFirstIterator.h"
  43. #include "llvm/ADT/Statistic.h"
  44. #include "llvm/Analysis/BranchProbabilityInfo.h"
  45. #include "llvm/Analysis/GuardUtils.h"
  46. #include "llvm/Analysis/LoopInfo.h"
  47. #include "llvm/Analysis/LoopPass.h"
  48. #include "llvm/Analysis/MemorySSAUpdater.h"
  49. #include "llvm/Analysis/PostDominators.h"
  50. #include "llvm/Analysis/ValueTracking.h"
  51. #include "llvm/IR/ConstantRange.h"
  52. #include "llvm/IR/Dominators.h"
  53. #include "llvm/IR/IntrinsicInst.h"
  54. #include "llvm/IR/PatternMatch.h"
  55. #include "llvm/InitializePasses.h"
  56. #include "llvm/Pass.h"
  57. #include "llvm/Support/CommandLine.h"
  58. #include "llvm/Support/Debug.h"
  59. #include "llvm/Support/KnownBits.h"
  60. #include "llvm/Transforms/Scalar.h"
  61. #include "llvm/Transforms/Utils/GuardUtils.h"
  62. #include "llvm/Transforms/Utils/LoopUtils.h"
  63. #include <functional>
  64. using namespace llvm;
  65. #define DEBUG_TYPE "guard-widening"
  66. STATISTIC(GuardsEliminated, "Number of eliminated guards");
  67. STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
  68. static cl::opt<bool>
  69. WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
  70. cl::desc("Whether or not we should widen guards "
  71. "expressed as branches by widenable conditions"),
  72. cl::init(true));
  73. namespace {
  74. // Get the condition of \p I. It can either be a guard or a conditional branch.
  75. static Value *getCondition(Instruction *I) {
  76. if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
  77. assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
  78. "Bad guard intrinsic?");
  79. return GI->getArgOperand(0);
  80. }
  81. Value *Cond, *WC;
  82. BasicBlock *IfTrueBB, *IfFalseBB;
  83. if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
  84. return Cond;
  85. return cast<BranchInst>(I)->getCondition();
  86. }
  87. // Set the condition for \p I to \p NewCond. \p I can either be a guard or a
  88. // conditional branch.
  89. static void setCondition(Instruction *I, Value *NewCond) {
  90. if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
  91. assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
  92. "Bad guard intrinsic?");
  93. GI->setArgOperand(0, NewCond);
  94. return;
  95. }
  96. cast<BranchInst>(I)->setCondition(NewCond);
  97. }
  98. // Eliminates the guard instruction properly.
  99. static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  100. GuardInst->eraseFromParent();
  101. if (MSSAU)
  102. MSSAU->removeMemoryAccess(GuardInst);
  103. ++GuardsEliminated;
  104. }
/// Implements one run of guard widening over a region of the CFG: either a
/// whole function, or one loop's blocks plus its preheader.
class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT; // May be null (the loop-pass driver passes none).
  LoopInfo &LI;
  MemorySSAUpdater *MSSAU; // May be null when MemorySSA is unavailable.

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
          GuardsPerBlock, bool InvertCondition = false);

  /// Used to keep track of which widening potential is more effective.
  /// Ordered from worst to best so that operator> picks the better score.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
  /// inverted condition of the dominating guard.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *DominatingGuard,
                                     bool InvertCond);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return isAvailableAt(V, InsertPos, Visited);
  }

  /// Recursive worker for the overload above; \p Visited breaks cycles and
  /// avoids re-checking shared sub-expressions.
  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
                     SmallPtrSetImpl<const Instruction *> &Visited) const;

  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// isAvailableAt returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p Cond0 and (\p
  /// Cond1 XOR \p InvertCondition).
  /// Return true if the expression computing the AND is only as
  /// expensive as computing one of the two. If \p InsertPt is true then
  /// actually generate the resulting expression, make it available at \p
  /// InsertPt and return it in \p Result (else no change to the IR is made).
  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                       Value *&Result, bool InvertCondition);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; };
    ICmpInst *getCheckInst() const { return CheckInst; }

    /// Print "Base: ... Offset: ... Length: ..." to \p OS (debug aid).
    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks. Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
    SmallPtrSet<const Value *, 8> Visited;
    return parseRangeChecks(CheckCond, Checks, Visited);
  }

  /// Recursive worker for the overload above.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
                        SmallPtrSetImpl<const Value *> &Visited);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of checks
  /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
  /// and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
  /// computing only one of the two expressions?
  bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
    Value *ResultUnused;
    // A null insert point asks widenCondCommon for a dry run: it answers the
    // feasibility question without changing any IR.
    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
                           InvertCond);
  }

  /// If \p InvertCondition is false, Widen \p ToWiden to fail if
  /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
  /// true (in addition to whatever it is already checking).
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;

    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
    if (isGuardAsWidenableBranch(ToWiden)) {
      // A widenable branch keeps the widenable condition factored out, so
      // the new check goes into the non-widenable part of its condition.
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, MemorySSAUpdater *MSSAU,
                             DomTreeNode *Root,
                             std::function<bool(BasicBlock*)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
  247. }
  248. static bool isSupportedGuardInstruction(const Instruction *Insn) {
  249. if (isGuard(Insn))
  250. return true;
  251. if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
  252. return true;
  253. return false;
  254. }
bool GuardWideningImpl::run() {
  // Guards seen so far, per block. Filled in DFS order over the dominator
  // tree, so by the time a block is processed every dominating block's guard
  // list is already populated.
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    // Skip blocks outside the region of interest (e.g. outside the loop).
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    // Collect this block's guards in program order before trying to widen
    // any of them.
    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  // Physically remove what was widened away -- but keep anything that other
  // conditions were in turn widened *into*.
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      // eliminateInstrViaWidening replaced the condition with a constant.
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        // A widenable branch is left in place with a constant condition for
        // later cleanup passes; just count it.
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}
/// Scan the guards on the current dominator-tree DFS path, score each as a
/// widening target for \p Instr, and widen into the best-scoring one (if any).
/// Returns true if \p Instr was eliminated (its condition folded to a
/// constant and its check merged into a dominating guard).
bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
        GuardsInBlock, bool InvertCondition) {
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (isa<ConstantInt>(getCondition(Instr)))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    // Within Instr's own block, only guards *before* Instr are candidates;
    // in strictly dominating blocks, all guards are.
    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      // Debug-only check: the cached guard list must appear in the same
      // order as the instructions in the block.
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                        << " and " << *getCondition(Candidate) << " is "
                        << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
  // Neutralize Instr's own check: a widened (inverted) condition means Instr
  // must now always pass (resp. always fail and branch to deopt).
  auto NewGuardCondition = InvertCondition
                               ? ConstantInt::getFalse(Instr->getContext())
                               : ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}
/// Heuristic score for widening the condition of \p DominatedInstr into
/// \p DominatingGuard. Legality (availability of the condition at the
/// dominating point, no hoisting into sibling loops) is folded into the
/// score as WS_IllegalOrNegative.
GuardWideningImpl::WideningScore
GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
                                        Instruction *DominatingGuard,
                                        bool InvertCond) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  // The condition must be computable at the dominating guard's position.
  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
    return WS_IllegalOrNegative;

  // If the guard was conditional executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(getCondition(DominatedInstr),
                               getCondition(DominatingGuard), InvertCond))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // Returns true if we might be hoisting above explicit control flow. Note
  // that this completely ignores implicit control flow (guards, calls which
  // throw, etc...). That choice appears arbitrary.
  auto MaybeHoistingOutOfIf = [&]() {
    auto *DominatingBlock = DominatingGuard->getParent();
    auto *DominatedBlock = DominatedInstr->getParent();
    // For a widenable branch, the effective "guarded" region starts at its
    // taken successor, not at the branch itself.
    if (isGuardAsWidenableBranch(DominatingGuard))
      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);

    // Same Block?
    if (DominatedBlock == DominatingBlock)
      return false;
    // Obvious successor (common loop header/preheader case)
    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
      return false;
    // TODO: diamond, triangle cases
    // Without post-dominator info, conservatively assume we'd be hoisting
    // over control flow.
    if (!PDT) return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}
  399. bool GuardWideningImpl::isAvailableAt(
  400. const Value *V, const Instruction *Loc,
  401. SmallPtrSetImpl<const Instruction *> &Visited) const {
  402. auto *Inst = dyn_cast<Instruction>(V);
  403. if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
  404. return true;
  405. if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
  406. Inst->mayReadFromMemory())
  407. return false;
  408. Visited.insert(Inst);
  409. // We only want to go _up_ the dominance chain when recursing.
  410. assert(!isa<PHINode>(Loc) &&
  411. "PHIs should return false for isSafeToSpeculativelyExecute");
  412. assert(DT.isReachableFromEntry(Inst->getParent()) &&
  413. "We did a DFS from the block entry!");
  414. return all_of(Inst->operands(),
  415. [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
  416. }
  417. void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  418. auto *Inst = dyn_cast<Instruction>(V);
  419. if (!Inst || DT.dominates(Inst, Loc))
  420. return;
  421. assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
  422. !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");
  423. for (Value *Op : Inst->operands())
  424. makeAvailableAt(Op, Loc);
  425. Inst->moveBefore(Loc);
  426. }
/// Try to fold Cond0 AND (Cond1 ^ InvertCondition) into something no more
/// expensive than one of the two conditions; fall back to an explicit `and`.
/// Returns true only for the cheap cases; IR is mutated only when InsertPt
/// is non-null.
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
                                        Instruction *InsertPt, Value *&Result,
                                        bool InvertCondition) {
  using namespace llvm::PatternMatch;

  {
    // Case 1: both conditions compare the same LHS against constants, e.g.
    // L >u C0 && L >u C1 -> L >u max(C0, C1).
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      if (InvertCondition)
        Pred1 = ICmpInst::getInversePredicate(Pred1);

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(Cond0->getContext(), NewRHSAP);
            Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return true;
        }
      }
    }
  }

  {
    // Case 2: both conditions decompose into range checks that can be merged
    // into a smaller set of checks.
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    // TODO: Support InvertCondition case?
    if (!InvertCondition &&
        parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        Result = nullptr;
        // AND the surviving checks together, hoisting each to InsertPt first.
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
      }
      return true;
    }
  }

  // Base case -- just logical-and the two conditions together.

  if (InsertPt) {
    makeAvailableAt(Cond0, InsertPt);
    makeAvailableAt(Cond1, InsertPt);
    if (InvertCondition)
      Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
  }

  // We were not able to compute Cond0 AND Cond1 for the price of one.
  return false;
}
/// Decompose \p CheckCond into range checks of the form Base + Offset u< Length
/// (with Length known non-negative) and append them to \p Checks.
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallPtrSetImpl<const Value *> &Visited) {
  // Already-seen sub-conditions contribute nothing new.
  if (!Visited.insert(CheckCond).second)
    return true;

  using namespace llvm::PatternMatch;

  {
    // A conjunction parses iff both sides parse.
    Value *AndLHS, *AndRHS;
    if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
      return parseRangeChecks(AndLHS, Checks) &&
             parseRangeChecks(AndRHS, Checks);
  }

  // Only unsigned < / > integer comparisons can encode a range check.
  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  // Canonicalize to the u< form.
  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  // Start with Offset = 0; constant offsets are peeled off Base below.
  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  // Fixpoint loop: repeatedly strip `add`/disjoint-`or` constants off Base,
  // accumulating them into Offset.
  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      // An `or` with no overlapping set bits behaves like an `add`.
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}
/// Group \p Checks by (Base, Length) and, where provably safe, replace each
/// group by just its minimum- and maximum-offset checks. Returns true iff
/// the output is strictly smaller than the input.
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    // Merging only pays off with 3+ checks (we keep 2 of them anyway).
    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: std::sort should not invalidate the ChecksStart iterator.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    // Precond_1 below: the total offset spread must stay under INT_MIN.
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    // Precond_0 and Precond_2 below (MaxDiff.isMinValue() <=> k_f == k_0).
    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
    //          k_f != k_0                             ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    //   We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    //   (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    //   thus I+k_f is the greatest unsigned value in that range.
    //
    //   This combined with Ckh_(f+1) shows that everything in that range is u< L.
    //   Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1)
    //   lie in [I+k_0,I+k_f], this proving our claim.
    //
    //   To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    //   two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    //   since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a wrapping
    //   range by definition, and the latter case is impossible:
    //
    //     0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //     xxxxxx                                    xxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //   For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    //   with 'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}
  632. #ifndef NDEBUG
  633. StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  634. switch (WS) {
  635. case WS_IllegalOrNegative:
  636. return "IllegalOrNegative";
  637. case WS_Neutral:
  638. return "Neutral";
  639. case WS_Positive:
  640. return "Positive";
  641. case WS_VeryPositive:
  642. return "VeryPositive";
  643. }
  644. llvm_unreachable("Fully covered switch above!");
  645. }
  646. #endif
  647. PreservedAnalyses GuardWideningPass::run(Function &F,
  648. FunctionAnalysisManager &AM) {
  649. auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  650. auto &LI = AM.getResult<LoopAnalysis>(F);
  651. auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  652. auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  653. std::unique_ptr<MemorySSAUpdater> MSSAU;
  654. if (MSSAA)
  655. MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  656. if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
  657. DT.getRootNode(), [](BasicBlock *) { return true; })
  658. .run())
  659. return PreservedAnalyses::all();
  660. PreservedAnalyses PA;
  661. PA.preserveSet<CFGAnalyses>();
  662. PA.preserve<MemorySSAAnalysis>();
  663. return PA;
  664. }
  665. PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
  666. LoopStandardAnalysisResults &AR,
  667. LPMUpdater &U) {
  668. BasicBlock *RootBB = L.getLoopPredecessor();
  669. if (!RootBB)
  670. RootBB = L.getHeader();
  671. auto BlockFilter = [&](BasicBlock *BB) {
  672. return BB == RootBB || L.contains(BB);
  673. };
  674. std::unique_ptr<MemorySSAUpdater> MSSAU;
  675. if (AR.MSSA)
  676. MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  677. if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr,
  678. AR.DT.getNode(RootBB), BlockFilter).run())
  679. return PreservedAnalyses::all();
  680. auto PA = getLoopPassPreservedAnalyses();
  681. if (AR.MSSA)
  682. PA.preserve<MemorySSAAnalysis>();
  683. return PA;
  684. }
  685. namespace {
  686. struct GuardWideningLegacyPass : public FunctionPass {
  687. static char ID;
  688. GuardWideningLegacyPass() : FunctionPass(ID) {
  689. initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  690. }
  691. bool runOnFunction(Function &F) override {
  692. if (skipFunction(F))
  693. return false;
  694. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  695. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  696. auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
  697. auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  698. std::unique_ptr<MemorySSAUpdater> MSSAU;
  699. if (MSSAWP)
  700. MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
  701. return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
  702. DT.getRootNode(),
  703. [](BasicBlock *) { return true; })
  704. .run();
  705. }
  706. void getAnalysisUsage(AnalysisUsage &AU) const override {
  707. AU.setPreservesCFG();
  708. AU.addRequired<DominatorTreeWrapperPass>();
  709. AU.addRequired<PostDominatorTreeWrapperPass>();
  710. AU.addRequired<LoopInfoWrapperPass>();
  711. AU.addPreserved<MemorySSAWrapperPass>();
  712. }
  713. };
  714. /// Same as above, but restricted to a single loop at a time. Can be
  715. /// scheduled with other loop passes w/o breaking out of LPM
  716. struct LoopGuardWideningLegacyPass : public LoopPass {
  717. static char ID;
  718. LoopGuardWideningLegacyPass() : LoopPass(ID) {
  719. initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  720. }
  721. bool runOnLoop(Loop *L, LPPassManager &LPM) override {
  722. if (skipLoop(L))
  723. return false;
  724. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  725. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  726. auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
  727. auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
  728. auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  729. std::unique_ptr<MemorySSAUpdater> MSSAU;
  730. if (MSSAWP)
  731. MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
  732. BasicBlock *RootBB = L->getLoopPredecessor();
  733. if (!RootBB)
  734. RootBB = L->getHeader();
  735. auto BlockFilter = [&](BasicBlock *BB) {
  736. return BB == RootBB || L->contains(BB);
  737. };
  738. return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr,
  739. DT.getNode(RootBB), BlockFilter).run();
  740. }
  741. void getAnalysisUsage(AnalysisUsage &AU) const override {
  742. AU.setPreservesCFG();
  743. getLoopAnalysisUsage(AU);
  744. AU.addPreserved<PostDominatorTreeWrapperPass>();
  745. AU.addPreserved<MemorySSAWrapperPass>();
  746. }
  747. };
  748. }
// Pass IDs: the address of these statics identifies each pass to the legacy
// pass manager; the value is never read.
char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;

// Register the function-level pass under -guard-widening, declaring the
// analyses it requires (matching getAnalysisUsage above).
INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                    false, false)

// Register the per-loop variant under -loop-guard-widening.
INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
                      "Widen guards (within a single loop, as a loop pass)",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
                    "Widen guards (within a single loop, as a loop pass)",
                    false, false)
  767. FunctionPass *llvm::createGuardWideningPass() {
  768. return new GuardWideningLegacyPass();
  769. }
  770. Pass *llvm::createLoopGuardWideningPass() {
  771. return new LoopGuardWideningLegacyPass();
  772. }