LoopPeel.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. //===- LoopPeel.cpp -------------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Loop Peeling Utilities.
  10. //===----------------------------------------------------------------------===//
  11. #include "llvm/Transforms/Utils/LoopPeel.h"
  12. #include "llvm/ADT/DenseMap.h"
  13. #include "llvm/ADT/SmallVector.h"
  14. #include "llvm/ADT/Statistic.h"
  15. #include "llvm/Analysis/Loads.h"
  16. #include "llvm/Analysis/LoopInfo.h"
  17. #include "llvm/Analysis/LoopIterator.h"
  18. #include "llvm/Analysis/ScalarEvolution.h"
  19. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  20. #include "llvm/Analysis/TargetTransformInfo.h"
  21. #include "llvm/IR/BasicBlock.h"
  22. #include "llvm/IR/Dominators.h"
  23. #include "llvm/IR/Function.h"
  24. #include "llvm/IR/InstrTypes.h"
  25. #include "llvm/IR/Instruction.h"
  26. #include "llvm/IR/Instructions.h"
  27. #include "llvm/IR/LLVMContext.h"
  28. #include "llvm/IR/MDBuilder.h"
  29. #include "llvm/IR/PatternMatch.h"
  30. #include "llvm/IR/ProfDataUtils.h"
  31. #include "llvm/Support/Casting.h"
  32. #include "llvm/Support/CommandLine.h"
  33. #include "llvm/Support/Debug.h"
  34. #include "llvm/Support/raw_ostream.h"
  35. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  36. #include "llvm/Transforms/Utils/Cloning.h"
  37. #include "llvm/Transforms/Utils/LoopSimplify.h"
  38. #include "llvm/Transforms/Utils/LoopUtils.h"
  39. #include "llvm/Transforms/Utils/ValueMapper.h"
  40. #include <algorithm>
  41. #include <cassert>
  42. #include <cstdint>
  43. #include <optional>
  44. using namespace llvm;
  45. using namespace llvm::PatternMatch;
  46. #define DEBUG_TYPE "loop-peel"
STATISTIC(NumPeeled, "Number of loops peeled");

// Testing knob: overrides the computed peel count when set.
static cl::opt<unsigned> UnrollPeelCount(
    "unroll-peel-count", cl::Hidden,
    cl::desc("Set the unroll peeling count, for testing purposes"));

// Master switch for profile-driven peeling of low-trip-count loops.
static cl::opt<bool>
    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
                       cl::desc("Allows loops to be peeled when the dynamic "
                                "trip count is known to be low."));

// By default only innermost loops are peeled; this allows outer loops too.
static cl::opt<bool>
    UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
                                cl::init(false), cl::Hidden,
                                cl::desc("Allows loop nests to be peeled."));

// Upper bound on the total number of iterations peeled off a loop
// (including iterations peeled by earlier runs, see PeeledCountMetaData).
static cl::opt<unsigned> UnrollPeelMaxCount(
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
    cl::desc("Max average trip count which will cause loop peeling."));

// When given, forces this exact peel count and skips all heuristics.
static cl::opt<unsigned> UnrollForcePeelCount(
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
    cl::desc("Force a peel count regardless of profiling information."));

// Restricts canPeel() to loop-simplify-form loops whose non-latch exits all
// end in deopt/unreachable (see canPeel below).
static cl::opt<bool> DisableAdvancedPeeling(
    "disable-advanced-peeling", cl::init(false), cl::Hidden,
    cl::desc(
        "Disable advance peeling. Issues for convergent targets (D134803)."));

// Loop-metadata key recording how many iterations were already peeled off
// this loop; read via getOptionalIntLoopAttribute() in computePeelCount().
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
  70. // Check whether we are capable of peeling this loop.
  71. bool llvm::canPeel(const Loop *L) {
  72. // Make sure the loop is in simplified form
  73. if (!L->isLoopSimplifyForm())
  74. return false;
  75. if (!DisableAdvancedPeeling)
  76. return true;
  77. SmallVector<BasicBlock *, 4> Exits;
  78. L->getUniqueNonLatchExitBlocks(Exits);
  79. // The latch must either be the only exiting block or all non-latch exit
  80. // blocks have either a deopt or unreachable terminator or compose a chain of
  81. // blocks where the last one is either deopt or unreachable terminated. Both
  82. // deopt and unreachable terminators are a strong indication they are not
  83. // taken. Note that this is a profitability check, not a legality check. Also
  84. // note that LoopPeeling currently can only update the branch weights of latch
  85. // blocks and branch weights to blocks with deopt or unreachable do not need
  86. // updating.
  87. return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
  88. }
  89. namespace {
  90. // As a loop is peeled, it may be the case that Phi nodes become
  91. // loop-invariant (ie, known because there is only one choice).
  92. // For example, consider the following function:
  93. // void g(int);
  94. // void binary() {
  95. // int x = 0;
  96. // int y = 0;
  97. // int a = 0;
  98. // for(int i = 0; i <100000; ++i) {
  99. // g(x);
  100. // x = y;
  101. // g(a);
  102. // y = a + 1;
  103. // a = 5;
  104. // }
  105. // }
  106. // Peeling 3 iterations is beneficial because the values for x, y and a
  107. // become known. The IR for this loop looks something like the following:
  108. //
  109. // %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  110. // %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
  111. // %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
  112. // %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
  113. // ...
  114. // tail call void @_Z1gi(i32 signext %x)
  115. // tail call void @_Z1gi(i32 signext %a)
  116. // %add = add nuw nsw i32 %a, 1
  117. // %inc = add nuw nsw i32 %i, 1
  118. // %exitcond = icmp eq i32 %inc, 100000
  119. // br i1 %exitcond, label %for.cond.cleanup, label %for.body
  120. //
  121. // The arguments for the calls to g will become known after 3 iterations
  122. // of the loop, because the phi nodes values become known after 3 iterations
  123. // of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
  124. // The first iteration has g(0), g(0); the second has g(0), g(5); the
  125. // third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
  126. // Now consider the phi nodes:
  127. // %a is a phi with constants so it is determined after iteration 1.
  128. // %y is a phi based on a constant and %a so it is determined on
  129. // the iteration after %a is determined, so iteration 2.
  130. // %x is a phi based on a constant and %y so it is determined on
  131. // the iteration after %y, so iteration 3.
  132. // %i is based on itself (and is an induction variable) so it is
  133. // never determined.
  134. // This means that peeling off 3 iterations will result in being able to
  135. // remove the phi nodes for %a, %y, and %x. The arguments for the
  136. // corresponding calls to g are determined and the code for computing
  137. // x, y, and a can be removed.
  138. //
  139. // The PhiAnalyzer class calculates how many times a loop should be
  140. // peeled based on the above analysis of the phi nodes in the loop while
  141. // respecting the maximum specified.
class PhiAnalyzer {
public:
  PhiAnalyzer(const Loop &L, unsigned MaxIterations);

  // Calculate the sufficient minimum number of iterations of the loop to peel
  // such that phi instructions become determined (subject to allowable limits)
  std::optional<unsigned> calculateIterationsToPeel();

protected:
  // A peel count; std::nullopt ("Unknown") means the value does not become
  // invariant within MaxIterations peeled iterations.
  using PeelCounter = std::optional<unsigned>;
  const PeelCounter Unknown = std::nullopt;

  // Add 1 respecting Unknown and return Unknown if result over MaxIterations
  PeelCounter addOne(PeelCounter PC) const {
    if (PC == Unknown)
      return Unknown;
    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
  }

  // Calculate the number of iterations after which the given value
  // becomes an invariant.
  PeelCounter calculate(const Value &);

  const Loop &L;
  const unsigned MaxIterations;

  // Map of Values to number of iterations to invariance; doubles as the
  // memoization cache and the cycle-breaker for calculate() (see there).
  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
};

PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
    : L(L), MaxIterations(MaxIterations) {
  // Callers must have established peelability and a positive budget.
  assert(canPeel(&L) && "loop is not suitable for peeling");
  assert(MaxIterations > 0 && "no peeling is allowed?");
}
  170. // This function calculates the number of iterations after which the value
  171. // becomes an invariant. The pre-calculated values are memorized in a map.
  172. // N.B. This number will be Unknown or <= MaxIterations.
  173. // The function is calculated according to the following definition:
  174. // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
  175. // F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
  176. // G(%y) = 0 if %y is a loop invariant
  177. // G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
  178. // G(%y) = TODO: if %y is an expression based on phis and loop invariants
  179. // The example looks like:
  180. // %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
  181. // %y = phi(0, 5)
  182. // %a = %y + 1
  183. // G(%y) = Unknown otherwise (including phi not in header block)
PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
  // If we already know the answer, take it from the map.
  auto I = IterationsToInvariance.find(&V);
  if (I != IterationsToInvariance.end())
    return I->second;
  // Place Unknown to map to avoid infinite recursion. Such
  // cycles can never stop on an invariant.
  // NOTE: recursive calculate() calls below may grow/rehash the map, so
  // results are always stored via a fresh operator[] rather than through a
  // saved iterator/reference.
  IterationsToInvariance[&V] = Unknown;
  if (L.isLoopInvariant(&V))
    // Loop invariant so known at start.
    return (IterationsToInvariance[&V] = 0);
  if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
    if (Phi->getParent() != L.getHeader()) {
      // Phi is not in header block so Unknown.
      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
      return Unknown;
    }
    // We need to analyze the input from the back edge and add 1.
    Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
    PeelCounter Iterations = calculate(*Input);
    assert(IterationsToInvariance[Input] == Iterations &&
           "unexpected value saved");
    return (IterationsToInvariance[Phi] = addOne(Iterations));
  }
  if (const Instruction *I = dyn_cast<Instruction>(&V)) {
    if (isa<CmpInst>(I) || I->isBinaryOp()) {
      // Binary instructions get the max of the operands
      // (both must be known for the result to be known).
      PeelCounter LHS = calculate(*I->getOperand(0));
      if (LHS == Unknown)
        return Unknown;
      PeelCounter RHS = calculate(*I->getOperand(1));
      if (RHS == Unknown)
        return Unknown;
      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
    }
    if (I->isCast())
      // Cast instructions get the value of the operand.
      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
  }
  // TODO: handle more expressions
  // Everything else is Unknown (the map entry stored above stands).
  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
  return Unknown;
}
  228. std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
  229. unsigned Iterations = 0;
  230. for (auto &PHI : L.getHeader()->phis()) {
  231. PeelCounter ToInvariance = calculate(PHI);
  232. if (ToInvariance != Unknown) {
  233. assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
  234. Iterations = std::max(Iterations, *ToInvariance);
  235. if (Iterations == MaxIterations)
  236. break;
  237. }
  238. }
  239. assert((Iterations <= MaxIterations) && "bad result in phi analysis");
  240. return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
  241. }
  242. } // unnamed namespace
  243. // Try to find any invariant memory reads that will become dereferenceable in
  244. // the remainder loop after peeling. The load must also be used (transitively)
  245. // by an exit condition. Returns the number of iterations to peel off (at the
  246. // moment either 0 or 1).
  247. static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
  248. DominatorTree &DT,
  249. AssumptionCache *AC) {
  250. // Skip loops with a single exiting block, because there should be no benefit
  251. // for the heuristic below.
  252. if (L.getExitingBlock())
  253. return 0;
  254. // All non-latch exit blocks must have an UnreachableInst terminator.
  255. // Otherwise the heuristic below may not be profitable.
  256. SmallVector<BasicBlock *, 4> Exits;
  257. L.getUniqueNonLatchExitBlocks(Exits);
  258. if (any_of(Exits, [](const BasicBlock *BB) {
  259. return !isa<UnreachableInst>(BB->getTerminator());
  260. }))
  261. return 0;
  262. // Now look for invariant loads that dominate the latch and are not known to
  263. // be dereferenceable. If there are such loads and no writes, they will become
  264. // dereferenceable in the loop if the first iteration is peeled off. Also
  265. // collect the set of instructions controlled by such loads. Only peel if an
  266. // exit condition uses (transitively) such a load.
  267. BasicBlock *Header = L.getHeader();
  268. BasicBlock *Latch = L.getLoopLatch();
  269. SmallPtrSet<Value *, 8> LoadUsers;
  270. const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
  271. for (BasicBlock *BB : L.blocks()) {
  272. for (Instruction &I : *BB) {
  273. if (I.mayWriteToMemory())
  274. return 0;
  275. auto Iter = LoadUsers.find(&I);
  276. if (Iter != LoadUsers.end()) {
  277. for (Value *U : I.users())
  278. LoadUsers.insert(U);
  279. }
  280. // Do not look for reads in the header; they can already be hoisted
  281. // without peeling.
  282. if (BB == Header)
  283. continue;
  284. if (auto *LI = dyn_cast<LoadInst>(&I)) {
  285. Value *Ptr = LI->getPointerOperand();
  286. if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
  287. !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
  288. for (Value *U : I.users())
  289. LoadUsers.insert(U);
  290. }
  291. }
  292. }
  293. SmallVector<BasicBlock *> ExitingBlocks;
  294. L.getExitingBlocks(ExitingBlocks);
  295. if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
  296. return LoadUsers.contains(Exiting->getTerminator());
  297. }))
  298. return 1;
  299. return 0;
  300. }
  301. // Return the number of iterations to peel off that make conditions in the
  302. // body true/false. For example, if we peel 2 iterations off the loop below,
  303. // the condition i < 2 can be evaluated at compile time.
  304. // for (i = 0; i < n; i++)
  305. // if (i < 2)
  306. // ..
  307. // else
  308. // ..
  309. // }
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                         ScalarEvolution &SE) {
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
  unsigned DesiredPeelCount = 0;

  for (auto *BB : L.blocks()) {
    // Only conditional branches are candidates.
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || BI->isUnconditional())
      continue;

    // Ignore loop exit condition.
    if (L.getLoopLatch() == BB)
      continue;

    Value *Condition = BI->getCondition();
    Value *LeftVal, *RightVal;
    CmpInst::Predicate Pred;
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
      continue;

    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
    const SCEV *RightSCEV = SE.getSCEV(RightVal);

    // Do not consider predicates that are known to be true or false
    // independently of the loop iteration.
    if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
      continue;

    // Check if we have a condition with one AddRec and one non AddRec
    // expression. Normalize LeftSCEV to be the AddRec.
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
        std::swap(LeftSCEV, RightSCEV);
        Pred = ICmpInst::getSwappedPredicate(Pred);
      } else
        continue;
    }

    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);

    // Avoid huge SCEV computations in the loop below, make sure we only
    // consider AddRecs of the loop we are trying to peel.
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
      continue;
    // Require either an equality over a no-self-wrap AddRec, or a predicate
    // known to be monotonic over it, so evaluating it at successive
    // iterations below is meaningful.
    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
        !SE.getMonotonicPredicateType(LeftAR, Pred))
      continue;

    // Check if extending the current DesiredPeelCount lets us evaluate Pred
    // or !Pred in the loop body statically.
    unsigned NewPeelCount = DesiredPeelCount;

    const SCEV *IterVal = LeftAR->evaluateAtIteration(
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);

    // If the original condition is not known, get the negated predicate
    // (which holds on the else branch) and check if it is known. This allows
    // us to peel of iterations that make the original condition false.
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
      Pred = ICmpInst::getInversePredicate(Pred);

    const SCEV *Step = LeftAR->getStepRecurrence(SE);
    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
    // Advance IterVal/NextIterVal by one iteration of the AddRec.
    auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
                                 &NewPeelCount]() {
      IterVal = NextIterVal;
      NextIterVal = SE.getAddExpr(IterVal, Step);
      NewPeelCount++;
    };

    auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
      return NewPeelCount < MaxPeelCount;
    };

    // Keep peeling while Pred is known to hold for the first not-yet-peeled
    // iteration, within the allowed budget.
    while (CanPeelOneMoreIteration() &&
           SE.isKnownPredicate(Pred, IterVal, RightSCEV))
      PeelOneMoreIteration();

    // With *that* peel count, does the predicate !Pred become known in the
    // first iteration of the loop body after peeling?
    if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
                             RightSCEV))
      continue; // If not, give up.

    // However, for equality comparisons, that isn't always sufficient to
    // eliminate the comparison in loop body, we may need to peel one more
    // iteration. See if that makes !Pred become unknown again.
    if (ICmpInst::isEquality(Pred) &&
        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
                             RightSCEV) &&
        !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
        SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
      if (!CanPeelOneMoreIteration())
        continue; // Need to peel one more iteration, but can't. Give up.
      PeelOneMoreIteration(); // Great!
    }

    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
  }

  return DesiredPeelCount;
}
  394. /// This "heuristic" exactly matches implicit behavior which used to exist
  395. /// inside getLoopEstimatedTripCount. It was added here to keep an
  396. /// improvement inside that API from causing peeling to become more aggressive.
  397. /// This should probably be removed.
  398. static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
  399. BasicBlock *Latch = L->getLoopLatch();
  400. if (!Latch)
  401. return true;
  402. BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
  403. if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
  404. return true;
  405. assert((LatchBR->getSuccessor(0) == L->getHeader() ||
  406. LatchBR->getSuccessor(1) == L->getHeader()) &&
  407. "At least one edge out of the latch must go to the header");
  408. SmallVector<BasicBlock *, 4> ExitBlocks;
  409. L->getUniqueNonLatchExitBlocks(ExitBlocks);
  410. return any_of(ExitBlocks, [](const BasicBlock *EB) {
  411. return !EB->getTerminatingDeoptimizeCall();
  412. });
  413. }
  414. // Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::PeelingPreferences &PP,
                            unsigned TripCount, DominatorTree &DT,
                            ScalarEvolution &SE, AssumptionCache *AC,
                            unsigned Threshold) {
  assert(LoopSize > 0 && "Zero loop size is not allowed!");

  // Save the PP.PeelCount value set by the target in
  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned TargetPeelCount = PP.PeelCount;
  PP.PeelCount = 0;
  if (!canPeel(L))
    return;

  // Only try to peel innermost loops by default.
  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
  // or by the flag -unroll-allow-loop-nests-peeling.
  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
    return;

  // If the user provided a peel count, use that (and skip all heuristics).
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  if (UserPeelCount) {
    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                      << " iterations.\n");
    PP.PeelCount = UnrollForcePeelCount;
    PP.PeelProfiledIterations = true;
    return;
  }

  // Skip peeling if it's disabled.
  if (!PP.AllowPeeling)
    return;

  // Check that we can peel at least one iteration: the original loop plus
  // one peeled copy must fit within Threshold.
  if (2 * LoopSize > Threshold)
    return;

  // Read how many iterations earlier passes already peeled off this loop.
  unsigned AlreadyPeeled = 0;
  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
    AlreadyPeeled = *Peeled;
  // Stop if we already peeled off the maximum number of iterations.
  if (AlreadyPeeled >= UnrollPeelMaxCount)
    return;

  // Pay respect to limitations implied by loop size and the max peel count.
  // (Threshold / LoopSize >= 2 here because of the size check above.)
  unsigned MaxPeelCount = UnrollPeelMaxCount;
  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);

  // Start the max computation with the PP.PeelCount value set by the target
  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned DesiredPeelCount = TargetPeelCount;

  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
  // iterations of the loop. For this we compute the number for iterations after
  // which every Phi is guaranteed to become an invariant, and try to peel the
  // maximum number of iterations among these values, thus turning all those
  // Phis into invariants.
  if (MaxPeelCount > DesiredPeelCount) {
    // Check how many iterations are useful for resolving Phis
    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
    if (NumPeels)
      DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
  }

  // Also consider peeling to make loop-body comparisons statically decidable.
  DesiredPeelCount = std::max(DesiredPeelCount,
                              countToEliminateCompares(*L, MaxPeelCount, SE));

  // As a last resort, peel one iteration to make invariant loads
  // dereferenceable in the remainder loop.
  if (DesiredPeelCount == 0)
    DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);

  if (DesiredPeelCount > 0) {
    DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
    // Consider max peel count limitation.
    assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
    if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                        << " iteration(s) to turn"
                        << " some Phis into invariants.\n");
      PP.PeelCount = DesiredPeelCount;
      PP.PeelProfiledIterations = false;
      return;
    }
  }

  // Bail if we know the statically calculated trip count.
  // In this case we rather prefer partial unrolling.
  if (TripCount)
    return;

  // Do not apply profile base peeling if it is disabled.
  if (!PP.PeelProfiledIterations)
    return;

  // If we don't know the trip count, but have reason to believe the average
  // trip count is low, peeling should be beneficial, since we will usually
  // hit the peeled section.
  // We only do this in the presence of profile information, since otherwise
  // our estimates of the trip count are not reliable enough.
  if (L->getHeader()->getParent()->hasProfileData()) {
    if (violatesLegacyMultiExitLoopCheck(L))
      return;
    std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
    if (!EstimatedTripCount)
      return;

    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
                      << *EstimatedTripCount << "\n");

    if (*EstimatedTripCount) {
      // Peel the whole estimated trip count if it fits within the budget.
      if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
        unsigned PeelCount = *EstimatedTripCount;
        LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
        PP.PeelCount = PeelCount;
        return;
      }
      // Otherwise leave PP.PeelCount at 0 and report why in debug output.
      LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
      LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
      LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
      LLVM_DEBUG(dbgs() << "Max peel count by cost: "
                        << (Threshold / LoopSize - 1) << "\n");
    }
  }
}
// Branch-weight bookkeeping for one exiting terminator while peeling; see
// updateBranchWeights()/initBranchWeights() below for how it is used.
struct WeightInfo {
  // Weights for current iteration.
  SmallVector<uint32_t> Weights;
  // Weights to subtract after each iteration.
  const SmallVector<uint32_t> SubWeights;
};
  529. /// Update the branch weights of an exiting block of a peeled-off loop
  530. /// iteration.
  531. /// Let F is a weight of the edge to continue (fallthrough) into the loop.
  532. /// Let E is a weight of the edge to an exit.
  533. /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
  534. /// go to exit.
  535. /// Then, Estimated ExitCount = F / E.
  536. /// For I-th (counting from 0) peeled off iteration we set the the weights for
  537. /// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
  538. /// The probability to go to exit 1/(EC-I) increases. At the same time
  539. /// the estimated exit count in the remainder loop reduces by I.
  540. /// To avoid dealing with division rounding we can just multiple both part
  541. /// of weights to E and use weight as (F - I * E, E).
  542. static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
  543. MDBuilder MDB(Term->getContext());
  544. Term->setMetadata(LLVMContext::MD_prof,
  545. MDB.createBranchWeights(Info.Weights));
  546. for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
  547. if (SubWeight != 0)
  548. Info.Weights[Idx] = Info.Weights[Idx] > SubWeight
  549. ? Info.Weights[Idx] - SubWeight
  550. : 1;
  551. }
  552. /// Initialize the weights for all exiting blocks.
  553. static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
  554. Loop *L) {
  555. SmallVector<BasicBlock *> ExitingBlocks;
  556. L->getExitingBlocks(ExitingBlocks);
  557. for (BasicBlock *ExitingBlock : ExitingBlocks) {
  558. Instruction *Term = ExitingBlock->getTerminator();
  559. SmallVector<uint32_t> Weights;
  560. if (!extractBranchWeights(*Term, Weights))
  561. continue;
  562. // See the comment on updateBranchWeights() for an explanation of what we
  563. // do here.
  564. uint32_t FallThroughWeights = 0;
  565. uint32_t ExitWeights = 0;
  566. for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
  567. if (L->contains(Succ))
  568. FallThroughWeights += Weight;
  569. else
  570. ExitWeights += Weight;
  571. }
  572. // Don't try to update weights for degenerate case.
  573. if (FallThroughWeights == 0)
  574. continue;
  575. SmallVector<uint32_t> SubWeights;
  576. for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
  577. if (!L->contains(Succ)) {
  578. // Exit weights stay the same.
  579. SubWeights.push_back(0);
  580. continue;
  581. }
  582. // Subtract exit weights on each iteration, distributed across all
  583. // fallthrough edges.
  584. double W = (double)Weight / (double)FallThroughWeights;
  585. SubWeights.push_back((uint32_t)(ExitWeights * W));
  586. }
  587. WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
  588. }
  589. }
  590. /// Update the weights of original exiting block after peeling off all
  591. /// iterations.
  592. static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) {
  593. MDBuilder MDB(Term->getContext());
  594. Term->setMetadata(LLVMContext::MD_prof,
  595. MDB.createBranchWeights(Info.Weights));
  596. }
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
    ScalarEvolution &SE) {
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *PreHeader = L->getLoopPreheader();

  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();

  Loop *ParentLoop = L->getParentLoop();

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  // Iterating in RPO guarantees that a block's dominators (and thus their
  // VMap entries) are visited before the block itself.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
    NewBlocks.push_back(NewBB);

    // If an original block is an immediate child of the loop L, its copy
    // is a child of a ParentLoop after peeling. If a block is a child of
    // a nested loop, it is handled in the cloneLoop() call below.
    if (ParentLoop && LI->getLoopFor(*BB) == L)
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);

    VMap[*BB] = NewBB;

    // If dominator tree is available, insert nodes to represent cloned blocks.
    if (DT) {
      if (Header == *BB)
        DT->addNewBlock(NewBB, InsertTop);
      else {
        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
        // VMap must contain entry for IDom, as the iteration order is RPO.
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
      }
    }
  }

  {
    // Identify what other metadata depends on the cloned version. After
    // cloning, replace the metadata with the corrected version for both
    // memory instructions and noalias intrinsics.
    // A distinct suffix per peeled iteration keeps the cloned scopes unique.
    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
                               Header->getContext(), Ext);
  }

  // Recursively create the new Loop objects for nested loops, if any,
  // to preserve LoopInfo.
  for (Loop *ChildLoop : *L) {
    cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
  }

  // Hook-up the control flow for the newly inserted blocks.
  // The new header is hooked up directly to the "top", which is either
  // the original loop preheader (for the first iteration) or the previous
  // iteration's exiting block (for every other iteration)
  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));

  // Similarly, for the latch:
  // The original exiting edge is still hooked up to the loop exit.
  // The backedge now goes to the "bottom", which is either the loop's real
  // header (for the last peeled iteration) or the copied header of the next
  // iteration (for every other iteration)
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
  auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
  for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
    if (LatchTerm->getSuccessor(idx) == Header) {
      LatchTerm->setSuccessor(idx, InsertBot);
      break;
    }
  if (DT)
    DT->changeImmediateDominator(InsertBot, NewLatch);

  // The new copy of the loop body starts with a bunch of PHI nodes
  // that pick an incoming value from either the preheader, or the previous
  // loop iteration. Since this copy is no longer part of the loop, we
  // resolve this statically:
  // For the first iteration, we use the value from the preheader directly.
  // For any other iteration, we replace the phi with the value generated by
  // the immediately preceding clone of the loop body (which represents
  // the previous iteration).
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (IterNumber == 0) {
      VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
    } else {
      Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
      // Values defined inside the loop come from the previous iteration's
      // clone (LVMap); loop-invariant values are used as-is.
      if (LatchInst && L->contains(LatchInst))
        VMap[&*I] = LVMap[LatchInst];
      else
        VMap[&*I] = LatchVal;
    }
    // The PHI is now fully resolved; later remapping rewrites its uses
    // through VMap, so the cloned PHI itself is dead.
    NewPHI->eraseFromParent();
  }

  // Fix up the outgoing values - we need to add a value for the iteration
  // we've just created. Note that this must happen *after* the incoming
  // values are adjusted, since the value going out of the latch may also be
  // a value coming into the header.
  for (auto Edge : ExitEdges)
    for (PHINode &PHI : Edge.second->phis()) {
      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
      if (LatchInst && L->contains(LatchInst))
        LatchVal = VMap[LatchVal];
      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
      // The exit PHI gained an operand; drop any cached SCEV for it.
      SE.forgetValue(&PHI);
    }

  // LastValueMap is updated with the values for the current loop
  // which are used the next time this function is called.
  for (auto KV : VMap)
    LVMap[KV.first] = KV.second;
}
  715. TargetTransformInfo::PeelingPreferences
  716. llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
  717. const TargetTransformInfo &TTI,
  718. std::optional<bool> UserAllowPeeling,
  719. std::optional<bool> UserAllowProfileBasedPeeling,
  720. bool UnrollingSpecficValues) {
  721. TargetTransformInfo::PeelingPreferences PP;
  722. // Set the default values.
  723. PP.PeelCount = 0;
  724. PP.AllowPeeling = true;
  725. PP.AllowLoopNestsPeeling = false;
  726. PP.PeelProfiledIterations = true;
  727. // Get the target specifc values.
  728. TTI.getPeelingPreferences(L, SE, PP);
  729. // User specified values using cl::opt.
  730. if (UnrollingSpecficValues) {
  731. if (UnrollPeelCount.getNumOccurrences() > 0)
  732. PP.PeelCount = UnrollPeelCount;
  733. if (UnrollAllowPeeling.getNumOccurrences() > 0)
  734. PP.AllowPeeling = UnrollAllowPeeling;
  735. if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
  736. PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
  737. }
  738. // User specifed values provided by argument.
  739. if (UserAllowPeeling)
  740. PP.AllowPeeling = *UserAllowPeeling;
  741. if (UserAllowProfileBasedPeeling)
  742. PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
  743. return PP;
  744. }
  745. /// Peel off the first \p PeelCount iterations of loop \p L.
  746. ///
  747. /// Note that this does not peel them off as a single straight-line block.
  748. /// Rather, each iteration is peeled off separately, and needs to check the
  749. /// exit condition.
  750. /// For loops that dynamically execute \p PeelCount iterations or less
  751. /// this provides a benefit, since the peeled off iterations, which account
  752. /// for the bulk of dynamic execution, can be further simplified by scalar
  753. /// optimizations.
  754. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
  755. ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
  756. bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
  757. assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
  758. assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
  759. LoopBlocksDFS LoopBlocks(L);
  760. LoopBlocks.perform(LI);
  761. BasicBlock *Header = L->getHeader();
  762. BasicBlock *PreHeader = L->getLoopPreheader();
  763. BasicBlock *Latch = L->getLoopLatch();
  764. SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
  765. L->getExitEdges(ExitEdges);
  766. // Remember dominators of blocks we might reach through exits to change them
  767. // later. Immediate dominator of such block might change, because we add more
  768. // routes which can lead to the exit: we can reach it from the peeled
  769. // iterations too.
  770. DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
  771. for (auto *BB : L->blocks()) {
  772. auto *BBDomNode = DT.getNode(BB);
  773. SmallVector<BasicBlock *, 16> ChildrenToUpdate;
  774. for (auto *ChildDomNode : BBDomNode->children()) {
  775. auto *ChildBB = ChildDomNode->getBlock();
  776. if (!L->contains(ChildBB))
  777. ChildrenToUpdate.push_back(ChildBB);
  778. }
  779. // The new idom of the block will be the nearest common dominator
  780. // of all copies of the previous idom. This is equivalent to the
  781. // nearest common dominator of the previous idom and the first latch,
  782. // which dominates all copies of the previous idom.
  783. BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
  784. for (auto *ChildBB : ChildrenToUpdate)
  785. NonLoopBlocksIDom[ChildBB] = NewIDom;
  786. }
  787. Function *F = Header->getParent();
  788. // Set up all the necessary basic blocks. It is convenient to split the
  789. // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
  790. // body, and a new preheader for the "real" loop.
  791. // Peeling the first iteration transforms.
  792. //
  793. // PreHeader:
  794. // ...
  795. // Header:
  796. // LoopBody
  797. // If (cond) goto Header
  798. // Exit:
  799. //
  800. // into
  801. //
  802. // InsertTop:
  803. // LoopBody
  804. // If (!cond) goto Exit
  805. // InsertBot:
  806. // NewPreHeader:
  807. // ...
  808. // Header:
  809. // LoopBody
  810. // If (cond) goto Header
  811. // Exit:
  812. //
  813. // Each following iteration will split the current bottom anchor in two,
  814. // and put the new copy of the loop body between these two blocks. That is,
  815. // after peeling another iteration from the example above, we'll split
  816. // InsertBot, and get:
  817. //
  818. // InsertTop:
  819. // LoopBody
  820. // If (!cond) goto Exit
  821. // InsertBot:
  822. // LoopBody
  823. // If (!cond) goto Exit
  824. // InsertBot.next:
  825. // NewPreHeader:
  826. // ...
  827. // Header:
  828. // LoopBody
  829. // If (cond) goto Header
  830. // Exit:
  831. BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
  832. BasicBlock *InsertBot =
  833. SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
  834. BasicBlock *NewPreHeader =
  835. SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
  836. InsertTop->setName(Header->getName() + ".peel.begin");
  837. InsertBot->setName(Header->getName() + ".peel.next");
  838. NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
  839. Instruction *LatchTerm =
  840. cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
  841. // If we have branch weight information, we'll want to update it for the
  842. // newly created branches.
  843. DenseMap<Instruction *, WeightInfo> Weights;
  844. initBranchWeights(Weights, L);
  845. // Identify what noalias metadata is inside the loop: if it is inside the
  846. // loop, the associated metadata must be cloned for each iteration.
  847. SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
  848. identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
  849. // For each peeled-off iteration, make a copy of the loop.
  850. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
  851. SmallVector<BasicBlock *, 8> NewBlocks;
  852. ValueToValueMapTy VMap;
  853. cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
  854. LoopBlocks, VMap, LVMap, &DT, LI,
  855. LoopLocalNoAliasDeclScopes, *SE);
  856. // Remap to use values from the current iteration instead of the
  857. // previous one.
  858. remapInstructionsInBlocks(NewBlocks, VMap);
  859. // Update IDoms of the blocks reachable through exits.
  860. if (Iter == 0)
  861. for (auto BBIDom : NonLoopBlocksIDom)
  862. DT.changeImmediateDominator(BBIDom.first,
  863. cast<BasicBlock>(LVMap[BBIDom.second]));
  864. #ifdef EXPENSIVE_CHECKS
  865. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  866. #endif
  867. for (auto &[Term, Info] : Weights) {
  868. auto *TermCopy = cast<Instruction>(VMap[Term]);
  869. updateBranchWeights(TermCopy, Info);
  870. }
  871. // Remove Loop metadata from the latch branch instruction
  872. // because it is not the Loop's latch branch anymore.
  873. auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
  874. LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
  875. InsertTop = InsertBot;
  876. InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
  877. InsertBot->setName(Header->getName() + ".peel.next");
  878. F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
  879. F->end());
  880. }
  881. // Now adjust the phi nodes in the loop header to get their initial values
  882. // from the last peeled-off iteration instead of the preheader.
  883. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
  884. PHINode *PHI = cast<PHINode>(I);
  885. Value *NewVal = PHI->getIncomingValueForBlock(Latch);
  886. Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
  887. if (LatchInst && L->contains(LatchInst))
  888. NewVal = LVMap[LatchInst];
  889. PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
  890. }
  891. for (const auto &[Term, Info] : Weights)
  892. fixupBranchWeights(Term, Info);
  893. // Update Metadata for count of peeled off iterations.
  894. unsigned AlreadyPeeled = 0;
  895. if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
  896. AlreadyPeeled = *Peeled;
  897. addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
  898. if (Loop *ParentLoop = L->getParentLoop())
  899. L = ParentLoop;
  900. // We modified the loop, update SE.
  901. SE->forgetTopmostLoop(L);
  902. #ifdef EXPENSIVE_CHECKS
  903. // Finally DomtTree must be correct.
  904. assert(DT.verify(DominatorTree::VerificationLevel::Fast));
  905. #endif
  906. // FIXME: Incrementally update loop-simplify
  907. simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
  908. NumPeeled++;
  909. return true;
  910. }