//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass flattens pairs of nested loops into a single loop.
//
// The intention is to optimise loop nests like this, which together access an
// array linearly:
//
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j)
//       f(A[i*M+j]);
//
// into one loop:
//
//   for (int i = 0; i < (N*M); ++i)
//     f(A[i]);
//
// It can also flatten loops where the induction variables are not used in the
// loop. This is only worth doing if the induction variables are only used in
// an expression like i*M+j. If they had any other uses, we would have to
// insert a div/mod to reconstruct the original values, so this wouldn't be
// profitable.
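//
// For example (purely illustrative), a nest like
//
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j)
//       f(A[i*M+j], i);
//
// uses i on its own as well as inside i*M+j, so flattening it would require a
// div/mod to recover i and is therefore not considered profitable.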
//
// We also need to prove that N*M will not overflow. The preferred solution is
// to widen the IV, which avoids overflow checks, so that is tried first. If
// the IV cannot be widened, then we try to determine that this new tripcount
// expression won't overflow.
//
// Q: Does LoopFlatten use SCEV?
// Short answer: Yes and no.
//
// Long answer:
// For this transformation to be valid, we require all uses of the induction
// variables to be linear expressions of the form i*M+j. The different Loop
// APIs are used to get some loop components like the induction variable,
// compare statement, etc. In addition, we do some pattern matching to find the
// linear expressions and other loop components like the loop increment. The
// latter are examples of expressions that do use the induction variable, but
// are safe to ignore when we check all uses to be of the form i*M+j. We keep
// track of all of this in the bookkeeping struct FlattenInfo.
// We assume the loops to be canonical, i.e. starting at 0 and incrementing by
// 1. This makes the RHS of the compare the loop tripcount (with the right
// predicate). We then use SCEV to sanity check that this tripcount matches
// the tripcount as computed by SCEV.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-flatten"
STATISTIC(NumFlattened, "Number of loops flattened");

static cl::opt<unsigned> RepeatedInstructionThreshold(
    "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
    cl::desc("Limit on the cost of instructions that can be repeated due to "
             "loop flattening"));

static cl::opt<bool>
    AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
                     cl::init(false),
                     cl::desc("Assume that the product of the two iteration "
                              "trip counts will never overflow"));

static cl::opt<bool>
    WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true),
            cl::desc("Widen the loop induction variables, if possible, so "
                     "overflow checks won't reject flattening"));
// We require all uses of both induction variables to match this pattern:
//
//   (OuterPHI * InnerTripCount) + InnerPHI
//
// I.e., it needs to be a linear expression of the induction variables and the
// inner loop trip count. We keep track of all different expressions on which
// checks will be performed in this bookkeeping struct.
//
struct FlattenInfo {
  Loop *OuterLoop = nullptr; // The loop pair to be flattened.
  Loop *InnerLoop = nullptr;

  PHINode *InnerInductionPHI = nullptr; // These PHINodes correspond to loop
  PHINode *OuterInductionPHI = nullptr; // induction variables, which are
                                        // expected to start at zero and
                                        // increment by one on each loop.

  Value *InnerTripCount = nullptr; // The product of these two tripcounts
  Value *OuterTripCount = nullptr; // will be the new flattened loop
                                   // tripcount. Also used to recognise a
                                   // linear expression that will be replaced.

  SmallPtrSet<Value *, 4> LinearIVUses; // Contains the linear expressions
                                        // of the form i*M+j that will be
                                        // replaced.

  BinaryOperator *InnerIncrement = nullptr; // Uses of induction variables in
  BinaryOperator *OuterIncrement = nullptr; // loop control statements that
  BranchInst *InnerBranch = nullptr;        // are safe to ignore.

  BranchInst *OuterBranch = nullptr; // The instruction that needs to be
                                     // updated with the new tripcount.

  SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;

  bool Widened = false; // Whether this holds the flatten info before or after
                        // widening.

  PHINode *NarrowInnerInductionPHI = nullptr; // Holds the old/narrow induction
  PHINode *NarrowOuterInductionPHI = nullptr; // phis, i.e. the Phis before IV
                                              // widening has been applied. Used
                                              // to skip checks on phi nodes.

  FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};

  bool isNarrowInductionPhi(PHINode *Phi) {
    // This can't be the narrow phi if we haven't widened the IV first.
    if (!Widened)
      return false;
    return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
  }
  bool isInnerLoopIncrement(User *U) {
    return InnerIncrement == U;
  }
  bool isOuterLoopIncrement(User *U) {
    return OuterIncrement == U;
  }
  bool isInnerLoopTest(User *U) {
    return InnerBranch->getCondition() == U;
  }

  bool checkOuterInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
    for (User *U : OuterInductionPHI->users()) {
      if (isOuterLoopIncrement(U))
        continue;

      auto IsValidOuterPHIUses = [&](User *U) -> bool {
        LLVM_DEBUG(dbgs() << "Found use of outer induction variable: ";
                   U->dump());
        if (!ValidOuterPHIUses.count(U)) {
          LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
          return false;
        }
        LLVM_DEBUG(dbgs() << "Use is optimisable\n");
        return true;
      };

      if (auto *V = dyn_cast<TruncInst>(U)) {
        for (auto *K : V->users()) {
          if (!IsValidOuterPHIUses(K))
            return false;
        }
        continue;
      }

      if (!IsValidOuterPHIUses(U))
        return false;
    }
    return true;
  }

  bool matchLinearIVUser(User *U, Value *InnerTripCount,
                         SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
    LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
    Value *MatchedMul = nullptr;
    Value *MatchedItCount = nullptr;
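    // For illustration only (the value names here are invented), a matching
    // use in IR typically looks like:
    //   %mul = mul i32 %outer.iv, %inner.tripcount
    //   %use = add i32 %mul, %inner.iv
    // i.e. an add/mul pair forming (OuterPHI * InnerTripCount) + InnerPHI.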
    bool IsAdd = match(U, m_c_Add(m_Specific(InnerInductionPHI),
                                  m_Value(MatchedMul))) &&
                 match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
                                           m_Value(MatchedItCount)));

    // Matches the same pattern as above, except it also looks for truncs
    // on the phi, which can be the result of widening the induction variables.
    bool IsAddTrunc =
        match(U, m_c_Add(m_Trunc(m_Specific(InnerInductionPHI)),
                         m_Value(MatchedMul))) &&
        match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
                                  m_Value(MatchedItCount)));

    if (!MatchedItCount)
      return false;

    // Look through extends if the IV has been widened.
    if (Widened &&
        (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
      assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
             "Unexpected type mismatch in types after widening");
      MatchedItCount = isa<SExtInst>(MatchedItCount)
                           ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
                           : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
    }

    if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
      ValidOuterPHIUses.insert(MatchedMul);
      LinearIVUses.insert(U);
      return true;
    }

    LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
    return false;
  }

  bool checkInnerInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
    Value *SExtInnerTripCount = InnerTripCount;
    if (Widened &&
        (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
      SExtInnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);

    for (User *U : InnerInductionPHI->users()) {
      if (isInnerLoopIncrement(U))
        continue;

      // After widening the IVs, a trunc instruction might have been
      // introduced, so look through truncs.
      if (isa<TruncInst>(U)) {
        if (!U->hasOneUse())
          return false;
        U = *U->user_begin();
      }

      // If the use is in the compare (which is also the condition of the inner
      // branch) then the compare has been altered by another transformation e.g
      // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
      // a constant. Ignore this use as the compare gets removed later anyway.
      if (isInnerLoopTest(U))
        continue;

      if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses))
        return false;
    }
    return true;
  }
};

static bool
setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
                  SmallPtrSetImpl<Instruction *> &IterationInstructions) {
  TripCount = TC;
  IterationInstructions.insert(Increment);
  LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
  LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
  LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
  return true;
}

// Given the RHS of the loop latch compare instruction, verify with SCEV
// that this is indeed the loop tripcount.
// TODO: This used to be a straightforward check but has grown to be quite
// complicated now. It is therefore worth revisiting what the additional
// benefits are of this (compared to relying on canonical loops and pattern
// matching).
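//
// For example (illustrative numbers): for a loop that runs 10 iterations,
// SCEV's backedge-taken count is 9. If another pass has rewritten the latch
// compare from icmp ult %inc, 10 to icmp ult %j, 9, the constant RHS now
// equals the backedge-taken count, so one is added to it below to recover the
// trip count of 10.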
static bool verifyTripCount(Value *RHS, Loop *L,
    SmallPtrSetImpl<Instruction *> &IterationInstructions,
    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
    return false;
  }

  // The Extend=false flag is used for getTripCountFromExitCount as we want
  // to verify and match it with the pattern matched tripcount. Please note
  // that overflow checks are performed in checkOverflow, but are first tried
  // to avoid by widening the IV.
  const SCEV *SCEVTripCount =
      SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);

  const SCEV *SCEVRHS = SE->getSCEV(RHS);
  if (SCEVRHS == SCEVTripCount)
    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
  if (ConstantRHS) {
    const SCEV *BackedgeTCExt = nullptr;
    if (IsWidened) {
      const SCEV *SCEVTripCountExt;
      // Find the extended backedge taken count and extended trip count using
      // SCEV. One of these should now match the RHS of the compare.
      BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
      SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
      if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
        LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
        return false;
      }
    }
    // If the RHS of the compare is equal to the backedge taken count we need
    // to add one to get the trip count.
    if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
      Value *NewRHS = ConstantInt::get(
          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
      return setLoopComponents(NewRHS, TripCount, Increment,
                               IterationInstructions);
    }
    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
  }
  // If the RHS isn't a constant then check that the reason it doesn't match
  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
  // (and take the trip count to be the RHS).
  if (!IsWidened) {
    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
    return false;
  }
  auto *TripCountInst = dyn_cast<Instruction>(RHS);
  if (!TripCountInst) {
    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
    return false;
  }
  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
      SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
    LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
    return false;
  }
  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
// Finds the induction variable, increment and trip count for a simple loop
// that we can flatten.
static bool findLoopComponents(
    Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
  LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");

  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
    return false;
  }

  // Currently, to simplify the implementation, the Loop induction variable must
  // start at zero and increment with a step size of one.
  if (!L->isCanonical(*SE)) {
    LLVM_DEBUG(dbgs() << "Loop is not canonical\n");
    return false;
  }

  // There must be exactly one exiting block, and it must be the same as the
  // latch.
  BasicBlock *Latch = L->getLoopLatch();
  if (L->getExitingBlock() != Latch) {
    LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
    return false;
  }

  // Find the induction PHI. If there is no induction PHI, we can't do the
  // transformation. TODO: could other variables trigger this? Do we have to
  // search for the best one?
  InductionPHI = L->getInductionVariable(*SE);
  if (!InductionPHI) {
    LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());

  bool ContinueOnTrue = L->contains(Latch->getTerminator()->getSuccessor(0));
  auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
    if (ContinueOnTrue)
      return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
    else
      return Pred == CmpInst::ICMP_EQ;
  };
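  // For illustration only (names invented): a canonical latch that passes the
  // checks below typically looks like
  //   %inc = add nuw nsw i32 %i, 1
  //   %cmp = icmp ult i32 %inc, %N
  //   br i1 %cmp, label %loop.header, label %loop.exit
  // where %cmp is the latch compare and the conditional branch is BackBranch.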
  // Find Compare and make sure it is valid. getLatchCmpInst checks that the
  // back branch of the latch is conditional.
  ICmpInst *Compare = L->getLatchCmpInst();
  if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
      Compare->hasNUsesOrMore(2)) {
    LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
    return false;
  }
  BackBranch = cast<BranchInst>(Latch->getTerminator());
  IterationInstructions.insert(BackBranch);
  LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
  IterationInstructions.insert(Compare);
  LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());

  // Find increment and trip count.
  // There are exactly 2 incoming values to the induction phi; one from the
  // pre-header and one from the latch. The incoming latch value is the
  // increment variable.
  Increment =
      dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
  if (!Increment || Increment->hasNUsesOrMore(3)) {
    LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
    return false;
  }
  // The trip count is the RHS of the compare. If this doesn't match the trip
  // count computed by SCEV then this is because the trip count variable
  // has been widened so the types don't match, or because it is a constant and
  // another transformation has changed the compare (e.g. icmp ult %inc,
  // tripcount -> icmp ult %j, tripcount-1), or both.
  Value *RHS = Compare->getOperand(1);
  return verifyTripCount(RHS, L, IterationInstructions, InductionPHI, TripCount,
                         Increment, BackBranch, SE, IsWidened);
}
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
  // All PHIs in the inner and outer headers must either be:
  // - The induction PHI, which we are going to rewrite as one induction in
  //   the new loop. This is already checked by findLoopComponents.
  // - An outer header PHI with all incoming values from outside the loop.
  //   LoopSimplify guarantees we have a pre-header, so we don't need to
  //   worry about that here.
  // - Pairs of PHIs in the inner and outer headers, which implement a
  //   loop-carried dependency that will still be valid in the new loop. To
  //   be valid, this variable must be modified only in the inner loop, as
  //   illustrated below.
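  //
  // For illustration only: a reduction such as
  //
  //   int sum = 0;
  //   for (int i = 0; i < N; ++i)
  //     for (int j = 0; j < M; ++j)
  //       sum += A[i*M+j];
  //
  // produces such a PHI pair: 'sum' is only modified in the inner loop, so
  // the pair stays valid after flattening.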
  // The set of PHI nodes in the outer loop header that we know will still be
  // valid after the transformation. These will not need to be modified (with
  // the exception of the induction variable), but we do need to check that
  // there are no unsafe PHI nodes.
  SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
  SafeOuterPHIs.insert(FI.OuterInductionPHI);

  // Check that all PHI nodes in the inner loop header match one of the valid
  // patterns.
  for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
    // The induction PHIs break these rules, and that's OK because we treat
    // them specially when doing the transformation.
    if (&InnerPHI == FI.InnerInductionPHI)
      continue;
    if (FI.isNarrowInductionPhi(&InnerPHI))
      continue;

    // Each inner loop PHI node must have two incoming values/blocks - one
    // from the pre-header, and one from the latch.
    assert(InnerPHI.getNumIncomingValues() == 2);
    Value *PreHeaderValue =
        InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
    Value *LatchValue =
        InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());

    // The incoming value from the outer loop must be the PHI node in the
    // outer loop header, with no modifications made in the top of the outer
    // loop.
    PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
    if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
      LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
      return false;
    }

    // The other incoming value must come from the inner loop, without any
    // modifications in the tail end of the outer loop. We are in LCSSA form,
    // so this will actually be a PHI in the inner loop's exit block, which
    // only uses values from inside the inner loop.
    PHINode *LCSSAPHI = dyn_cast<PHINode>(
        OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch()));
    if (!LCSSAPHI) {
      LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
      return false;
    }

    // The value used by the LCSSA PHI must be the same one that the inner
    // loop's PHI uses.
    if (LCSSAPHI->hasConstantValue() != LatchValue) {
      LLVM_DEBUG(
          dbgs() << "LCSSA PHI incoming value does not match latch value\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
    LLVM_DEBUG(dbgs() << "  Inner: "; InnerPHI.dump());
    LLVM_DEBUG(dbgs() << "  Outer: "; OuterPHI->dump());
    SafeOuterPHIs.insert(OuterPHI);
    FI.InnerPHIsToTransform.insert(&InnerPHI);
  }

  for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
    if (FI.isNarrowInductionPhi(&OuterPHI))
      continue;
    if (!SafeOuterPHIs.count(&OuterPHI)) {
      LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
  return true;
}
static bool
checkOuterLoopInsts(FlattenInfo &FI,
                    SmallPtrSetImpl<Instruction *> &IterationInstructions,
                    const TargetTransformInfo *TTI) {
  // Check for instructions in the outer but not inner loop. If any of these
  // have side-effects then this transformation is not legal, and if there is
  // a significant amount of code here which can't be optimised out then it's
  // not profitable (as these instructions would get executed for each
  // iteration of the inner loop).
  InstructionCost RepeatedInstrCost = 0;
  for (auto *B : FI.OuterLoop->getBlocks()) {
    if (FI.InnerLoop->contains(B))
      continue;

    for (auto &I : *B) {
      if (!isa<PHINode>(&I) && !I.isTerminator() &&
          !isSafeToSpeculativelyExecute(&I)) {
        LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
                             "side effects: ";
                   I.dump());
        return false;
      }
      // The execution count of the outer loop's iteration instructions
      // (increment, compare and branch) will be increased, but the
      // equivalent instructions will be removed from the inner loop, so
      // they make a net difference of zero.
      if (IterationInstructions.count(&I))
        continue;
      // The unconditional branch to the inner loop's header will turn into
      // a fall-through, so adds no cost.
      BranchInst *Br = dyn_cast<BranchInst>(&I);
      if (Br && Br->isUnconditional() &&
          Br->getSuccessor(0) == FI.InnerLoop->getHeader())
        continue;
      // Multiplies of the outer iteration variable and inner iteration
      // count will be optimised out.
      if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI),
                            m_Specific(FI.InnerTripCount))))
        continue;
      InstructionCost Cost =
          TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
      RepeatedInstrCost += Cost;
    }
  }
  LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
                    << RepeatedInstrCost << "\n");

  // Bail out if flattening the loops would cause instructions in the outer
  // loop but not in the inner loop to be executed extra times.
  if (RepeatedInstrCost > RepeatedInstructionThreshold) {
    LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
  return true;
}
// We require all uses of both induction variables to match this pattern:
//
//   (OuterPHI * InnerTripCount) + InnerPHI
//
// Any uses of the induction variables not matching that pattern would
// require a div/mod to reconstruct in the flattened loop, so the
// transformation wouldn't be profitable.
static bool checkIVUsers(FlattenInfo &FI) {
  // Check that all uses of the inner loop's induction variable match the
  // expected pattern, recording the uses of the outer IV.
  SmallPtrSet<Value *, 4> ValidOuterPHIUses;
  if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
    return false;

  // Check that there are no uses of the outer IV other than the ones found
  // as part of the pattern above.
  if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
    return false;

  LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
             dbgs() << "Found " << FI.LinearIVUses.size()
                    << " value(s) that can be replaced:\n";
             for (Value *V : FI.LinearIVUses) {
               dbgs() << "  ";
               V->dump();
             });
  return true;
}

// Return an OverflowResult dependent on whether overflow of the multiplication
// of InnerTripCount and OuterTripCount can be assumed not to happen.
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
                                    AssumptionCache *AC) {
  Function *F = FI.OuterLoop->getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // For debugging/testing.
  if (AssumeNoOverflow)
    return OverflowResult::NeverOverflows;

  // Check if the multiply could not overflow due to known ranges of the
  // input values.
  OverflowResult OR = computeOverflowForUnsignedMul(
      FI.InnerTripCount, FI.OuterTripCount, DL, AC,
      FI.OuterLoop->getLoopPreheader()->getTerminator(), DT);
  if (OR != OverflowResult::MayOverflow)
    return OR;

  for (Value *V : FI.LinearIVUses) {
    for (Value *U : V->users()) {
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
        for (Value *GEPUser : U->users()) {
          auto *GEPUserInst = cast<Instruction>(GEPUser);
          if (!isa<LoadInst>(GEPUserInst) &&
              !(isa<StoreInst>(GEPUserInst) &&
                GEP == GEPUserInst->getOperand(1)))
            continue;
          if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
                                                      FI.InnerLoop))
            continue;
          // The IV is used as the operand of a GEP which dominates the loop
          // latch, and the IV is at least as wide as the address space of the
          // GEP. In this case, the GEP would wrap around the address space
          // before the IV increment wraps, which would be UB.
          if (GEP->isInBounds() &&
              V->getType()->getIntegerBitWidth() >=
                  DL.getPointerTypeSizeInBits(GEP->getType())) {
            LLVM_DEBUG(
                dbgs() << "use of linear IV would be UB if overflow occurred: ";
                GEP->dump());
            return OverflowResult::NeverOverflows;
          }
        }
      }
    }
  }
  return OverflowResult::MayOverflow;
}
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                               ScalarEvolution *SE, AssumptionCache *AC,
                               const TargetTransformInfo *TTI) {
  SmallPtrSet<Instruction *, 8> IterationInstructions;
  if (!findLoopComponents(FI.InnerLoop, IterationInstructions,
                          FI.InnerInductionPHI, FI.InnerTripCount,
                          FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
    return false;
  if (!findLoopComponents(FI.OuterLoop, IterationInstructions,
                          FI.OuterInductionPHI, FI.OuterTripCount,
                          FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
    return false;

  // Both of the loop trip count values must be invariant in the outer loop
  // (non-instructions are all inherently invariant).
  if (!FI.OuterLoop->isLoopInvariant(FI.InnerTripCount)) {
    LLVM_DEBUG(dbgs() << "inner loop trip count not invariant\n");
    return false;
  }
  if (!FI.OuterLoop->isLoopInvariant(FI.OuterTripCount)) {
    LLVM_DEBUG(dbgs() << "outer loop trip count not invariant\n");
    return false;
  }

  if (!checkPHIs(FI, TTI))
    return false;

  // FIXME: it should be possible to handle different types correctly.
  if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType())
    return false;

  if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
    return false;

  // Find the values in the loop that can be replaced with the linearized
  // induction variable, and check that there are no other uses of the inner
  // or outer induction variable. If there were, we could still do this
  // transformation, but we'd have to insert a div/mod to calculate the
  // original IVs, so it wouldn't be profitable.
  if (!checkIVUsers(FI))
    return false;

  LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
  return true;
}
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                              ScalarEvolution *SE, AssumptionCache *AC,
                              const TargetTransformInfo *TTI, LPMUpdater *U,
                              MemorySSAUpdater *MSSAU) {
  Function *F = FI.OuterLoop->getHeader()->getParent();
  LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
  {
    using namespace ore;
    OptimizationRemark Remark(DEBUG_TYPE, "Flattened",
                              FI.InnerLoop->getStartLoc(),
                              FI.InnerLoop->getHeader());
    OptimizationRemarkEmitter ORE(F);
    Remark << "Flattened into outer loop";
    ORE.emit(Remark);
  }

  Value *NewTripCount = BinaryOperator::CreateMul(
      FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
      FI.OuterLoop->getLoopPreheader()->getTerminator());
  LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
             NewTripCount->dump());

  // Fix up PHI nodes that take values from the inner loop back-edge, which
  // we are about to remove.
  FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());

  // The old Phis will be optimised away later, but for now we can't leave
  // them in an invalid state, so update them here too.
  for (PHINode *PHI : FI.InnerPHIsToTransform)
    PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());

  // Modify the trip count of the outer loop to be the product of the two
  // trip counts.
  cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);

  // Replace the inner loop backedge with an unconditional branch to the exit.
  BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
  BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
  InnerExitingBlock->getTerminator()->eraseFromParent();
  BranchInst::Create(InnerExitBlock, InnerExitingBlock);

  // Update the DomTree and MemorySSA.
  DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
  if (MSSAU)
    MSSAU->removeEdge(InnerExitingBlock, FI.InnerLoop->getHeader());

  // Replace all uses of the polynomial calculated from the two induction
  // variables with the one new one.
  IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator());
  for (Value *V : FI.LinearIVUses) {
    Value *OuterValue = FI.OuterInductionPHI;
    if (FI.Widened)
      OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
                                       "flatten.trunciv");

    LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: ";
               OuterValue->dump());
    V->replaceAllUsesWith(OuterValue);
  }

  // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
  // deleted, and that any information they have about the outer loop is
  // invalidated.
  SE->forgetLoop(FI.OuterLoop);
  SE->forgetLoop(FI.InnerLoop);
  if (U)
    U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->getName());
  LI->erase(FI.InnerLoop);

  // Increment statistic value.
  NumFlattened++;

  return true;
}
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                       ScalarEvolution *SE, AssumptionCache *AC,
                       const TargetTransformInfo *TTI) {
  if (!WidenIV) {
    LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
  Module *M = FI.InnerLoop->getHeader()->getParent()->getParent();
  auto &DL = M->getDataLayout();
  auto *InnerType = FI.InnerInductionPHI->getType();
  auto *OuterType = FI.OuterInductionPHI->getType();
  unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
  auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());

  // If both induction types are less than the maximum legal integer width,
  // promote both to the widest type available so we know calculating
  // (OuterTripCount * InnerTripCount) as the new trip count is safe.
  if (InnerType != OuterType ||
      InnerType->getScalarSizeInBits() >= MaxLegalSize ||
      MaxLegalType->getScalarSizeInBits() <
          InnerType->getScalarSizeInBits() * 2) {
    LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
    return false;
  }
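  // For example (illustrative, assuming a target whose largest legal integer
  // type is i64): two i32 induction variables pass the check above and are
  // widened to i64, and the i64 product of the two i32 trip counts cannot
  // overflow, so no separate overflow check is needed.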
  SCEVExpander Rewriter(*SE, DL, "loopflatten");
  SmallVector<WeakTrackingVH, 4> DeadInsts;
  unsigned ElimExt = 0;
  unsigned Widened = 0;

  auto CreateWideIV = [&](WideIVInfo WideIV, bool &Deleted) -> bool {
    PHINode *WidePhi =
        createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened,
                     true /* HasGuards */, true /* UsePostIncrementRanges */);
    if (!WidePhi)
      return false;
    LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
    LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
    Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
    return true;
  };

  bool Deleted;
  if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false}, Deleted))
    return false;
  // Add the narrow phi to the list, so that it will be adjusted later when
  // the transformation is performed.
  if (!Deleted)
    FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);

  if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false}, Deleted))
    return false;

  assert(Widened && "Widened IV expected");
  FI.Widened = true;

  // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
  FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
  FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;

  // After widening, rediscover all the loop components.
  return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
}
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, AssumptionCache *AC,
                            const TargetTransformInfo *TTI, LPMUpdater *U,
                            MemorySSAUpdater *MSSAU) {
  LLVM_DEBUG(
      dbgs() << "Loop flattening running on outer loop "
             << FI.OuterLoop->getHeader()->getName() << " and inner loop "
             << FI.InnerLoop->getHeader()->getName() << " in "
             << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");

  if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
    return false;

  // Check if we can widen the induction variables to avoid overflow checks.
  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);

  // It can happen that after widening of the IV, flattening may not be
  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
  // that is the case.
  // TODO: IV widening without performing the actual flattening transformation
  // is not ideal. While this codegen change should not matter much, it is an
  // unnecessary change which is better to avoid. It's unlikely this happens
  // often, because if it's unprofitable after widening, it should be
  // unprofitable before widening as checked in the first round of checks. But
  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
  // relaxed. Because this is making a code change (the IV widening, but not
  // the flattening), we return true here.
  if (FI.Widened && !CanFlatten)
    return true;

  // If we have widened and can perform the transformation, do that here.
  if (CanFlatten)
    return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);

  // Otherwise, if we haven't widened the IV, check if the new iteration
  // variable might overflow. In this case, we need to version the loop, and
  // select the original version at runtime if the iteration space is too
  // large.
  // TODO: We currently don't version the loop.
  OverflowResult OR = checkOverflow(FI, DT, AC);
  if (OR == OverflowResult::AlwaysOverflowsHigh ||
      OR == OverflowResult::AlwaysOverflowsLow) {
    LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
    return false;
  } else if (OR == OverflowResult::MayOverflow) {
    LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
}
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
             AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U,
             MemorySSAUpdater *MSSAU) {
  bool Changed = false;
  for (Loop *InnerLoop : LN.getLoops()) {
    auto *OuterLoop = InnerLoop->getParentLoop();
    if (!OuterLoop)
      continue;
    FlattenInfo FI(OuterLoop, InnerLoop);
    Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
  }
  return Changed;
}
PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
                                       LoopStandardAnalysisResults &AR,
                                       LPMUpdater &U) {
  bool Changed = false;

  Optional<MemorySSAUpdater> MSSAU;
  if (AR.MSSA) {
    MSSAU = MemorySSAUpdater(AR.MSSA);
    if (VerifyMemorySSA)
      AR.MSSA->verifyMemorySSA();
  }

  // The loop flattening pass requires loops to be in simplified form, and
  // also needs LCSSA. Running this pass will simplify all loops that contain
  // inner loops, regardless of whether anything ends up being flattened.
  Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
                     MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);

  if (!Changed)
    return PreservedAnalyses::all();

  if (AR.MSSA && VerifyMemorySSA)
    AR.MSSA->verifyMemorySSA();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
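// Usage note (illustrative, not part of the pass logic): with the new pass
// manager this pass can typically be exercised on its own with something like
//   opt -passes=loop-flatten -S input.ll
// and with the legacy pass manager via the LoopFlattenLegacyPass registered
// below under the name "loop-flatten".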
namespace {
class LoopFlattenLegacyPass : public FunctionPass {
public:
  static char ID; // Pass ID, replacement for typeid
  LoopFlattenLegacyPass() : FunctionPass(ID) {
    initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  // Possibly flatten loop L into its child.
  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    getLoopAnalysisUsage(AU);
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<TargetTransformInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addPreserved<AssumptionCacheTracker>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
};
} // namespace

char LoopFlattenLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
                    false, false)

FunctionPass *llvm::createLoopFlattenPass() {
  return new LoopFlattenLegacyPass();
}

bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
  auto *TTI = &TTIP.getTTI(F);
  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *MSSA = getAnalysisIfAvailable<MemorySSAWrapperPass>();

  Optional<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = MemorySSAUpdater(&MSSA->getMSSA());

  bool Changed = false;
  for (Loop *L : *LI) {
    auto LN = LoopNest::getLoopNest(*L, *SE);
    Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr,
                       MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
  }
  return Changed;
}