LoopUnrollAndJamPass.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass implements an unroll and jam pass. Most of the work is done by
  10. // Utils/UnrollLoopAndJam.cpp.
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
  13. #include "llvm/ADT/ArrayRef.h"
  14. #include "llvm/ADT/None.h"
  15. #include "llvm/ADT/Optional.h"
  16. #include "llvm/ADT/PriorityWorklist.h"
  17. #include "llvm/ADT/SmallPtrSet.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/Analysis/AssumptionCache.h"
  20. #include "llvm/Analysis/CodeMetrics.h"
  21. #include "llvm/Analysis/DependenceAnalysis.h"
  22. #include "llvm/Analysis/LoopAnalysisManager.h"
  23. #include "llvm/Analysis/LoopInfo.h"
  24. #include "llvm/Analysis/LoopPass.h"
  25. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  26. #include "llvm/Analysis/ScalarEvolution.h"
  27. #include "llvm/Analysis/TargetTransformInfo.h"
  28. #include "llvm/IR/BasicBlock.h"
  29. #include "llvm/IR/Constants.h"
  30. #include "llvm/IR/Dominators.h"
  31. #include "llvm/IR/Function.h"
  32. #include "llvm/IR/Instructions.h"
  33. #include "llvm/IR/Metadata.h"
  34. #include "llvm/IR/PassManager.h"
  35. #include "llvm/InitializePasses.h"
  36. #include "llvm/Pass.h"
  37. #include "llvm/PassRegistry.h"
  38. #include "llvm/Support/Casting.h"
  39. #include "llvm/Support/CommandLine.h"
  40. #include "llvm/Support/Compiler.h"
  41. #include "llvm/Support/Debug.h"
  42. #include "llvm/Support/raw_ostream.h"
  43. #include "llvm/Transforms/Scalar.h"
  44. #include "llvm/Transforms/Utils.h"
  45. #include "llvm/Transforms/Utils/LCSSA.h"
  46. #include "llvm/Transforms/Utils/LoopPeel.h"
  47. #include "llvm/Transforms/Utils/LoopSimplify.h"
  48. #include "llvm/Transforms/Utils/LoopUtils.h"
  49. #include "llvm/Transforms/Utils/UnrollLoop.h"
  50. #include <cassert>
  51. #include <cstdint>
  52. namespace llvm {
  53. class Instruction;
  54. class Value;
  55. } // namespace llvm
  56. using namespace llvm;
  57. #define DEBUG_TYPE "loop-unroll-and-jam"
  58. /// @{
  59. /// Metadata attribute names
  60. static const char *const LLVMLoopUnrollAndJamFollowupAll =
  61. "llvm.loop.unroll_and_jam.followup_all";
  62. static const char *const LLVMLoopUnrollAndJamFollowupInner =
  63. "llvm.loop.unroll_and_jam.followup_inner";
  64. static const char *const LLVMLoopUnrollAndJamFollowupOuter =
  65. "llvm.loop.unroll_and_jam.followup_outer";
  66. static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
  67. "llvm.loop.unroll_and_jam.followup_remainder_inner";
  68. static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
  69. "llvm.loop.unroll_and_jam.followup_remainder_outer";
  70. /// @}
  71. static cl::opt<bool>
  72. AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
  73. cl::desc("Allows loops to be unroll-and-jammed."));
  74. static cl::opt<unsigned> UnrollAndJamCount(
  75. "unroll-and-jam-count", cl::Hidden,
  76. cl::desc("Use this unroll count for all loops including those with "
  77. "unroll_and_jam_count pragma values, for testing purposes"));
  78. static cl::opt<unsigned> UnrollAndJamThreshold(
  79. "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
  80. cl::desc("Threshold to use for inner loop when doing unroll and jam."));
  81. static cl::opt<unsigned> PragmaUnrollAndJamThreshold(
  82. "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
  83. cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
  84. "unroll_count pragma."));
  85. // Returns the loop hint metadata node with the given name (for example,
  86. // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
  87. // returned.
  88. static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
  89. if (MDNode *LoopID = L->getLoopID())
  90. return GetUnrollMetadata(LoopID, Name);
  91. return nullptr;
  92. }
  93. // Returns true if the loop has any metadata starting with Prefix. For example a
  94. // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
  95. static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
  96. if (MDNode *LoopID = L->getLoopID()) {
  97. // First operand should refer to the loop id itself.
  98. assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  99. assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
  100. for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
  101. MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
  102. if (!MD)
  103. continue;
  104. MDString *S = dyn_cast<MDString>(MD->getOperand(0));
  105. if (!S)
  106. continue;
  107. if (S->getString().startswith(Prefix))
  108. return true;
  109. }
  110. }
  111. return false;
  112. }
  113. // Returns true if the loop has an unroll_and_jam(enable) pragma.
  114. static bool hasUnrollAndJamEnablePragma(const Loop *L) {
  115. return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
  116. }
  117. // If loop has an unroll_and_jam_count pragma return the (necessarily
  118. // positive) value from the pragma. Otherwise return 0.
  119. static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
  120. MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
  121. if (MD) {
  122. assert(MD->getNumOperands() == 2 &&
  123. "Unroll count hint metadata should have two operands.");
  124. unsigned Count =
  125. mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
  126. assert(Count >= 1 && "Unroll count must be positive.");
  127. return Count;
  128. }
  129. return 0;
  130. }
  131. // Returns loop size estimation for unrolled loop.
  132. static uint64_t
  133. getUnrollAndJammedLoopSize(unsigned LoopSize,
  134. TargetTransformInfo::UnrollingPreferences &UP) {
  135. assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
  136. return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
  137. }
  138. // Calculates unroll and jam count and writes it to UP.Count. Returns true if
  139. // unroll count was set explicitly.
  140. static bool computeUnrollAndJamCount(
  141. Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
  142. LoopInfo *LI, ScalarEvolution &SE,
  143. const SmallPtrSetImpl<const Value *> &EphValues,
  144. OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
  145. unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
  146. unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
  147. TargetTransformInfo::PeelingPreferences &PP) {
  148. // First up use computeUnrollCount from the loop unroller to get a count
  149. // for unrolling the outer loop, plus any loops requiring explicit
  150. // unrolling we leave to the unroller. This uses UP.Threshold /
  151. // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
  152. // We have already checked that the loop has no unroll.* pragmas.
  153. unsigned MaxTripCount = 0;
  154. bool UseUpperBound = false;
  155. bool ExplicitUnroll = computeUnrollCount(
  156. L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
  157. /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
  158. UseUpperBound);
  159. if (ExplicitUnroll || UseUpperBound) {
  160. // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
  161. // for the unroller instead.
  162. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
  163. "computeUnrollCount\n");
  164. UP.Count = 0;
  165. return false;
  166. }
  167. // Override with any explicit Count from the "unroll-and-jam-count" option.
  168. bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
  169. if (UserUnrollCount) {
  170. UP.Count = UnrollAndJamCount;
  171. UP.Force = true;
  172. if (UP.AllowRemainder &&
  173. getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
  174. getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
  175. UP.UnrollAndJamInnerLoopThreshold)
  176. return true;
  177. }
  178. // Check for unroll_and_jam pragmas
  179. unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
  180. if (PragmaCount > 0) {
  181. UP.Count = PragmaCount;
  182. UP.Runtime = true;
  183. UP.Force = true;
  184. if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
  185. getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
  186. getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
  187. UP.UnrollAndJamInnerLoopThreshold)
  188. return true;
  189. }
  190. bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
  191. bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
  192. bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
  193. // If the loop has an unrolling pragma, we want to be more aggressive with
  194. // unrolling limits.
  195. if (ExplicitUnrollAndJam)
  196. UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
  197. if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
  198. UP.UnrollAndJamInnerLoopThreshold) {
  199. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
  200. "inner loop too large\n");
  201. UP.Count = 0;
  202. return false;
  203. }
  204. // We have a sensible limit for the outer loop, now adjust it for the inner
  205. // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
  206. // explicitly, we want to stick to it.
  207. if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
  208. while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
  209. UP.UnrollAndJamInnerLoopThreshold)
  210. UP.Count--;
  211. }
  212. // If we are explicitly unroll and jamming, we are done. Otherwise there are a
  213. // number of extra performance heuristics to check.
  214. if (ExplicitUnrollAndJam)
  215. return true;
  216. // If the inner loop count is known and small, leave the entire loop nest to
  217. // be the unroller
  218. if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
  219. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
  220. "being left for the unroller\n");
  221. UP.Count = 0;
  222. return false;
  223. }
  224. // Check for situations where UnJ is likely to be unprofitable. Including
  225. // subloops with more than 1 block.
  226. if (SubLoop->getBlocks().size() != 1) {
  227. LLVM_DEBUG(
  228. dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
  229. UP.Count = 0;
  230. return false;
  231. }
  232. // Limit to loops where there is something to gain from unrolling and
  233. // jamming the loop. In this case, look for loads that are invariant in the
  234. // outer loop and can become shared.
  235. unsigned NumInvariant = 0;
  236. for (BasicBlock *BB : SubLoop->getBlocks()) {
  237. for (Instruction &I : *BB) {
  238. if (auto *Ld = dyn_cast<LoadInst>(&I)) {
  239. Value *V = Ld->getPointerOperand();
  240. const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
  241. if (SE.isLoopInvariant(LSCEV, L))
  242. NumInvariant++;
  243. }
  244. }
  245. }
  246. if (NumInvariant == 0) {
  247. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
  248. UP.Count = 0;
  249. return false;
  250. }
  251. return false;
  252. }
  253. static LoopUnrollResult
  254. tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
  255. ScalarEvolution &SE, const TargetTransformInfo &TTI,
  256. AssumptionCache &AC, DependenceInfo &DI,
  257. OptimizationRemarkEmitter &ORE, int OptLevel) {
  258. TargetTransformInfo::UnrollingPreferences UP =
  259. gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
  260. None, None, None, None, None, None);
  261. TargetTransformInfo::PeelingPreferences PP =
  262. gatherPeelingPreferences(L, SE, TTI, None, None);
  263. TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
  264. if (EnableMode & TM_Disable)
  265. return LoopUnrollResult::Unmodified;
  266. if (EnableMode & TM_ForcedByUser)
  267. UP.UnrollAndJam = true;
  268. if (AllowUnrollAndJam.getNumOccurrences() > 0)
  269. UP.UnrollAndJam = AllowUnrollAndJam;
  270. if (UnrollAndJamThreshold.getNumOccurrences() > 0)
  271. UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold;
  272. // Exit early if unrolling is disabled.
  273. if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
  274. return LoopUnrollResult::Unmodified;
  275. LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
  276. << L->getHeader()->getParent()->getName() << "] Loop %"
  277. << L->getHeader()->getName() << "\n");
  278. // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
  279. // the unroller, so long as it does not explicitly have unroll_and_jam
  280. // metadata. This means #pragma nounroll will disable unroll and jam as well
  281. // as unrolling
  282. if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
  283. !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
  284. LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
  285. return LoopUnrollResult::Unmodified;
  286. }
  287. if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
  288. LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
  289. return LoopUnrollResult::Unmodified;
  290. }
  291. // Approximate the loop size and collect useful info
  292. unsigned NumInlineCandidates;
  293. bool NotDuplicatable;
  294. bool Convergent;
  295. SmallPtrSet<const Value *, 32> EphValues;
  296. CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  297. Loop *SubLoop = L->getSubLoops()[0];
  298. unsigned InnerLoopSize =
  299. ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
  300. Convergent, TTI, EphValues, UP.BEInsns);
  301. unsigned OuterLoopSize =
  302. ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
  303. TTI, EphValues, UP.BEInsns);
  304. LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
  305. LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
  306. if (NotDuplicatable) {
  307. LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
  308. "instructions.\n");
  309. return LoopUnrollResult::Unmodified;
  310. }
  311. if (NumInlineCandidates != 0) {
  312. LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
  313. return LoopUnrollResult::Unmodified;
  314. }
  315. if (Convergent) {
  316. LLVM_DEBUG(
  317. dbgs() << " Not unrolling loop with convergent instructions.\n");
  318. return LoopUnrollResult::Unmodified;
  319. }
  320. // Save original loop IDs for after the transformation.
  321. MDNode *OrigOuterLoopID = L->getLoopID();
  322. MDNode *OrigSubLoopID = SubLoop->getLoopID();
  323. // To assign the loop id of the epilogue, assign it before unrolling it so it
  324. // is applied to every inner loop of the epilogue. We later apply the loop ID
  325. // for the jammed inner loop.
  326. Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
  327. OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  328. LLVMLoopUnrollAndJamFollowupRemainderInner});
  329. if (NewInnerEpilogueLoopID.hasValue())
  330. SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
  331. // Find trip count and trip multiple
  332. BasicBlock *Latch = L->getLoopLatch();
  333. BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  334. unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
  335. unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
  336. unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
  337. // Decide if, and by how much, to unroll
  338. bool IsCountSetExplicitly = computeUnrollAndJamCount(
  339. L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
  340. OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
  341. if (UP.Count <= 1)
  342. return LoopUnrollResult::Unmodified;
  343. // Unroll factor (Count) must be less or equal to TripCount.
  344. if (OuterTripCount && UP.Count > OuterTripCount)
  345. UP.Count = OuterTripCount;
  346. Loop *EpilogueOuterLoop = nullptr;
  347. LoopUnrollResult UnrollResult = UnrollAndJamLoop(
  348. L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
  349. &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
  350. // Assign new loop attributes.
  351. if (EpilogueOuterLoop) {
  352. Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
  353. OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  354. LLVMLoopUnrollAndJamFollowupRemainderOuter});
  355. if (NewOuterEpilogueLoopID.hasValue())
  356. EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
  357. }
  358. Optional<MDNode *> NewInnerLoopID =
  359. makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  360. LLVMLoopUnrollAndJamFollowupInner});
  361. if (NewInnerLoopID.hasValue())
  362. SubLoop->setLoopID(NewInnerLoopID.getValue());
  363. else
  364. SubLoop->setLoopID(OrigSubLoopID);
  365. if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
  366. Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
  367. OrigOuterLoopID,
  368. {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
  369. if (NewOuterLoopID.hasValue()) {
  370. L->setLoopID(NewOuterLoopID.getValue());
  371. // Do not setLoopAlreadyUnrolled if a followup was given.
  372. return UnrollResult;
  373. }
  374. }
  375. // If loop has an unroll count pragma or unrolled by explicitly set count
  376. // mark loop as unrolled to prevent unrolling beyond that requested.
  377. if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
  378. L->setLoopAlreadyUnrolled();
  379. return UnrollResult;
  380. }
  381. static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
  382. ScalarEvolution &SE,
  383. const TargetTransformInfo &TTI,
  384. AssumptionCache &AC, DependenceInfo &DI,
  385. OptimizationRemarkEmitter &ORE, int OptLevel,
  386. LPMUpdater &U) {
  387. bool DidSomething = false;
  388. ArrayRef<Loop *> Loops = LN.getLoops();
  389. Loop *OutmostLoop = &LN.getOutermostLoop();
  390. // Add the loop nests in the reverse order of LN. See method
  391. // declaration.
  392. SmallPriorityWorklist<Loop *, 4> Worklist;
  393. appendLoopsToWorklist(Loops, Worklist);
  394. while (!Worklist.empty()) {
  395. Loop *L = Worklist.pop_back_val();
  396. std::string LoopName = std::string(L->getName());
  397. LoopUnrollResult Result =
  398. tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
  399. if (Result != LoopUnrollResult::Unmodified)
  400. DidSomething = true;
  401. if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
  402. U.markLoopAsDeleted(*L, LoopName);
  403. }
  404. return DidSomething;
  405. }
  406. namespace {
  407. class LoopUnrollAndJam : public LoopPass {
  408. public:
  409. static char ID; // Pass ID, replacement for typeid
  410. unsigned OptLevel;
  411. LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
  412. initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
  413. }
  414. bool runOnLoop(Loop *L, LPPassManager &LPM) override {
  415. if (skipLoop(L))
  416. return false;
  417. auto *F = L->getHeader()->getParent();
  418. auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  419. auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  420. auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
  421. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  422. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
  423. auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  424. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  425. LoopUnrollResult Result =
  426. tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
  427. if (Result == LoopUnrollResult::FullyUnrolled)
  428. LPM.markLoopAsDeleted(*L);
  429. return Result != LoopUnrollResult::Unmodified;
  430. }
  431. /// This transformation requires natural loop information & requires that
  432. /// loop preheaders be inserted into the CFG...
  433. void getAnalysisUsage(AnalysisUsage &AU) const override {
  434. AU.addRequired<DominatorTreeWrapperPass>();
  435. AU.addRequired<LoopInfoWrapperPass>();
  436. AU.addRequired<ScalarEvolutionWrapperPass>();
  437. AU.addRequired<TargetTransformInfoWrapperPass>();
  438. AU.addRequired<AssumptionCacheTracker>();
  439. AU.addRequired<DependenceAnalysisWrapperPass>();
  440. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  441. getLoopAnalysisUsage(AU);
  442. }
  443. };
  444. } // end anonymous namespace
  445. char LoopUnrollAndJam::ID = 0;
  446. INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
  447. "Unroll and Jam loops", false, false)
  448. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  449. INITIALIZE_PASS_DEPENDENCY(LoopPass)
  450. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  451. INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  452. INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  453. INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
  454. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  455. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  456. INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
  457. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  458. INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
  459. "Unroll and Jam loops", false, false)
  460. Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
  461. return new LoopUnrollAndJam(OptLevel);
  462. }
  463. PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
  464. LoopAnalysisManager &AM,
  465. LoopStandardAnalysisResults &AR,
  466. LPMUpdater &U) {
  467. Function &F = *LN.getParent();
  468. DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
  469. OptimizationRemarkEmitter ORE(&F);
  470. if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
  471. OptLevel, U))
  472. return PreservedAnalyses::all();
  473. auto PA = getLoopPassPreservedAnalyses();
  474. PA.preserve<LoopNestAnalysis>();
  475. return PA;
  476. }