LoopUnrollAndJamPass.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass implements an unroll and jam pass. Most of the work is done by
  10. // Utils/UnrollLoopAndJam.cpp.
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
  13. #include "llvm/ADT/ArrayRef.h"
  14. #include "llvm/ADT/PriorityWorklist.h"
  15. #include "llvm/ADT/SmallPtrSet.h"
  16. #include "llvm/ADT/StringRef.h"
  17. #include "llvm/Analysis/AssumptionCache.h"
  18. #include "llvm/Analysis/CodeMetrics.h"
  19. #include "llvm/Analysis/DependenceAnalysis.h"
  20. #include "llvm/Analysis/LoopAnalysisManager.h"
  21. #include "llvm/Analysis/LoopInfo.h"
  22. #include "llvm/Analysis/LoopNestAnalysis.h"
  23. #include "llvm/Analysis/LoopPass.h"
  24. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  25. #include "llvm/Analysis/ScalarEvolution.h"
  26. #include "llvm/Analysis/TargetTransformInfo.h"
  27. #include "llvm/IR/BasicBlock.h"
  28. #include "llvm/IR/Constants.h"
  29. #include "llvm/IR/Dominators.h"
  30. #include "llvm/IR/Function.h"
  31. #include "llvm/IR/Instructions.h"
  32. #include "llvm/IR/Metadata.h"
  33. #include "llvm/IR/PassManager.h"
  34. #include "llvm/InitializePasses.h"
  35. #include "llvm/Pass.h"
  36. #include "llvm/PassRegistry.h"
  37. #include "llvm/Support/Casting.h"
  38. #include "llvm/Support/CommandLine.h"
  39. #include "llvm/Support/Compiler.h"
  40. #include "llvm/Support/Debug.h"
  41. #include "llvm/Support/raw_ostream.h"
  42. #include "llvm/Transforms/Scalar.h"
  43. #include "llvm/Transforms/Scalar/LoopPassManager.h"
  44. #include "llvm/Transforms/Utils/LoopPeel.h"
  45. #include "llvm/Transforms/Utils/LoopUtils.h"
  46. #include "llvm/Transforms/Utils/UnrollLoop.h"
  47. #include <cassert>
  48. #include <cstdint>
  49. namespace llvm {
  50. class Instruction;
  51. class Value;
  52. } // namespace llvm
  53. using namespace llvm;
  54. #define DEBUG_TYPE "loop-unroll-and-jam"
  55. /// @{
  56. /// Metadata attribute names
  57. static const char *const LLVMLoopUnrollAndJamFollowupAll =
  58. "llvm.loop.unroll_and_jam.followup_all";
  59. static const char *const LLVMLoopUnrollAndJamFollowupInner =
  60. "llvm.loop.unroll_and_jam.followup_inner";
  61. static const char *const LLVMLoopUnrollAndJamFollowupOuter =
  62. "llvm.loop.unroll_and_jam.followup_outer";
  63. static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
  64. "llvm.loop.unroll_and_jam.followup_remainder_inner";
  65. static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
  66. "llvm.loop.unroll_and_jam.followup_remainder_outer";
  67. /// @}
  68. static cl::opt<bool>
  69. AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
  70. cl::desc("Allows loops to be unroll-and-jammed."));
  71. static cl::opt<unsigned> UnrollAndJamCount(
  72. "unroll-and-jam-count", cl::Hidden,
  73. cl::desc("Use this unroll count for all loops including those with "
  74. "unroll_and_jam_count pragma values, for testing purposes"));
  75. static cl::opt<unsigned> UnrollAndJamThreshold(
  76. "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
  77. cl::desc("Threshold to use for inner loop when doing unroll and jam."));
  78. static cl::opt<unsigned> PragmaUnrollAndJamThreshold(
  79. "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
  80. cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
  81. "unroll_count pragma."));
  82. // Returns the loop hint metadata node with the given name (for example,
  83. // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
  84. // returned.
  85. static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
  86. if (MDNode *LoopID = L->getLoopID())
  87. return GetUnrollMetadata(LoopID, Name);
  88. return nullptr;
  89. }
  90. // Returns true if the loop has any metadata starting with Prefix. For example a
  91. // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
  92. static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
  93. if (MDNode *LoopID = L->getLoopID()) {
  94. // First operand should refer to the loop id itself.
  95. assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  96. assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
  97. for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
  98. MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
  99. if (!MD)
  100. continue;
  101. MDString *S = dyn_cast<MDString>(MD->getOperand(0));
  102. if (!S)
  103. continue;
  104. if (S->getString().startswith(Prefix))
  105. return true;
  106. }
  107. }
  108. return false;
  109. }
  110. // Returns true if the loop has an unroll_and_jam(enable) pragma.
  111. static bool hasUnrollAndJamEnablePragma(const Loop *L) {
  112. return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
  113. }
  114. // If loop has an unroll_and_jam_count pragma return the (necessarily
  115. // positive) value from the pragma. Otherwise return 0.
  116. static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
  117. MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
  118. if (MD) {
  119. assert(MD->getNumOperands() == 2 &&
  120. "Unroll count hint metadata should have two operands.");
  121. unsigned Count =
  122. mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
  123. assert(Count >= 1 && "Unroll count must be positive.");
  124. return Count;
  125. }
  126. return 0;
  127. }
  128. // Returns loop size estimation for unrolled loop.
  129. static uint64_t
  130. getUnrollAndJammedLoopSize(unsigned LoopSize,
  131. TargetTransformInfo::UnrollingPreferences &UP) {
  132. assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
  133. return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
  134. }
  135. // Calculates unroll and jam count and writes it to UP.Count. Returns true if
  136. // unroll count was set explicitly.
  137. static bool computeUnrollAndJamCount(
  138. Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
  139. LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,
  140. const SmallPtrSetImpl<const Value *> &EphValues,
  141. OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
  142. unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
  143. unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
  144. TargetTransformInfo::PeelingPreferences &PP) {
  145. // First up use computeUnrollCount from the loop unroller to get a count
  146. // for unrolling the outer loop, plus any loops requiring explicit
  147. // unrolling we leave to the unroller. This uses UP.Threshold /
  148. // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
  149. // We have already checked that the loop has no unroll.* pragmas.
  150. unsigned MaxTripCount = 0;
  151. bool UseUpperBound = false;
  152. bool ExplicitUnroll = computeUnrollCount(
  153. L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
  154. /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
  155. UseUpperBound);
  156. if (ExplicitUnroll || UseUpperBound) {
  157. // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
  158. // for the unroller instead.
  159. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
  160. "computeUnrollCount\n");
  161. UP.Count = 0;
  162. return false;
  163. }
  164. // Override with any explicit Count from the "unroll-and-jam-count" option.
  165. bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
  166. if (UserUnrollCount) {
  167. UP.Count = UnrollAndJamCount;
  168. UP.Force = true;
  169. if (UP.AllowRemainder &&
  170. getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
  171. getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
  172. UP.UnrollAndJamInnerLoopThreshold)
  173. return true;
  174. }
  175. // Check for unroll_and_jam pragmas
  176. unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
  177. if (PragmaCount > 0) {
  178. UP.Count = PragmaCount;
  179. UP.Runtime = true;
  180. UP.Force = true;
  181. if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
  182. getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
  183. getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
  184. UP.UnrollAndJamInnerLoopThreshold)
  185. return true;
  186. }
  187. bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
  188. bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
  189. bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
  190. // If the loop has an unrolling pragma, we want to be more aggressive with
  191. // unrolling limits.
  192. if (ExplicitUnrollAndJam)
  193. UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
  194. if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
  195. UP.UnrollAndJamInnerLoopThreshold) {
  196. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
  197. "inner loop too large\n");
  198. UP.Count = 0;
  199. return false;
  200. }
  201. // We have a sensible limit for the outer loop, now adjust it for the inner
  202. // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
  203. // explicitly, we want to stick to it.
  204. if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
  205. while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
  206. UP.UnrollAndJamInnerLoopThreshold)
  207. UP.Count--;
  208. }
  209. // If we are explicitly unroll and jamming, we are done. Otherwise there are a
  210. // number of extra performance heuristics to check.
  211. if (ExplicitUnrollAndJam)
  212. return true;
  213. // If the inner loop count is known and small, leave the entire loop nest to
  214. // be the unroller
  215. if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
  216. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
  217. "being left for the unroller\n");
  218. UP.Count = 0;
  219. return false;
  220. }
  221. // Check for situations where UnJ is likely to be unprofitable. Including
  222. // subloops with more than 1 block.
  223. if (SubLoop->getBlocks().size() != 1) {
  224. LLVM_DEBUG(
  225. dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
  226. UP.Count = 0;
  227. return false;
  228. }
  229. // Limit to loops where there is something to gain from unrolling and
  230. // jamming the loop. In this case, look for loads that are invariant in the
  231. // outer loop and can become shared.
  232. unsigned NumInvariant = 0;
  233. for (BasicBlock *BB : SubLoop->getBlocks()) {
  234. for (Instruction &I : *BB) {
  235. if (auto *Ld = dyn_cast<LoadInst>(&I)) {
  236. Value *V = Ld->getPointerOperand();
  237. const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
  238. if (SE.isLoopInvariant(LSCEV, L))
  239. NumInvariant++;
  240. }
  241. }
  242. }
  243. if (NumInvariant == 0) {
  244. LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
  245. UP.Count = 0;
  246. return false;
  247. }
  248. return false;
  249. }
  250. static LoopUnrollResult
  251. tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
  252. ScalarEvolution &SE, const TargetTransformInfo &TTI,
  253. AssumptionCache &AC, DependenceInfo &DI,
  254. OptimizationRemarkEmitter &ORE, int OptLevel) {
  255. TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
  256. L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
  257. std::nullopt, std::nullopt, std::nullopt, std::nullopt);
  258. TargetTransformInfo::PeelingPreferences PP =
  259. gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);
  260. TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
  261. if (EnableMode & TM_Disable)
  262. return LoopUnrollResult::Unmodified;
  263. if (EnableMode & TM_ForcedByUser)
  264. UP.UnrollAndJam = true;
  265. if (AllowUnrollAndJam.getNumOccurrences() > 0)
  266. UP.UnrollAndJam = AllowUnrollAndJam;
  267. if (UnrollAndJamThreshold.getNumOccurrences() > 0)
  268. UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold;
  269. // Exit early if unrolling is disabled.
  270. if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
  271. return LoopUnrollResult::Unmodified;
  272. LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
  273. << L->getHeader()->getParent()->getName() << "] Loop %"
  274. << L->getHeader()->getName() << "\n");
  275. // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
  276. // the unroller, so long as it does not explicitly have unroll_and_jam
  277. // metadata. This means #pragma nounroll will disable unroll and jam as well
  278. // as unrolling
  279. if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
  280. !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
  281. LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
  282. return LoopUnrollResult::Unmodified;
  283. }
  284. if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
  285. LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
  286. return LoopUnrollResult::Unmodified;
  287. }
  288. // Approximate the loop size and collect useful info
  289. unsigned NumInlineCandidates;
  290. bool NotDuplicatable;
  291. bool Convergent;
  292. SmallPtrSet<const Value *, 32> EphValues;
  293. CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  294. Loop *SubLoop = L->getSubLoops()[0];
  295. InstructionCost InnerLoopSizeIC =
  296. ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
  297. Convergent, TTI, EphValues, UP.BEInsns);
  298. InstructionCost OuterLoopSizeIC =
  299. ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
  300. TTI, EphValues, UP.BEInsns);
  301. LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
  302. LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
  303. if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
  304. LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
  305. << " with invalid cost.\n");
  306. return LoopUnrollResult::Unmodified;
  307. }
  308. unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
  309. unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
  310. if (NotDuplicatable) {
  311. LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
  312. "instructions.\n");
  313. return LoopUnrollResult::Unmodified;
  314. }
  315. if (NumInlineCandidates != 0) {
  316. LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
  317. return LoopUnrollResult::Unmodified;
  318. }
  319. if (Convergent) {
  320. LLVM_DEBUG(
  321. dbgs() << " Not unrolling loop with convergent instructions.\n");
  322. return LoopUnrollResult::Unmodified;
  323. }
  324. // Save original loop IDs for after the transformation.
  325. MDNode *OrigOuterLoopID = L->getLoopID();
  326. MDNode *OrigSubLoopID = SubLoop->getLoopID();
  327. // To assign the loop id of the epilogue, assign it before unrolling it so it
  328. // is applied to every inner loop of the epilogue. We later apply the loop ID
  329. // for the jammed inner loop.
  330. std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
  331. OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  332. LLVMLoopUnrollAndJamFollowupRemainderInner});
  333. if (NewInnerEpilogueLoopID)
  334. SubLoop->setLoopID(*NewInnerEpilogueLoopID);
  335. // Find trip count and trip multiple
  336. BasicBlock *Latch = L->getLoopLatch();
  337. BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  338. unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
  339. unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
  340. unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
  341. // Decide if, and by how much, to unroll
  342. bool IsCountSetExplicitly = computeUnrollAndJamCount(
  343. L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
  344. OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
  345. if (UP.Count <= 1)
  346. return LoopUnrollResult::Unmodified;
  347. // Unroll factor (Count) must be less or equal to TripCount.
  348. if (OuterTripCount && UP.Count > OuterTripCount)
  349. UP.Count = OuterTripCount;
  350. Loop *EpilogueOuterLoop = nullptr;
  351. LoopUnrollResult UnrollResult = UnrollAndJamLoop(
  352. L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
  353. &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
  354. // Assign new loop attributes.
  355. if (EpilogueOuterLoop) {
  356. std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
  357. OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  358. LLVMLoopUnrollAndJamFollowupRemainderOuter});
  359. if (NewOuterEpilogueLoopID)
  360. EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);
  361. }
  362. std::optional<MDNode *> NewInnerLoopID =
  363. makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
  364. LLVMLoopUnrollAndJamFollowupInner});
  365. if (NewInnerLoopID)
  366. SubLoop->setLoopID(*NewInnerLoopID);
  367. else
  368. SubLoop->setLoopID(OrigSubLoopID);
  369. if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
  370. std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
  371. OrigOuterLoopID,
  372. {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
  373. if (NewOuterLoopID) {
  374. L->setLoopID(*NewOuterLoopID);
  375. // Do not setLoopAlreadyUnrolled if a followup was given.
  376. return UnrollResult;
  377. }
  378. }
  379. // If loop has an unroll count pragma or unrolled by explicitly set count
  380. // mark loop as unrolled to prevent unrolling beyond that requested.
  381. if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
  382. L->setLoopAlreadyUnrolled();
  383. return UnrollResult;
  384. }
  385. static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
  386. ScalarEvolution &SE,
  387. const TargetTransformInfo &TTI,
  388. AssumptionCache &AC, DependenceInfo &DI,
  389. OptimizationRemarkEmitter &ORE, int OptLevel,
  390. LPMUpdater &U) {
  391. bool DidSomething = false;
  392. ArrayRef<Loop *> Loops = LN.getLoops();
  393. Loop *OutmostLoop = &LN.getOutermostLoop();
  394. // Add the loop nests in the reverse order of LN. See method
  395. // declaration.
  396. SmallPriorityWorklist<Loop *, 4> Worklist;
  397. appendLoopsToWorklist(Loops, Worklist);
  398. while (!Worklist.empty()) {
  399. Loop *L = Worklist.pop_back_val();
  400. std::string LoopName = std::string(L->getName());
  401. LoopUnrollResult Result =
  402. tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
  403. if (Result != LoopUnrollResult::Unmodified)
  404. DidSomething = true;
  405. if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
  406. U.markLoopAsDeleted(*L, LoopName);
  407. }
  408. return DidSomething;
  409. }
  410. namespace {
  411. class LoopUnrollAndJam : public LoopPass {
  412. public:
  413. static char ID; // Pass ID, replacement for typeid
  414. unsigned OptLevel;
  415. LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
  416. initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
  417. }
  418. bool runOnLoop(Loop *L, LPPassManager &LPM) override {
  419. if (skipLoop(L))
  420. return false;
  421. auto *F = L->getHeader()->getParent();
  422. auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  423. auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  424. auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
  425. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  426. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
  427. auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  428. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  429. LoopUnrollResult Result =
  430. tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
  431. if (Result == LoopUnrollResult::FullyUnrolled)
  432. LPM.markLoopAsDeleted(*L);
  433. return Result != LoopUnrollResult::Unmodified;
  434. }
  435. /// This transformation requires natural loop information & requires that
  436. /// loop preheaders be inserted into the CFG...
  437. void getAnalysisUsage(AnalysisUsage &AU) const override {
  438. AU.addRequired<DominatorTreeWrapperPass>();
  439. AU.addRequired<LoopInfoWrapperPass>();
  440. AU.addRequired<ScalarEvolutionWrapperPass>();
  441. AU.addRequired<TargetTransformInfoWrapperPass>();
  442. AU.addRequired<AssumptionCacheTracker>();
  443. AU.addRequired<DependenceAnalysisWrapperPass>();
  444. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  445. getLoopAnalysisUsage(AU);
  446. }
  447. };
  448. } // end anonymous namespace
  449. char LoopUnrollAndJam::ID = 0;
  450. INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
  451. "Unroll and Jam loops", false, false)
  452. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  453. INITIALIZE_PASS_DEPENDENCY(LoopPass)
  454. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  455. INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  456. INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  457. INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
  458. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  459. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  460. INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
  461. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  462. INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
  463. "Unroll and Jam loops", false, false)
  464. Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
  465. return new LoopUnrollAndJam(OptLevel);
  466. }
  467. PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
  468. LoopAnalysisManager &AM,
  469. LoopStandardAnalysisResults &AR,
  470. LPMUpdater &U) {
  471. Function &F = *LN.getParent();
  472. DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
  473. OptimizationRemarkEmitter ORE(&F);
  474. if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
  475. OptLevel, U))
  476. return PreservedAnalyses::all();
  477. auto PA = getLoopPassPreservedAnalyses();
  478. PA.preserve<LoopNestAnalysis>();
  479. return PA;
  480. }