ManualOptimizer.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. //===------ ManualOptimizer.cpp -------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Handle pragma/metadata-directed transformations.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "polly/ManualOptimizer.h"
  13. #include "polly/DependenceInfo.h"
  14. #include "polly/Options.h"
  15. #include "polly/ScheduleTreeTransform.h"
  16. #include "polly/Support/ScopHelper.h"
  17. #include "llvm/ADT/Optional.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/Analysis/LoopInfo.h"
  20. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  21. #include "llvm/IR/Metadata.h"
  22. #include "llvm/Transforms/Utils/LoopUtils.h"
  23. #define DEBUG_TYPE "polly-opt-manual"
  24. using namespace polly;
  25. using namespace llvm;
  26. namespace {
  27. static cl::opt<bool> IgnoreDepcheck(
  28. "polly-pragma-ignore-depcheck",
  29. cl::desc("Skip the dependency check for pragma-based transformations"),
  30. cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
  31. /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument
  32. /// instead of a Loop.
  33. static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
  34. if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.disable"))
  35. return TM_SuppressedByUser;
  36. Optional<int> Count =
  37. getOptionalIntLoopAttribute(LoopID, "llvm.loop.unroll.count");
  38. if (Count.hasValue())
  39. return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
  40. if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.enable"))
  41. return TM_ForcedByUser;
  42. if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.full"))
  43. return TM_ForcedByUser;
  44. if (hasDisableAllTransformsHint(LoopID))
  45. return TM_Disable;
  46. return TM_Unspecified;
  47. }
  48. // Return the first DebugLoc in the list.
  49. static DebugLoc findFirstDebugLoc(MDNode *MD) {
  50. if (MD) {
  51. for (const MDOperand &X : drop_begin(MD->operands(), 1)) {
  52. Metadata *A = X.get();
  53. if (!isa<DILocation>(A))
  54. continue;
  55. return cast<DILocation>(A);
  56. }
  57. }
  58. return {};
  59. }
  60. static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) {
  61. // First find dedicated transformation location
  62. // (such as the location of #pragma clang loop)
  63. MDNode *MD = findOptionMDForLoopID(LoopMD, Name);
  64. if (DebugLoc K = findFirstDebugLoc(MD))
  65. return K;
  66. // Otherwise, fall back to the location of the loop itself
  67. return findFirstDebugLoc(LoopMD);
  68. }
  69. /// Apply full or partial unrolling.
  70. static isl::schedule applyLoopUnroll(MDNode *LoopMD,
  71. isl::schedule_node BandToUnroll) {
  72. TransformationMode UnrollMode = ::hasUnrollTransformation(LoopMD);
  73. if (UnrollMode & TM_Disable)
  74. return {};
  75. assert(!BandToUnroll.is_null());
  76. // TODO: Isl's codegen also supports unrolling by isl_ast_build via
  77. // isl_schedule_node_band_set_ast_build_options({ unroll[x] }) which would be
  78. // more efficient because the content duplication is delayed. However, the
  79. // unrolled loop could be input of another loop transformation which expects
  80. // the explicit schedule nodes. That is, we would need this explicit expansion
  81. // anyway and using the ISL codegen option is a compile-time optimization.
  82. int64_t Factor = getOptionalIntLoopAttribute(LoopMD, "llvm.loop.unroll.count")
  83. .getValueOr(0);
  84. bool Full = getBooleanLoopAttribute(LoopMD, "llvm.loop.unroll.full");
  85. assert((!Full || !(Factor > 0)) &&
  86. "Cannot unroll fully and partially at the same time");
  87. if (Full)
  88. return applyFullUnroll(BandToUnroll);
  89. if (Factor > 0)
  90. return applyPartialUnroll(BandToUnroll, Factor);
  91. // For heuristic unrolling, fall back to LLVM's LoopUnroll pass.
  92. return {};
  93. }
  94. static isl::schedule applyLoopFission(MDNode *LoopMD,
  95. isl::schedule_node BandToFission) {
  96. // TODO: Make it possible to selectively fission substatements.
  97. // TODO: Apply followup loop properties.
  98. // TODO: Instead of fission every statement, find the maximum set that does
  99. // not cause a dependency violation.
  100. return applyMaxFission(BandToFission);
  101. }
  102. // Return the properties from a LoopID. Scalar properties are ignored.
  103. static auto getLoopMDProps(MDNode *LoopMD) {
  104. return map_range(
  105. make_filter_range(
  106. drop_begin(LoopMD->operands(), 1),
  107. [](const MDOperand &MDOp) { return isa<MDNode>(MDOp.get()); }),
  108. [](const MDOperand &MDOp) { return cast<MDNode>(MDOp.get()); });
  109. }
  110. /// Recursively visit all nodes in a schedule, loop for loop-transformations
  111. /// metadata and apply the first encountered.
  112. class SearchTransformVisitor
  113. : public RecursiveScheduleTreeVisitor<SearchTransformVisitor> {
  114. private:
  115. using BaseTy = RecursiveScheduleTreeVisitor<SearchTransformVisitor>;
  116. BaseTy &getBase() { return *this; }
  117. const BaseTy &getBase() const { return *this; }
  118. polly::Scop *S;
  119. const Dependences *D;
  120. OptimizationRemarkEmitter *ORE;
  121. // Set after a transformation is applied. Recursive search must be aborted
  122. // once this happens to ensure that any new followup transformation is
  123. // transformed in innermost-first order.
  124. isl::schedule Result;
  125. /// Check wether a schedule after a transformation is legal. Return the old
  126. /// schedule without the transformation.
  127. isl::schedule
  128. checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion,
  129. const isl::schedule_node &OrigBand,
  130. StringRef DebugLocAttr, StringRef TransPrefix,
  131. StringRef RemarkName, StringRef TransformationName) {
  132. if (D->isValidSchedule(*S, Result))
  133. return Result;
  134. LLVMContext &Ctx = LoopMD->getContext();
  135. LLVM_DEBUG(dbgs() << "Dependency violation detected\n");
  136. DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr);
  137. if (IgnoreDepcheck) {
  138. LLVM_DEBUG(dbgs() << "Still accepting transformation due to "
  139. "-polly-pragma-ignore-depcheck\n");
  140. if (ORE) {
  141. ORE->emit(
  142. OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion)
  143. << (Twine("Could not verify dependencies for ") +
  144. TransformationName +
  145. "; still applying because of -polly-pragma-ignore-depcheck")
  146. .str());
  147. }
  148. return Result;
  149. }
  150. LLVM_DEBUG(dbgs() << "Rolling back transformation\n");
  151. if (ORE) {
  152. ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName,
  153. TransformLoc, CodeRegion)
  154. << (Twine("not applying ") + TransformationName +
  155. ": cannot ensure semantic equivalence due to possible "
  156. "dependency violations")
  157. .str());
  158. }
  159. // If illegal, revert and remove the transformation to not risk re-trying
  160. // indefintely.
  161. MDNode *NewLoopMD =
  162. makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {});
  163. BandAttr *Attr = getBandAttr(OrigBand);
  164. Attr->Metadata = NewLoopMD;
  165. // Roll back old schedule.
  166. return OrigBand.get_schedule();
  167. }
  168. public:
  169. SearchTransformVisitor(polly::Scop *S, const Dependences *D,
  170. OptimizationRemarkEmitter *ORE)
  171. : S(S), D(D), ORE(ORE) {}
  172. static isl::schedule applyOneTransformation(polly::Scop *S,
  173. const Dependences *D,
  174. OptimizationRemarkEmitter *ORE,
  175. const isl::schedule &Sched) {
  176. SearchTransformVisitor Transformer(S, D, ORE);
  177. Transformer.visit(Sched);
  178. return Transformer.Result;
  179. }
  180. void visitBand(isl::schedule_node_band Band) {
  181. // Transform inner loops first (depth-first search).
  182. getBase().visitBand(Band);
  183. if (!Result.is_null())
  184. return;
  185. // Since it is (currently) not possible to have a BandAttr marker that is
  186. // specific to each loop in a band, we only support single-loop bands.
  187. if (isl_schedule_node_band_n_member(Band.get()) != 1)
  188. return;
  189. BandAttr *Attr = getBandAttr(Band);
  190. if (!Attr) {
  191. // Band has no attribute.
  192. return;
  193. }
  194. // CodeRegion used but ORE to determine code hotness.
  195. // TODO: Works only for original loop; for transformed loops, should track
  196. // where the loop's body code comes from.
  197. Loop *Loop = Attr->OriginalLoop;
  198. Value *CodeRegion = nullptr;
  199. if (Loop)
  200. CodeRegion = Loop->getHeader();
  201. MDNode *LoopMD = Attr->Metadata;
  202. if (!LoopMD)
  203. return;
  204. // Iterate over loop properties to find the first transformation.
  205. // FIXME: If there are more than one transformation in the LoopMD (making
  206. // the order of transformations ambiguous), all others are silently ignored.
  207. for (MDNode *MD : getLoopMDProps(LoopMD)) {
  208. auto *NameMD = dyn_cast<MDString>(MD->getOperand(0).get());
  209. if (!NameMD)
  210. continue;
  211. StringRef AttrName = NameMD->getString();
  212. // Honor transformation order; transform the first transformation in the
  213. // list first.
  214. if (AttrName == "llvm.loop.unroll.enable" ||
  215. AttrName == "llvm.loop.unroll.count" ||
  216. AttrName == "llvm.loop.unroll.full") {
  217. Result = applyLoopUnroll(LoopMD, Band);
  218. if (!Result.is_null())
  219. return;
  220. } else if (AttrName == "llvm.loop.distribute.enable") {
  221. Result = applyLoopFission(LoopMD, Band);
  222. if (!Result.is_null())
  223. Result = checkDependencyViolation(
  224. LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc",
  225. "llvm.loop.distribute.", "FailedRequestedFission",
  226. "loop fission/distribution");
  227. if (!Result.is_null())
  228. return;
  229. }
  230. // not a loop transformation; look for next property
  231. }
  232. }
  233. void visitNode(isl::schedule_node Other) {
  234. if (!Result.is_null())
  235. return;
  236. getBase().visitNode(Other);
  237. }
  238. };
  239. } // namespace
  240. isl::schedule
  241. polly::applyManualTransformations(Scop *S, isl::schedule Sched,
  242. const Dependences &D,
  243. OptimizationRemarkEmitter *ORE) {
  244. // Search the loop nest for transformations until fixpoint.
  245. while (true) {
  246. isl::schedule Result =
  247. SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched);
  248. if (Result.is_null()) {
  249. // No (more) transformation has been found.
  250. break;
  251. }
  252. // Use transformed schedule and look for more transformations.
  253. Sched = Result;
  254. }
  255. return Sched;
  256. }