LoopGenerators.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains functions to create scalar loops and orchestrate the
  10. // creation of parallel loops as LLVM-IR.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "polly/CodeGen/LoopGenerators.h"
  14. #include "polly/Options.h"
  15. #include "polly/ScopDetection.h"
  16. #include "llvm/Analysis/LoopInfo.h"
  17. #include "llvm/IR/DataLayout.h"
  18. #include "llvm/IR/DebugInfoMetadata.h"
  19. #include "llvm/IR/Dominators.h"
  20. #include "llvm/IR/Module.h"
  21. #include "llvm/Support/CommandLine.h"
  22. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  23. using namespace llvm;
  24. using namespace polly;
  25. int polly::PollyNumThreads;
  26. OMPGeneralSchedulingType polly::PollyScheduling;
  27. int polly::PollyChunkSize;
  28. static cl::opt<int, true>
  29. XPollyNumThreads("polly-num-threads",
  30. cl::desc("Number of threads to use (0 = auto)"),
  31. cl::Hidden, cl::location(polly::PollyNumThreads),
  32. cl::init(0), cl::cat(PollyCategory));
  33. static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
  34. "polly-scheduling",
  35. cl::desc("Scheduling type of parallel OpenMP for loops"),
  36. cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
  37. "Static scheduling"),
  38. clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
  39. "Dynamic scheduling"),
  40. clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
  41. "Guided scheduling"),
  42. clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
  43. "Runtime determined (OMP_SCHEDULE)")),
  44. cl::Hidden, cl::location(polly::PollyScheduling),
  45. cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
  46. cl::cat(PollyCategory));
  47. static cl::opt<int, true>
  48. XPollyChunkSize("polly-scheduling-chunksize",
  49. cl::desc("Chunksize to use by the OpenMP runtime calls"),
  50. cl::Hidden, cl::location(polly::PollyChunkSize),
  51. cl::init(0), cl::Optional, cl::cat(PollyCategory));
  52. // We generate a loop of either of the following structures:
  53. //
  54. // BeforeBB BeforeBB
  55. // | |
  56. // v v
  57. // GuardBB PreHeaderBB
  58. // / | | _____
  59. // __ PreHeaderBB | v \/ |
  60. // / \ / | HeaderBB latch
  61. // latch HeaderBB | |\ |
  62. // \ / \ / | \------/
  63. // < \ / |
  64. // \ / v
  65. // ExitBB ExitBB
  66. //
  67. // depending on whether or not we know that it is executed at least once. If
  68. // not, GuardBB checks if the loop is executed at least once. If this is the
  69. // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
  70. // contains the loop iv 'polly.indvar', the incremented loop iv
  71. // 'polly.indvar_next' as well as the condition to check if we execute another
  72. // iteration of the loop. After the loop has finished, we branch to ExitBB.
  73. // We expect the type of UB, LB, UB+Stride to be large enough for values that
  74. // UB may take throughout the execution of the loop, including the computation
  75. // of indvar + Stride before the final abort.
  76. Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
  77. PollyIRBuilder &Builder, LoopInfo &LI,
  78. DominatorTree &DT, BasicBlock *&ExitBB,
  79. ICmpInst::Predicate Predicate,
  80. ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
  81. bool LoopVectDisabled) {
  82. Function *F = Builder.GetInsertBlock()->getParent();
  83. LLVMContext &Context = F->getContext();
  84. assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
  85. IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
  86. assert(LoopIVType && "UB is not integer?");
  87. BasicBlock *BeforeBB = Builder.GetInsertBlock();
  88. BasicBlock *GuardBB =
  89. UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
  90. BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
  91. BasicBlock *PreHeaderBB =
  92. BasicBlock::Create(Context, "polly.loop_preheader", F);
  93. // Update LoopInfo
  94. Loop *OuterLoop = LI.getLoopFor(BeforeBB);
  95. Loop *NewLoop = LI.AllocateLoop();
  96. if (OuterLoop)
  97. OuterLoop->addChildLoop(NewLoop);
  98. else
  99. LI.addTopLevelLoop(NewLoop);
  100. if (OuterLoop) {
  101. if (GuardBB)
  102. OuterLoop->addBasicBlockToLoop(GuardBB, LI);
  103. OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
  104. }
  105. NewLoop->addBasicBlockToLoop(HeaderBB, LI);
  106. // Notify the annotator (if present) that we have a new loop, but only
  107. // after the header block is set.
  108. if (Annotator)
  109. Annotator->pushLoop(NewLoop, Parallel);
  110. // ExitBB
  111. ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
  112. ExitBB->setName("polly.loop_exit");
  113. // BeforeBB
  114. if (GuardBB) {
  115. BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
  116. DT.addNewBlock(GuardBB, BeforeBB);
  117. // GuardBB
  118. Builder.SetInsertPoint(GuardBB);
  119. Value *LoopGuard;
  120. LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
  121. LoopGuard->setName("polly.loop_guard");
  122. Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
  123. DT.addNewBlock(PreHeaderBB, GuardBB);
  124. } else {
  125. BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
  126. DT.addNewBlock(PreHeaderBB, BeforeBB);
  127. }
  128. // PreHeaderBB
  129. Builder.SetInsertPoint(PreHeaderBB);
  130. Builder.CreateBr(HeaderBB);
  131. // HeaderBB
  132. DT.addNewBlock(HeaderBB, PreHeaderBB);
  133. Builder.SetInsertPoint(HeaderBB);
  134. PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
  135. IV->addIncoming(LB, PreHeaderBB);
  136. Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
  137. Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
  138. Value *LoopCondition =
  139. Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
  140. // Create the loop latch and annotate it as such.
  141. BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
  142. if (Annotator)
  143. Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
  144. IV->addIncoming(IncrementedIV, HeaderBB);
  145. if (GuardBB)
  146. DT.changeImmediateDominator(ExitBB, GuardBB);
  147. else
  148. DT.changeImmediateDominator(ExitBB, HeaderBB);
  149. // The loop body should be added here.
  150. Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
  151. return IV;
  152. }
  153. Value *ParallelLoopGenerator::createParallelLoop(
  154. Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
  155. ValueMapT &Map, BasicBlock::iterator *LoopBody) {
  156. AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
  157. BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
  158. Value *IV;
  159. Function *SubFn;
  160. std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
  161. *LoopBody = Builder.GetInsertPoint();
  162. Builder.SetInsertPoint(&*BeforeLoop);
  163. Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
  164. "polly.par.userContext");
  165. // Add one as the upper bound provided by OpenMP is a < comparison
  166. // whereas the codegenForSequential function creates a <= comparison.
  167. UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
  168. // Execute the prepared subfunction in parallel.
  169. deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);
  170. return IV;
  171. }
  172. Function *ParallelLoopGenerator::createSubFnDefinition() {
  173. Function *F = Builder.GetInsertBlock()->getParent();
  174. Function *SubFn = prepareSubFnDefinition(F);
  175. // Certain backends (e.g., NVPTX) do not support '.'s in function names.
  176. // Hence, we ensure that all '.'s are replaced by '_'s.
  177. std::string FunctionName = SubFn->getName().str();
  178. std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
  179. SubFn->setName(FunctionName);
  180. // Do not run any polly pass on the new function.
  181. SubFn->addFnAttr(PollySkipFnAttr);
  182. return SubFn;
  183. }
  184. AllocaInst *
  185. ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
  186. SmallVector<Type *, 8> Members;
  187. for (Value *V : Values)
  188. Members.push_back(V->getType());
  189. const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
  190. // We do not want to allocate the alloca inside any loop, thus we allocate it
  191. // in the entry block of the function and use annotations to denote the actual
  192. // live span (similar to clang).
  193. BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
  194. Instruction *IP = &*EntryBB.getFirstInsertionPt();
  195. StructType *Ty = StructType::get(Builder.getContext(), Members);
  196. AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
  197. "polly.par.userContext", IP);
  198. for (unsigned i = 0; i < Values.size(); i++) {
  199. Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
  200. Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
  201. Builder.CreateStore(Values[i], Address);
  202. }
  203. return Struct;
  204. }
  205. void ParallelLoopGenerator::extractValuesFromStruct(
  206. SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
  207. for (unsigned i = 0; i < OldValues.size(); i++) {
  208. Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
  209. Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType();
  210. Value *NewValue = Builder.CreateLoad(ElemTy, Address);
  211. NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
  212. Map[OldValues[i]] = NewValue;
  213. }
  214. }
  215. DebugLoc polly::createDebugLocForGeneratedCode(Function *F) {
  216. if (!F)
  217. return DebugLoc();
  218. LLVMContext &Ctx = F->getContext();
  219. DISubprogram *DILScope =
  220. dyn_cast_or_null<DISubprogram>(F->getMetadata(LLVMContext::MD_dbg));
  221. if (!DILScope)
  222. return DebugLoc();
  223. return DILocation::get(Ctx, 0, 0, DILScope);
  224. }