LoopGenerators.cpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains functions to create scalar loops and orchestrate the
  10. // creation of parallel loops as LLVM-IR.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "polly/CodeGen/LoopGenerators.h"
  14. #include "polly/Options.h"
  15. #include "polly/ScopDetection.h"
  16. #include "llvm/Analysis/LoopInfo.h"
  17. #include "llvm/IR/DataLayout.h"
  18. #include "llvm/IR/Dominators.h"
  19. #include "llvm/IR/Module.h"
  20. #include "llvm/Support/CommandLine.h"
  21. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  22. using namespace llvm;
  23. using namespace polly;
  24. int polly::PollyNumThreads;
  25. OMPGeneralSchedulingType polly::PollyScheduling;
  26. int polly::PollyChunkSize;
  27. static cl::opt<int, true>
  28. XPollyNumThreads("polly-num-threads",
  29. cl::desc("Number of threads to use (0 = auto)"),
  30. cl::Hidden, cl::location(polly::PollyNumThreads),
  31. cl::init(0), cl::cat(PollyCategory));
  32. static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
  33. "polly-scheduling",
  34. cl::desc("Scheduling type of parallel OpenMP for loops"),
  35. cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
  36. "Static scheduling"),
  37. clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
  38. "Dynamic scheduling"),
  39. clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
  40. "Guided scheduling"),
  41. clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
  42. "Runtime determined (OMP_SCHEDULE)")),
  43. cl::Hidden, cl::location(polly::PollyScheduling),
  44. cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
  45. cl::cat(PollyCategory));
  46. static cl::opt<int, true>
  47. XPollyChunkSize("polly-scheduling-chunksize",
  48. cl::desc("Chunksize to use by the OpenMP runtime calls"),
  49. cl::Hidden, cl::location(polly::PollyChunkSize),
  50. cl::init(0), cl::Optional, cl::cat(PollyCategory));
  51. // We generate a loop of either of the following structures:
  52. //
  53. // BeforeBB BeforeBB
  54. // | |
  55. // v v
  56. // GuardBB PreHeaderBB
  57. // / | | _____
  58. // __ PreHeaderBB | v \/ |
  59. // / \ / | HeaderBB latch
  60. // latch HeaderBB | |\ |
  61. // \ / \ / | \------/
  62. // < \ / |
  63. // \ / v
  64. // ExitBB ExitBB
  65. //
  66. // depending on whether or not we know that it is executed at least once. If
  67. // not, GuardBB checks if the loop is executed at least once. If this is the
  68. // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
  69. // contains the loop iv 'polly.indvar', the incremented loop iv
  70. // 'polly.indvar_next' as well as the condition to check if we execute another
  71. // iteration of the loop. After the loop has finished, we branch to ExitBB.
  72. // We expect the type of UB, LB, UB+Stride to be large enough for values that
  73. // UB may take throughout the execution of the loop, including the computation
  74. // of indvar + Stride before the final abort.
  75. Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
  76. PollyIRBuilder &Builder, LoopInfo &LI,
  77. DominatorTree &DT, BasicBlock *&ExitBB,
  78. ICmpInst::Predicate Predicate,
  79. ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
  80. bool LoopVectDisabled) {
  81. Function *F = Builder.GetInsertBlock()->getParent();
  82. LLVMContext &Context = F->getContext();
  83. assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
  84. IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
  85. assert(LoopIVType && "UB is not integer?");
  86. BasicBlock *BeforeBB = Builder.GetInsertBlock();
  87. BasicBlock *GuardBB =
  88. UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
  89. BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
  90. BasicBlock *PreHeaderBB =
  91. BasicBlock::Create(Context, "polly.loop_preheader", F);
  92. // Update LoopInfo
  93. Loop *OuterLoop = LI.getLoopFor(BeforeBB);
  94. Loop *NewLoop = LI.AllocateLoop();
  95. if (OuterLoop)
  96. OuterLoop->addChildLoop(NewLoop);
  97. else
  98. LI.addTopLevelLoop(NewLoop);
  99. if (OuterLoop) {
  100. if (GuardBB)
  101. OuterLoop->addBasicBlockToLoop(GuardBB, LI);
  102. OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
  103. }
  104. NewLoop->addBasicBlockToLoop(HeaderBB, LI);
  105. // Notify the annotator (if present) that we have a new loop, but only
  106. // after the header block is set.
  107. if (Annotator)
  108. Annotator->pushLoop(NewLoop, Parallel);
  109. // ExitBB
  110. ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
  111. ExitBB->setName("polly.loop_exit");
  112. // BeforeBB
  113. if (GuardBB) {
  114. BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
  115. DT.addNewBlock(GuardBB, BeforeBB);
  116. // GuardBB
  117. Builder.SetInsertPoint(GuardBB);
  118. Value *LoopGuard;
  119. LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
  120. LoopGuard->setName("polly.loop_guard");
  121. Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
  122. DT.addNewBlock(PreHeaderBB, GuardBB);
  123. } else {
  124. BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
  125. DT.addNewBlock(PreHeaderBB, BeforeBB);
  126. }
  127. // PreHeaderBB
  128. Builder.SetInsertPoint(PreHeaderBB);
  129. Builder.CreateBr(HeaderBB);
  130. // HeaderBB
  131. DT.addNewBlock(HeaderBB, PreHeaderBB);
  132. Builder.SetInsertPoint(HeaderBB);
  133. PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
  134. IV->addIncoming(LB, PreHeaderBB);
  135. Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
  136. Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
  137. Value *LoopCondition =
  138. Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
  139. // Create the loop latch and annotate it as such.
  140. BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
  141. if (Annotator)
  142. Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
  143. IV->addIncoming(IncrementedIV, HeaderBB);
  144. if (GuardBB)
  145. DT.changeImmediateDominator(ExitBB, GuardBB);
  146. else
  147. DT.changeImmediateDominator(ExitBB, HeaderBB);
  148. // The loop body should be added here.
  149. Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
  150. return IV;
  151. }
  152. Value *ParallelLoopGenerator::createParallelLoop(
  153. Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
  154. ValueMapT &Map, BasicBlock::iterator *LoopBody) {
  155. AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
  156. BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
  157. Value *IV;
  158. Function *SubFn;
  159. std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
  160. *LoopBody = Builder.GetInsertPoint();
  161. Builder.SetInsertPoint(&*BeforeLoop);
  162. Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
  163. "polly.par.userContext");
  164. // Add one as the upper bound provided by OpenMP is a < comparison
  165. // whereas the codegenForSequential function creates a <= comparison.
  166. UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
  167. // Execute the prepared subfunction in parallel.
  168. deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);
  169. return IV;
  170. }
  171. Function *ParallelLoopGenerator::createSubFnDefinition() {
  172. Function *F = Builder.GetInsertBlock()->getParent();
  173. Function *SubFn = prepareSubFnDefinition(F);
  174. // Certain backends (e.g., NVPTX) do not support '.'s in function names.
  175. // Hence, we ensure that all '.'s are replaced by '_'s.
  176. std::string FunctionName = SubFn->getName().str();
  177. std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
  178. SubFn->setName(FunctionName);
  179. // Do not run any polly pass on the new function.
  180. SubFn->addFnAttr(PollySkipFnAttr);
  181. return SubFn;
  182. }
  183. AllocaInst *
  184. ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
  185. SmallVector<Type *, 8> Members;
  186. for (Value *V : Values)
  187. Members.push_back(V->getType());
  188. const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
  189. // We do not want to allocate the alloca inside any loop, thus we allocate it
  190. // in the entry block of the function and use annotations to denote the actual
  191. // live span (similar to clang).
  192. BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
  193. Instruction *IP = &*EntryBB.getFirstInsertionPt();
  194. StructType *Ty = StructType::get(Builder.getContext(), Members);
  195. AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
  196. "polly.par.userContext", IP);
  197. for (unsigned i = 0; i < Values.size(); i++) {
  198. Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
  199. Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
  200. Builder.CreateStore(Values[i], Address);
  201. }
  202. return Struct;
  203. }
  204. void ParallelLoopGenerator::extractValuesFromStruct(
  205. SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
  206. for (unsigned i = 0; i < OldValues.size(); i++) {
  207. Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
  208. Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType();
  209. Value *NewValue = Builder.CreateLoad(ElemTy, Address);
  210. NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
  211. Map[OldValues[i]] = NewValue;
  212. }
  213. }