123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558 |
- //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file contains functions to create parallel loops as LLVM-IR.
- //
- //===----------------------------------------------------------------------===//
- #include "polly/CodeGen/LoopGeneratorsKMP.h"
- #include "llvm/IR/Dominators.h"
- #include "llvm/IR/Module.h"
- using namespace llvm;
- using namespace polly;
- void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
- Value *SubFnParam,
- Value *LB, Value *UB,
- Value *Stride) {
- const std::string Name = "__kmpc_fork_call";
- Function *F = M->getFunction(Name);
- Type *KMPCMicroTy = StructType::getTypeByName(M->getContext(), "kmpc_micro");
- if (!KMPCMicroTy) {
- // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
- Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(),
- Builder.getInt32Ty()->getPointerTo()};
- KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true);
- }
- // If F is not available, declare it.
- if (!F) {
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
- KMPCMicroTy->getPointerTo()};
- FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast(
- SubFn, KMPCMicroTy->getPointerTo());
- Value *Args[] = {SourceLocationInfo,
- Builder.getInt32(4) /* Number of arguments (w/o Task) */,
- Task,
- LB,
- UB,
- Stride,
- SubFnParam};
- Builder.CreateCall(F, Args);
- }
- void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
- Value *SubFnParam,
- Value *LB, Value *UB,
- Value *Stride) {
- // Inform OpenMP runtime about the number of threads if greater than zero
- if (PollyNumThreads > 0) {
- Value *GlobalThreadID = createCallGlobalThreadNum();
- createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
- }
- // Tell the runtime we start a parallel loop
- createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
- }
- Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
- std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(),
- Builder.getInt32Ty()->getPointerTo(),
- LongType,
- LongType,
- LongType,
- Builder.getInt8PtrTy()};
- FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
- Function *SubFn = Function::Create(FT, Function::InternalLinkage,
- F->getName() + "_polly_subfn", M);
- // Name the function's arguments
- Function::arg_iterator AI = SubFn->arg_begin();
- AI->setName("polly.kmpc.global_tid");
- std::advance(AI, 1);
- AI->setName("polly.kmpc.bound_tid");
- std::advance(AI, 1);
- AI->setName("polly.kmpc.lb");
- std::advance(AI, 1);
- AI->setName("polly.kmpc.ub");
- std::advance(AI, 1);
- AI->setName("polly.kmpc.inc");
- std::advance(AI, 1);
- AI->setName("polly.kmpc.shared");
- return SubFn;
- }
- // Create a subfunction of the following (preliminary) structure:
- //
- // PrevBB
- // |
- // v
- // HeaderBB
- // / | _____
- // / v v |
- // / PreHeaderBB |
- // | | |
- // | v |
- // | CheckNextBB |
- // \ | \_____/
- // \ |
- // v v
- // ExitBB
- //
- // HeaderBB will hold allocations, loading of variables and kmp-init calls.
- // CheckNextBB will check for more work (dynamic / static chunked) or will be
- // empty (static non chunked).
- // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
- // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
- // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
- // chunked scheduling case. ExitBB marks the end of the parallel execution.
- // The possibly empty BasicBlocks will automatically be removed.
- std::tuple<Value *, Function *>
- ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
- AllocaInst *StructData,
- SetVector<Value *> Data, ValueMapT &Map) {
- Function *SubFn = createSubFnDefinition();
- LLVMContext &Context = SubFn->getContext();
- // Store the previous basic block.
- BasicBlock *PrevBB = Builder.GetInsertBlock();
- // Create basic blocks.
- BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
- BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
- BasicBlock *CheckNextBB =
- BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
- BasicBlock *PreHeaderBB =
- BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
- DT.addNewBlock(HeaderBB, PrevBB);
- DT.addNewBlock(ExitBB, HeaderBB);
- DT.addNewBlock(CheckNextBB, HeaderBB);
- DT.addNewBlock(PreHeaderBB, HeaderBB);
- // Fill up basic block HeaderBB.
- Builder.SetInsertPoint(HeaderBB);
- Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
- Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
- Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
- "polly.par.lastIterPtr");
- Value *StridePtr =
- Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
- // Get iterator for retrieving the previously defined parameters.
- Function::arg_iterator AI = SubFn->arg_begin();
- // First argument holds "global thread ID".
- Value *IDPtr = &*AI;
- // Skip "bound thread ID" since it is not used (but had to be defined).
- std::advance(AI, 2);
- // Move iterator to: LB, UB, Stride, Shared variable struct.
- Value *LB = &*AI;
- std::advance(AI, 1);
- Value *UB = &*AI;
- std::advance(AI, 1);
- Value *Stride = &*AI;
- std::advance(AI, 1);
- Value *Shared = &*AI;
- Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(),
- "polly.par.userContext");
- extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
- Map);
- const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
- Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment,
- "polly.par.global_tid");
- Builder.CreateAlignedStore(LB, LBPtr, Alignment);
- Builder.CreateAlignedStore(UB, UBPtr, Alignment);
- Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
- Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
- // Subtract one as the upper bound provided by openmp is a < comparison
- // whereas the codegenForSequential function creates a <= comparison.
- Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
- "polly.indvar.UBAdjusted");
- Value *ChunkSize =
- ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
- OMPGeneralSchedulingType Scheduling =
- getSchedType(PollyChunkSize, PollyScheduling);
- switch (Scheduling) {
- case OMPGeneralSchedulingType::Dynamic:
- case OMPGeneralSchedulingType::Guided:
- case OMPGeneralSchedulingType::Runtime:
- // "DYNAMIC" scheduling types are handled below (including 'runtime')
- {
- UB = AdjustedUB;
- createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
- Value *HasWork =
- createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
- Value *HasIteration =
- Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
- Builder.getInt32(1), "polly.hasIteration");
- Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
- Builder.SetInsertPoint(CheckNextBB);
- HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
- HasIteration =
- Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
- Builder.getInt32(1), "polly.hasWork");
- Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
- Builder.SetInsertPoint(PreHeaderBB);
- LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
- "polly.indvar.LB");
- UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
- "polly.indvar.UB");
- }
- break;
- case OMPGeneralSchedulingType::StaticChunked:
- case OMPGeneralSchedulingType::StaticNonChunked:
- // "STATIC" scheduling types are handled below
- {
- Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
- createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
- Value *ChunkedStride = Builder.CreateAlignedLoad(
- LongType, StridePtr, Alignment, "polly.kmpc.stride");
- LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
- "polly.indvar.LB");
- UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
- "polly.indvar.UB.temp");
- Value *UBInRange =
- Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
- "polly.indvar.UB.inRange");
- UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
- Builder.CreateAlignedStore(UB, UBPtr, Alignment);
- Value *HasIteration = Builder.CreateICmp(
- llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
- Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
- if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
- Builder.SetInsertPoint(PreHeaderBB);
- LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
- "polly.indvar.LB.entry");
- UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
- "polly.indvar.UB.entry");
- }
- Builder.SetInsertPoint(CheckNextBB);
- if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
- Value *NextLB =
- Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
- Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
- Value *NextUBOutOfBounds =
- Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
- AdjustedUB, "polly.indvar.nextUB.outOfBounds");
- NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
- "polly.indvar.nextUB");
- Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
- Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
- Value *HasWork =
- Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
- AdjustedUB, "polly.hasWork");
- Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
- } else {
- Builder.CreateBr(ExitBB);
- }
- Builder.SetInsertPoint(PreHeaderBB);
- }
- break;
- }
- Builder.CreateBr(CheckNextBB);
- Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
- BasicBlock *AfterBB;
- Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, LI, DT, AfterBB,
- ICmpInst::ICMP_SLE, nullptr, true,
- /* UseGuard */ false);
- BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
- // Add code to terminate this subfunction.
- Builder.SetInsertPoint(ExitBB);
- // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
- if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
- Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
- createCallStaticFini(ID);
- }
- Builder.CreateRetVoid();
- Builder.SetInsertPoint(&*LoopBody);
- return std::make_tuple(IV, SubFn);
- }
- Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
- const std::string Name = "__kmpc_global_thread_num";
- Function *F = M->getFunction(Name);
- // If F is not available, declare it.
- if (!F) {
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo()};
- FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- return Builder.CreateCall(F, {SourceLocationInfo});
- }
- void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
- Value *NumThreads) {
- const std::string Name = "__kmpc_push_num_threads";
- Function *F = M->getFunction(Name);
- // If F is not available, declare it.
- if (!F) {
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
- Builder.getInt32Ty()};
- FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
- Builder.CreateCall(F, Args);
- }
- void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
- Value *IsLastPtr,
- Value *LBPtr, Value *UBPtr,
- Value *StridePtr,
- Value *ChunkSize) {
- const std::string Name =
- is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
- Function *F = M->getFunction(Name);
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- // If F is not available, declare it.
- if (!F) {
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(),
- Builder.getInt32Ty(),
- Builder.getInt32Ty(),
- Builder.getInt32Ty()->getPointerTo(),
- LongType->getPointerTo(),
- LongType->getPointerTo(),
- LongType->getPointerTo(),
- LongType,
- LongType};
- FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- // The parameter 'ChunkSize' will hold strictly positive integer values,
- // regardless of PollyChunkSize's value
- Value *Args[] = {
- SourceLocationInfo,
- GlobalThreadID,
- Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
- IsLastPtr,
- LBPtr,
- UBPtr,
- StridePtr,
- ConstantInt::get(LongType, 1),
- ChunkSize};
- Builder.CreateCall(F, Args);
- }
- void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
- const std::string Name = "__kmpc_for_static_fini";
- Function *F = M->getFunction(Name);
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- // If F is not available, declare it.
- if (!F) {
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()};
- FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- Value *Args[] = {SourceLocationInfo, GlobalThreadID};
- Builder.CreateCall(F, Args);
- }
- void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
- Value *LB, Value *UB,
- Value *Inc,
- Value *ChunkSize) {
- const std::string Name =
- is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
- Function *F = M->getFunction(Name);
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- // If F is not available, declare it.
- if (!F) {
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(),
- Builder.getInt32Ty(),
- Builder.getInt32Ty(),
- LongType,
- LongType,
- LongType,
- LongType};
- FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- // The parameter 'ChunkSize' will hold strictly positive integer values,
- // regardless of PollyChunkSize's value
- Value *Args[] = {
- SourceLocationInfo,
- GlobalThreadID,
- Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
- LB,
- UB,
- Inc,
- ChunkSize};
- Builder.CreateCall(F, Args);
- }
- Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
- Value *IsLastPtr,
- Value *LBPtr,
- Value *UBPtr,
- Value *StridePtr) {
- const std::string Name =
- is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
- Function *F = M->getFunction(Name);
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), "struct.ident_t");
- // If F is not available, declare it.
- if (!F) {
- GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
- Type *Params[] = {IdentTy->getPointerTo(),
- Builder.getInt32Ty(),
- Builder.getInt32Ty()->getPointerTo(),
- LongType->getPointerTo(),
- LongType->getPointerTo(),
- LongType->getPointerTo()};
- FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
- F = Function::Create(Ty, Linkage, Name, M);
- }
- Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
- StridePtr};
- return Builder.CreateCall(F, Args);
- }
- // TODO: This function currently creates a source location dummy. It might be
- // necessary to (actually) provide information, in the future.
- GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
- const std::string LocName = ".loc.dummy";
- GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);
- if (SourceLocDummy == nullptr) {
- const std::string StructName = "struct.ident_t";
- StructType *IdentTy =
- StructType::getTypeByName(M->getContext(), StructName);
- // If the ident_t StructType is not available, declare it.
- // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
- if (!IdentTy) {
- Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
- Builder.getInt32Ty(), Builder.getInt32Ty(),
- Builder.getInt8PtrTy()};
- IdentTy =
- StructType::create(M->getContext(), LocMembers, StructName, false);
- }
- const auto ArrayType =
- llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);
- // Global Variable Definitions
- GlobalVariable *StrVar =
- new GlobalVariable(*M, ArrayType, true, GlobalValue::PrivateLinkage,
- nullptr, ".str.ident");
- StrVar->setAlignment(llvm::Align(1));
- SourceLocDummy = new GlobalVariable(
- *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
- SourceLocDummy->setAlignment(llvm::Align(8));
- // Constant Definitions
- Constant *InitStr = ConstantDataArray::getString(
- M->getContext(), "Source location dummy.", true);
- Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP(
- ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));
- Constant *LocInitStruct = ConstantStruct::get(
- IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
- Builder.getInt32(0), StrPtr});
- // Initialize variables
- StrVar->setInitializer(InitStr);
- SourceLocDummy->setInitializer(LocInitStruct);
- }
- return SourceLocDummy;
- }
- bool ParallelLoopGeneratorKMP::is64BitArch() {
- return (LongType->getIntegerBitWidth() == 64);
- }
- OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
- int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
- if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked)
- return OMPGeneralSchedulingType::StaticNonChunked;
- return Scheduling;
- }
|