12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459 |
- //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // OpenMP specific optimizations:
- //
- // - Deduplication of runtime calls, e.g., omp_get_thread_num.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Transforms/IPO/OpenMPOpt.h"
- #include "llvm/ADT/EnumeratedArray.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/Analysis/CallGraph.h"
- #include "llvm/Analysis/CallGraphSCCPass.h"
- #include "llvm/Analysis/OptimizationRemarkEmitter.h"
- #include "llvm/Analysis/ValueTracking.h"
- #include "llvm/Frontend/OpenMP/OMPConstants.h"
- #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Transforms/IPO.h"
- #include "llvm/Transforms/IPO/Attributor.h"
- #include "llvm/Transforms/Utils/BasicBlockUtils.h"
- #include "llvm/Transforms/Utils/CallGraphUpdater.h"
- #include "llvm/Transforms/Utils/CodeExtractor.h"
- using namespace llvm;
- using namespace omp;
- #define DEBUG_TYPE "openmp-opt"
- static cl::opt<bool> DisableOpenMPOptimizations(
- "openmp-opt-disable", cl::ZeroOrMore,
- cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
- cl::init(false));
- static cl::opt<bool> EnableParallelRegionMerging(
- "openmp-opt-enable-merging", cl::ZeroOrMore,
- cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
- cl::init(false));
- static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
- cl::Hidden);
- static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
- cl::init(false), cl::Hidden);
- static cl::opt<bool> HideMemoryTransferLatency(
- "openmp-hide-memory-transfer-latency",
- cl::desc("[WIP] Tries to hide the latency of host to device memory"
- " transfers"),
- cl::Hidden, cl::init(false));
- STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
- "Number of OpenMP runtime calls deduplicated");
- STATISTIC(NumOpenMPParallelRegionsDeleted,
- "Number of OpenMP parallel regions deleted");
- STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
- "Number of OpenMP runtime functions identified");
- STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
- "Number of OpenMP runtime function uses identified");
- STATISTIC(NumOpenMPTargetRegionKernels,
- "Number of OpenMP target region entry points (=kernels) identified");
- STATISTIC(
- NumOpenMPParallelRegionsReplacedInGPUStateMachine,
- "Number of OpenMP parallel regions replaced with ID in GPU state machines");
- STATISTIC(NumOpenMPParallelRegionsMerged,
- "Number of OpenMP parallel regions merged");
- #if !defined(NDEBUG)
- static constexpr auto TAG = "[" DEBUG_TYPE "]";
- #endif
- namespace {
- struct AAICVTracker;
- /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
- /// Attributor runs.
- struct OMPInformationCache : public InformationCache {
- OMPInformationCache(Module &M, AnalysisGetter &AG,
- BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
- SmallPtrSetImpl<Kernel> &Kernels)
- : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
- Kernels(Kernels) {
- OMPBuilder.initialize();
- initializeRuntimeFunctions();
- initializeInternalControlVars();
- }
- /// Generic information that describes an internal control variable.
- struct InternalControlVarInfo {
- /// The kind, as described by InternalControlVar enum.
- InternalControlVar Kind;
- /// The name of the ICV.
- StringRef Name;
- /// Environment variable associated with this ICV.
- StringRef EnvVarName;
- /// Initial value kind.
- ICVInitValue InitKind;
- /// Initial value.
- ConstantInt *InitValue;
- /// Setter RTL function associated with this ICV.
- RuntimeFunction Setter;
- /// Getter RTL function associated with this ICV.
- RuntimeFunction Getter;
- /// RTL Function corresponding to the override clause of this ICV
- RuntimeFunction Clause;
- };
- /// Generic information that describes a runtime function
- struct RuntimeFunctionInfo {
- /// The kind, as described by the RuntimeFunction enum.
- RuntimeFunction Kind;
- /// The name of the function.
- StringRef Name;
- /// Flag to indicate a variadic function.
- bool IsVarArg;
- /// The return type of the function.
- Type *ReturnType;
- /// The argument types of the function.
- SmallVector<Type *, 8> ArgumentTypes;
- /// The declaration if available.
- Function *Declaration = nullptr;
- /// Uses of this runtime function per function containing the use.
- using UseVector = SmallVector<Use *, 16>;
- /// Clear UsesMap for runtime function.
- void clearUsesMap() { UsesMap.clear(); }
- /// Boolean conversion that is true if the runtime function was found.
- operator bool() const { return Declaration; }
- /// Return the vector of uses in function \p F.
- UseVector &getOrCreateUseVector(Function *F) {
- std::shared_ptr<UseVector> &UV = UsesMap[F];
- if (!UV)
- UV = std::make_shared<UseVector>();
- return *UV;
- }
- /// Return the vector of uses in function \p F or `nullptr` if there are
- /// none.
- const UseVector *getUseVector(Function &F) const {
- auto I = UsesMap.find(&F);
- if (I != UsesMap.end())
- return I->second.get();
- return nullptr;
- }
- /// Return how many functions contain uses of this runtime function.
- size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
- /// Return the number of arguments (or the minimal number for variadic
- /// functions).
- size_t getNumArgs() const { return ArgumentTypes.size(); }
- /// Run the callback \p CB on each use and forget the use if the result is
- /// true. The callback will be fed the function in which the use was
- /// encountered as second argument.
- void foreachUse(SmallVectorImpl<Function *> &SCC,
- function_ref<bool(Use &, Function &)> CB) {
- for (Function *F : SCC)
- foreachUse(CB, F);
- }
- /// Run the callback \p CB on each use within the function \p F and forget
- /// the use if the result is true.
- void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
- SmallVector<unsigned, 8> ToBeDeleted;
- ToBeDeleted.clear();
- unsigned Idx = 0;
- UseVector &UV = getOrCreateUseVector(F);
- for (Use *U : UV) {
- if (CB(*U, *F))
- ToBeDeleted.push_back(Idx);
- ++Idx;
- }
- // Remove the to-be-deleted indices in reverse order as prior
- // modifications will not modify the smaller indices.
- while (!ToBeDeleted.empty()) {
- unsigned Idx = ToBeDeleted.pop_back_val();
- UV[Idx] = UV.back();
- UV.pop_back();
- }
- }
- private:
- /// Map from functions to all uses of this runtime function contained in
- /// them.
- DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
- };
- /// An OpenMP-IR-Builder instance
- OpenMPIRBuilder OMPBuilder;
- /// Map from runtime function kind to the runtime function description.
- EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
- RuntimeFunction::OMPRTL___last>
- RFIs;
- /// Map from ICV kind to the ICV description.
- EnumeratedArray<InternalControlVarInfo, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVs;
- /// Helper to initialize all internal control variable information for those
- /// defined in OMPKinds.def.
- void initializeInternalControlVars() {
- #define ICV_RT_SET(_Name, RTL) \
- { \
- auto &ICV = ICVs[_Name]; \
- ICV.Setter = RTL; \
- }
- #define ICV_RT_GET(Name, RTL) \
- { \
- auto &ICV = ICVs[Name]; \
- ICV.Getter = RTL; \
- }
- #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
- { \
- auto &ICV = ICVs[Enum]; \
- ICV.Name = _Name; \
- ICV.Kind = Enum; \
- ICV.InitKind = Init; \
- ICV.EnvVarName = _EnvVarName; \
- switch (ICV.InitKind) { \
- case ICV_IMPLEMENTATION_DEFINED: \
- ICV.InitValue = nullptr; \
- break; \
- case ICV_ZERO: \
- ICV.InitValue = ConstantInt::get( \
- Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
- break; \
- case ICV_FALSE: \
- ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
- break; \
- case ICV_LAST: \
- break; \
- } \
- }
- #include "llvm/Frontend/OpenMP/OMPKinds.def"
- }
- /// Returns true if the function declaration \p F matches the runtime
- /// function types, that is, return type \p RTFRetType, and argument types
- /// \p RTFArgTypes.
- static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
- SmallVector<Type *, 8> &RTFArgTypes) {
- // TODO: We should output information to the user (under debug output
- // and via remarks).
- if (!F)
- return false;
- if (F->getReturnType() != RTFRetType)
- return false;
- if (F->arg_size() != RTFArgTypes.size())
- return false;
- auto RTFTyIt = RTFArgTypes.begin();
- for (Argument &Arg : F->args()) {
- if (Arg.getType() != *RTFTyIt)
- return false;
- ++RTFTyIt;
- }
- return true;
- }
- // Helper to collect all uses of the declaration in the UsesMap.
- unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
- unsigned NumUses = 0;
- if (!RFI.Declaration)
- return NumUses;
- OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
- if (CollectStats) {
- NumOpenMPRuntimeFunctionsIdentified += 1;
- NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
- }
- // TODO: We directly convert uses into proper calls and unknown uses.
- for (Use &U : RFI.Declaration->uses()) {
- if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
- if (ModuleSlice.count(UserI->getFunction())) {
- RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
- ++NumUses;
- }
- } else {
- RFI.getOrCreateUseVector(nullptr).push_back(&U);
- ++NumUses;
- }
- }
- return NumUses;
- }
- // Helper function to recollect uses of a runtime function.
- void recollectUsesForFunction(RuntimeFunction RTF) {
- auto &RFI = RFIs[RTF];
- RFI.clearUsesMap();
- collectUses(RFI, /*CollectStats*/ false);
- }
- // Helper function to recollect uses of all runtime functions.
- void recollectUses() {
- for (int Idx = 0; Idx < RFIs.size(); ++Idx)
- recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
- }
- /// Helper to initialize all runtime function information for those defined
- /// in OpenMPKinds.def.
- void initializeRuntimeFunctions() {
- Module &M = *((*ModuleSlice.begin())->getParent());
- // Helper macros for handling __VA_ARGS__ in OMP_RTL
- #define OMP_TYPE(VarName, ...) \
- Type *VarName = OMPBuilder.VarName; \
- (void)VarName;
- #define OMP_ARRAY_TYPE(VarName, ...) \
- ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
- (void)VarName##Ty; \
- PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
- (void)VarName##PtrTy;
- #define OMP_FUNCTION_TYPE(VarName, ...) \
- FunctionType *VarName = OMPBuilder.VarName; \
- (void)VarName; \
- PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
- (void)VarName##Ptr;
- #define OMP_STRUCT_TYPE(VarName, ...) \
- StructType *VarName = OMPBuilder.VarName; \
- (void)VarName; \
- PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
- (void)VarName##Ptr;
- #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
- { \
- SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
- Function *F = M.getFunction(_Name); \
- if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
- auto &RFI = RFIs[_Enum]; \
- RFI.Kind = _Enum; \
- RFI.Name = _Name; \
- RFI.IsVarArg = _IsVarArg; \
- RFI.ReturnType = OMPBuilder._ReturnType; \
- RFI.ArgumentTypes = std::move(ArgsTypes); \
- RFI.Declaration = F; \
- unsigned NumUses = collectUses(RFI); \
- (void)NumUses; \
- LLVM_DEBUG({ \
- dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
- << " found\n"; \
- if (RFI.Declaration) \
- dbgs() << TAG << "-> got " << NumUses << " uses in " \
- << RFI.getNumFunctionsWithUses() \
- << " different functions.\n"; \
- }); \
- } \
- }
- #include "llvm/Frontend/OpenMP/OMPKinds.def"
- // TODO: We should attach the attributes defined in OMPKinds.def.
- }
- /// Collection of known kernels (\see Kernel) in the module.
- SmallPtrSetImpl<Kernel> &Kernels;
- };
- /// Used to map the values physically (in the IR) stored in an offload
- /// array, to a vector in memory.
- struct OffloadArray {
- /// Physical array (in the IR).
- AllocaInst *Array = nullptr;
- /// Mapped values.
- SmallVector<Value *, 8> StoredValues;
- /// Last stores made in the offload array.
- SmallVector<StoreInst *, 8> LastAccesses;
- OffloadArray() = default;
- /// Initializes the OffloadArray with the values stored in \p Array before
- /// instruction \p Before is reached. Returns false if the initialization
- /// fails.
- /// This MUST be used immediately after the construction of the object.
- bool initialize(AllocaInst &Array, Instruction &Before) {
- if (!Array.getAllocatedType()->isArrayTy())
- return false;
- if (!getValues(Array, Before))
- return false;
- this->Array = &Array;
- return true;
- }
- static const unsigned DeviceIDArgNum = 1;
- static const unsigned BasePtrsArgNum = 3;
- static const unsigned PtrsArgNum = 4;
- static const unsigned SizesArgNum = 5;
- private:
- /// Traverses the BasicBlock where \p Array is, collecting the stores made to
- /// \p Array, leaving StoredValues with the values stored before the
- /// instruction \p Before is reached.
- bool getValues(AllocaInst &Array, Instruction &Before) {
- // Initialize container.
- const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
- StoredValues.assign(NumValues, nullptr);
- LastAccesses.assign(NumValues, nullptr);
- // TODO: This assumes the instruction \p Before is in the same
- // BasicBlock as Array. Make it general, for any control flow graph.
- BasicBlock *BB = Array.getParent();
- if (BB != Before.getParent())
- return false;
- const DataLayout &DL = Array.getModule()->getDataLayout();
- const unsigned int PointerSize = DL.getPointerSize();
- for (Instruction &I : *BB) {
- if (&I == &Before)
- break;
- if (!isa<StoreInst>(&I))
- continue;
- auto *S = cast<StoreInst>(&I);
- int64_t Offset = -1;
- auto *Dst =
- GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
- if (Dst == &Array) {
- int64_t Idx = Offset / PointerSize;
- StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
- LastAccesses[Idx] = S;
- }
- }
- return isFilled();
- }
- /// Returns true if all values in StoredValues and
- /// LastAccesses are not nullptrs.
- bool isFilled() {
- const unsigned NumValues = StoredValues.size();
- for (unsigned I = 0; I < NumValues; ++I) {
- if (!StoredValues[I] || !LastAccesses[I])
- return false;
- }
- return true;
- }
- };
- struct OpenMPOpt {
- using OptimizationRemarkGetter =
- function_ref<OptimizationRemarkEmitter &(Function *)>;
- OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
- OptimizationRemarkGetter OREGetter,
- OMPInformationCache &OMPInfoCache, Attributor &A)
- : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
- OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
- /// Check if any remarks are enabled for openmp-opt
- bool remarksEnabled() {
- auto &Ctx = M.getContext();
- return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
- }
- /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
- bool run() {
- if (SCC.empty())
- return false;
- bool Changed = false;
- LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
- << " functions in a slice with "
- << OMPInfoCache.ModuleSlice.size() << " functions\n");
- if (PrintICVValues)
- printICVs();
- if (PrintOpenMPKernels)
- printKernels();
- Changed |= rewriteDeviceCodeStateMachine();
- Changed |= runAttributor();
- // Recollect uses, in case Attributor deleted any.
- OMPInfoCache.recollectUses();
- Changed |= deleteParallelRegions();
- if (HideMemoryTransferLatency)
- Changed |= hideMemTransfersLatency();
- if (remarksEnabled())
- analysisGlobalization();
- Changed |= deduplicateRuntimeCalls();
- if (EnableParallelRegionMerging) {
- if (mergeParallelRegions()) {
- deduplicateRuntimeCalls();
- Changed = true;
- }
- }
- return Changed;
- }
- /// Print initial ICV values for testing.
- /// FIXME: This should be done from the Attributor once it is added.
- void printICVs() const {
- InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
- ICV_proc_bind};
- for (Function *F : OMPInfoCache.ModuleSlice) {
- for (auto ICV : ICVs) {
- auto ICVInfo = OMPInfoCache.ICVs[ICV];
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
- << " Value: "
- << (ICVInfo.InitValue
- ? ICVInfo.InitValue->getValue().toString(10, true)
- : "IMPLEMENTATION_DEFINED");
- };
- emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
- }
- }
- }
- /// Print OpenMP GPU kernels for testing.
- void printKernels() const {
- for (Function *F : SCC) {
- if (!OMPInfoCache.Kernels.count(F))
- continue;
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "OpenMP GPU kernel "
- << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
- };
- emitRemarkOnFunction(F, "OpenMPGPU", Remark);
- }
- }
- /// Return the call if \p U is a callee use in a regular call. If \p RFI is
- /// given it has to be the callee or a nullptr is returned.
- static CallInst *getCallIfRegularCall(
- Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
- CallInst *CI = dyn_cast<CallInst>(U.getUser());
- if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
- (!RFI || CI->getCalledFunction() == RFI->Declaration))
- return CI;
- return nullptr;
- }
- /// Return the call if \p V is a regular call. If \p RFI is given it has to be
- /// the callee or a nullptr is returned.
- static CallInst *getCallIfRegularCall(
- Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
- CallInst *CI = dyn_cast<CallInst>(&V);
- if (CI && !CI->hasOperandBundles() &&
- (!RFI || CI->getCalledFunction() == RFI->Declaration))
- return CI;
- return nullptr;
- }
- private:
- /// Merge parallel regions when it is safe.
- bool mergeParallelRegions() {
- const unsigned CallbackCalleeOperand = 2;
- const unsigned CallbackFirstArgOperand = 3;
- using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
- // Check if there are any __kmpc_fork_call calls to merge.
- OMPInformationCache::RuntimeFunctionInfo &RFI =
- OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
- if (!RFI.Declaration)
- return false;
- // Unmergable calls that prevent merging a parallel region.
- OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
- OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
- OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
- };
- bool Changed = false;
- LoopInfo *LI = nullptr;
- DominatorTree *DT = nullptr;
- SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
- BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
- BasicBlock *CGStartBB = CodeGenIP.getBlock();
- BasicBlock *CGEndBB =
- SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
- assert(StartBB != nullptr && "StartBB should not be null");
- CGStartBB->getTerminator()->setSuccessor(0, StartBB);
- assert(EndBB != nullptr && "EndBB should not be null");
- EndBB->getTerminator()->setSuccessor(0, CGEndBB);
- };
- auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
- Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
- ReplacementValue = &Inner;
- return CodeGenIP;
- };
- auto FiniCB = [&](InsertPointTy CodeGenIP) {};
- /// Create a sequential execution region within a merged parallel region,
- /// encapsulated in a master construct with a barrier for synchronization.
- auto CreateSequentialRegion = [&](Function *OuterFn,
- BasicBlock *OuterPredBB,
- Instruction *SeqStartI,
- Instruction *SeqEndI) {
- // Isolate the instructions of the sequential region to a separate
- // block.
- BasicBlock *ParentBB = SeqStartI->getParent();
- BasicBlock *SeqEndBB =
- SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
- BasicBlock *SeqAfterBB =
- SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
- BasicBlock *SeqStartBB =
- SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
- assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
- "Expected a different CFG");
- const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
- ParentBB->getTerminator()->eraseFromParent();
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
- BasicBlock *CGStartBB = CodeGenIP.getBlock();
- BasicBlock *CGEndBB =
- SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
- assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
- CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
- assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
- SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
- };
- auto FiniCB = [&](InsertPointTy CodeGenIP) {};
- // Find outputs from the sequential region to outside users and
- // broadcast their values to them.
- for (Instruction &I : *SeqStartBB) {
- SmallPtrSet<Instruction *, 4> OutsideUsers;
- for (User *Usr : I.users()) {
- Instruction &UsrI = *cast<Instruction>(Usr);
- // Ignore outputs to LT intrinsics, code extraction for the merged
- // parallel region will fix them.
- if (UsrI.isLifetimeStartOrEnd())
- continue;
- if (UsrI.getParent() != SeqStartBB)
- OutsideUsers.insert(&UsrI);
- }
- if (OutsideUsers.empty())
- continue;
- // Emit an alloca in the outer region to store the broadcasted
- // value.
- const DataLayout &DL = M.getDataLayout();
- AllocaInst *AllocaI = new AllocaInst(
- I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName() + ".seq.output.alloc", &OuterFn->front().front());
- // Emit a store instruction in the sequential BB to update the
- // value.
- new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
- // Emit a load instruction and replace the use of the output value
- // with it.
- for (Instruction *UsrI : OutsideUsers) {
- LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
- I.getName() + ".seq.output.load", UsrI);
- UsrI->replaceUsesOfWith(&I, LoadI);
- }
- }
- OpenMPIRBuilder::LocationDescription Loc(
- InsertPointTy(ParentBB, ParentBB->end()), DL);
- InsertPointTy SeqAfterIP =
- OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
- OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
- BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
- LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
- << "\n");
- };
- // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
- // contained in BB and only separated by instructions that can be
- // redundantly executed in parallel. The block BB is split before the first
- // call (in MergableCIs) and after the last so the entire region we merge
- // into a single parallel region is contained in a single basic block
- // without any other instructions. We use the OpenMPIRBuilder to outline
- // that block and call the resulting function via __kmpc_fork_call.
- auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
- // TODO: Change the interface to allow single CIs expanded, e.g, to
- // include an outer loop.
- assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
- auto Remark = [&](OptimizationRemark OR) {
- OR << "Parallel region at "
- << ore::NV("OpenMPParallelMergeFront",
- MergableCIs.front()->getDebugLoc())
- << " merged with parallel regions at ";
- for (auto *CI : llvm::drop_begin(MergableCIs)) {
- OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
- if (CI != MergableCIs.back())
- OR << ", ";
- }
- return OR;
- };
- emitRemark<OptimizationRemark>(MergableCIs.front(),
- "OpenMPParallelRegionMerging", Remark);
- Function *OriginalFn = BB->getParent();
- LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
- << " parallel regions in " << OriginalFn->getName()
- << "\n");
- // Isolate the calls to merge in a separate block.
- EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
- BasicBlock *AfterBB =
- SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
- StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
- "omp.par.merged");
- assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
- const DebugLoc DL = BB->getTerminator()->getDebugLoc();
- BB->getTerminator()->eraseFromParent();
- // Create sequential regions for sequential instructions that are
- // in-between mergable parallel regions.
- for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
- It != End; ++It) {
- Instruction *ForkCI = *It;
- Instruction *NextForkCI = *(It + 1);
- // Continue if there are not in-between instructions.
- if (ForkCI->getNextNode() == NextForkCI)
- continue;
- CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
- NextForkCI->getPrevNode());
- }
- OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
- DL);
- IRBuilder<>::InsertPoint AllocaIP(
- &OriginalFn->getEntryBlock(),
- OriginalFn->getEntryBlock().getFirstInsertionPt());
- // Create the merged parallel region with default proc binding, to
- // avoid overriding binding settings, and without explicit cancellation.
- InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
- OMP_PROC_BIND_default, /* IsCancellable */ false);
- BranchInst::Create(AfterBB, AfterIP.getBlock());
- // Perform the actual outlining.
- OMPInfoCache.OMPBuilder.finalize(/* AllowExtractorSinking */ true);
- Function *OutlinedFn = MergableCIs.front()->getCaller();
- // Replace the __kmpc_fork_call calls with direct calls to the outlined
- // callbacks.
- SmallVector<Value *, 8> Args;
- for (auto *CI : MergableCIs) {
- Value *Callee =
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
- FunctionType *FT =
- cast<FunctionType>(Callee->getType()->getPointerElementType());
- Args.clear();
- Args.push_back(OutlinedFn->getArg(0));
- Args.push_back(OutlinedFn->getArg(1));
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- Args.push_back(CI->getArgOperand(U));
- CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
- if (CI->getDebugLoc())
- NewCI->setDebugLoc(CI->getDebugLoc());
- // Forward parameter attributes from the callback to the callee.
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
- NewCI->addParamAttr(
- U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
- // Emit an explicit barrier to replace the implicit fork-join barrier.
- if (CI != MergableCIs.back()) {
- // TODO: Remove barrier if the merged parallel region includes the
- // 'nowait' clause.
- OMPInfoCache.OMPBuilder.createBarrier(
- InsertPointTy(NewCI->getParent(),
- NewCI->getNextNode()->getIterator()),
- OMPD_parallel);
- }
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Parallel region at "
- << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
- << " merged with "
- << ore::NV("OpenMPParallelMergeFront",
- MergableCIs.front()->getDebugLoc());
- };
- if (CI != MergableCIs.front())
- emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
- Remark);
- CI->eraseFromParent();
- }
- assert(OutlinedFn != OriginalFn && "Outlining failed");
- CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
- CGUpdater.reanalyzeFunction(*OriginalFn);
- NumOpenMPParallelRegionsMerged += MergableCIs.size();
- return true;
- };
- // Helper function that identifes sequences of
- // __kmpc_fork_call uses in a basic block.
- auto DetectPRsCB = [&](Use &U, Function &F) {
- CallInst *CI = getCallIfRegularCall(U, &RFI);
- BB2PRMap[CI->getParent()].insert(CI);
- return false;
- };
- BB2PRMap.clear();
- RFI.foreachUse(SCC, DetectPRsCB);
- SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
- // Find mergable parallel regions within a basic block that are
- // safe to merge, that is any in-between instructions can safely
- // execute in parallel after merging.
- // TODO: support merging across basic-blocks.
- for (auto &It : BB2PRMap) {
- auto &CIs = It.getSecond();
- if (CIs.size() < 2)
- continue;
- BasicBlock *BB = It.getFirst();
- SmallVector<CallInst *, 4> MergableCIs;
- /// Returns true if the instruction is mergable, false otherwise.
- /// A terminator instruction is unmergable by definition since merging
- /// works within a BB. Instructions before the mergable region are
- /// mergable if they are not calls to OpenMP runtime functions that may
- /// set different execution parameters for subsequent parallel regions.
- /// Instructions in-between parallel regions are mergable if they are not
- /// calls to any non-intrinsic function since that may call a non-mergable
- /// OpenMP runtime function.
- auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
- // We do not merge across BBs, hence return false (unmergable) if the
- // instruction is a terminator.
- if (I.isTerminator())
- return false;
- if (!isa<CallInst>(&I))
- return true;
- CallInst *CI = cast<CallInst>(&I);
- if (IsBeforeMergableRegion) {
- Function *CalledFunction = CI->getCalledFunction();
- if (!CalledFunction)
- return false;
- // Return false (unmergable) if the call before the parallel
- // region calls an explicit affinity (proc_bind) or number of
- // threads (num_threads) compiler-generated function. Those settings
- // may be incompatible with following parallel regions.
- // TODO: ICV tracking to detect compatibility.
- for (const auto &RFI : UnmergableCallsInfo) {
- if (CalledFunction == RFI.Declaration)
- return false;
- }
- } else {
- // Return false (unmergable) if there is a call instruction
- // in-between parallel regions when it is not an intrinsic. It
- // may call an unmergable OpenMP runtime function in its callpath.
- // TODO: Keep track of possible OpenMP calls in the callpath.
- if (!isa<IntrinsicInst>(CI))
- return false;
- }
- return true;
- };
- // Find maximal number of parallel region CIs that are safe to merge.
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- Instruction &I = *It;
- ++It;
- if (CIs.count(&I)) {
- MergableCIs.push_back(cast<CallInst>(&I));
- continue;
- }
- // Continue expanding if the instruction is mergable.
- if (IsMergable(I, MergableCIs.empty()))
- continue;
- // Forward the instruction iterator to skip the next parallel region
- // since there is an unmergable instruction which can affect it.
- for (; It != End; ++It) {
- Instruction &SkipI = *It;
- if (CIs.count(&SkipI)) {
- LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
- << " due to " << I << "\n");
- ++It;
- break;
- }
- }
- // Store mergable regions found.
- if (MergableCIs.size() > 1) {
- MergableCIsVector.push_back(MergableCIs);
- LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
- << " parallel regions in block " << BB->getName()
- << " of function " << BB->getParent()->getName()
- << "\n";);
- }
- MergableCIs.clear();
- }
- if (!MergableCIsVector.empty()) {
- Changed = true;
- for (auto &MergableCIs : MergableCIsVector)
- Merge(MergableCIs, BB);
- }
- }
- if (Changed) {
- /// Re-collect use for fork calls, emitted barrier calls, and
- /// any emitted master/end_master calls.
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
- }
- return Changed;
- }
- /// Try to delete parallel regions if possible.
- bool deleteParallelRegions() {
- const unsigned CallbackCalleeOperand = 2;
- OMPInformationCache::RuntimeFunctionInfo &RFI =
- OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
- if (!RFI.Declaration)
- return false;
- bool Changed = false;
- auto DeleteCallCB = [&](Use &U, Function &) {
- CallInst *CI = getCallIfRegularCall(U);
- if (!CI)
- return false;
- auto *Fn = dyn_cast<Function>(
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
- if (!Fn)
- return false;
- if (!Fn->onlyReadsMemory())
- return false;
- if (!Fn->hasFnAttribute(Attribute::WillReturn))
- return false;
- LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
- << CI->getCaller()->getName() << "\n");
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Parallel region in "
- << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
- << " deleted";
- };
- emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
- Remark);
- CGUpdater.removeCallSite(*CI);
- CI->eraseFromParent();
- Changed = true;
- ++NumOpenMPParallelRegionsDeleted;
- return true;
- };
- RFI.foreachUse(SCC, DeleteCallCB);
- return Changed;
- }
- /// Try to eliminate runtime calls by reusing existing ones.
- bool deduplicateRuntimeCalls() {
- bool Changed = false;
- RuntimeFunction DeduplicableRuntimeCallIDs[] = {
- OMPRTL_omp_get_num_threads,
- OMPRTL_omp_in_parallel,
- OMPRTL_omp_get_cancellation,
- OMPRTL_omp_get_thread_limit,
- OMPRTL_omp_get_supported_active_levels,
- OMPRTL_omp_get_level,
- OMPRTL_omp_get_ancestor_thread_num,
- OMPRTL_omp_get_team_size,
- OMPRTL_omp_get_active_level,
- OMPRTL_omp_in_final,
- OMPRTL_omp_get_proc_bind,
- OMPRTL_omp_get_num_places,
- OMPRTL_omp_get_num_procs,
- OMPRTL_omp_get_place_num,
- OMPRTL_omp_get_partition_num_places,
- OMPRTL_omp_get_partition_place_nums};
- // Global-tid is handled separately.
- SmallSetVector<Value *, 16> GTIdArgs;
- collectGlobalThreadIdArguments(GTIdArgs);
- LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
- << " global thread ID arguments\n");
- for (Function *F : SCC) {
- for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
- Changed |= deduplicateRuntimeCalls(
- *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
- // __kmpc_global_thread_num is special as we can replace it with an
- // argument in enough cases to make it worth trying.
- Value *GTIdArg = nullptr;
- for (Argument &Arg : F->args())
- if (GTIdArgs.count(&Arg)) {
- GTIdArg = &Arg;
- break;
- }
- Changed |= deduplicateRuntimeCalls(
- *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
- }
- return Changed;
- }
- /// Tries to hide the latency of runtime calls that involve host to
- /// device memory transfers by splitting them into their "issue" and "wait"
- /// versions. The "issue" is moved upwards as much as possible. The "wait" is
- /// moved downards as much as possible. The "issue" issues the memory transfer
- /// asynchronously, returning a handle. The "wait" waits in the returned
- /// handle for the memory transfer to finish.
- bool hideMemTransfersLatency() {
- auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
- bool Changed = false;
- auto SplitMemTransfers = [&](Use &U, Function &Decl) {
- auto *RTCall = getCallIfRegularCall(U, &RFI);
- if (!RTCall)
- return false;
- OffloadArray OffloadArrays[3];
- if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
- return false;
- LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
- // TODO: Check if can be moved upwards.
- bool WasSplit = false;
- Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
- if (WaitMovementPoint)
- WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
- Changed |= WasSplit;
- return WasSplit;
- };
- RFI.foreachUse(SCC, SplitMemTransfers);
- return Changed;
- }
- void analysisGlobalization() {
- RuntimeFunction GlobalizationRuntimeIDs[] = {
- OMPRTL___kmpc_data_sharing_coalesced_push_stack,
- OMPRTL___kmpc_data_sharing_push_stack};
- for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
- auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];
- auto CheckGlobalization = [&](Use &U, Function &Decl) {
- if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
- auto Remark = [&](OptimizationRemarkAnalysis ORA) {
- return ORA
- << "Found thread data sharing on the GPU. "
- << "Expect degraded performance due to data globalization.";
- };
- emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
- Remark);
- }
- return false;
- };
- RFI.foreachUse(SCC, CheckGlobalization);
- }
- }
- /// Maps the values stored in the offload arrays passed as arguments to
- /// \p RuntimeCall into the offload arrays in \p OAs.
- bool getValuesInOffloadArrays(CallInst &RuntimeCall,
- MutableArrayRef<OffloadArray> OAs) {
- assert(OAs.size() == 3 && "Need space for three offload arrays!");
- // A runtime call that involves memory offloading looks something like:
- // call void @__tgt_target_data_begin_mapper(arg0, arg1,
- // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
- // ...)
- // So, the idea is to access the allocas that allocate space for these
- // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
- // Therefore:
- // i8** %offload_baseptrs.
- Value *BasePtrsArg =
- RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
- // i8** %offload_ptrs.
- Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
- // i8** %offload_sizes.
- Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
- // Get values stored in **offload_baseptrs.
- auto *V = getUnderlyingObject(BasePtrsArg);
- if (!isa<AllocaInst>(V))
- return false;
- auto *BasePtrsArray = cast<AllocaInst>(V);
- if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
- return false;
- // Get values stored in **offload_baseptrs.
- V = getUnderlyingObject(PtrsArg);
- if (!isa<AllocaInst>(V))
- return false;
- auto *PtrsArray = cast<AllocaInst>(V);
- if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
- return false;
- // Get values stored in **offload_sizes.
- V = getUnderlyingObject(SizesArg);
- // If it's a [constant] global array don't analyze it.
- if (isa<GlobalValue>(V))
- return isa<Constant>(V);
- if (!isa<AllocaInst>(V))
- return false;
- auto *SizesArray = cast<AllocaInst>(V);
- if (!OAs[2].initialize(*SizesArray, RuntimeCall))
- return false;
- return true;
- }
- /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
- /// For now this is a way to test that the function getValuesInOffloadArrays
- /// is working properly.
- /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
- void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
- assert(OAs.size() == 3 && "There are three offload arrays to debug!");
- LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
- std::string ValuesStr;
- raw_string_ostream Printer(ValuesStr);
- std::string Separator = " --- ";
- for (auto *BP : OAs[0].StoredValues) {
- BP->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
- ValuesStr.clear();
- for (auto *P : OAs[1].StoredValues) {
- P->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
- ValuesStr.clear();
- for (auto *S : OAs[2].StoredValues) {
- S->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
- }
- /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
- /// moved. Returns nullptr if the movement is not possible, or not worth it.
- Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
- // FIXME: This traverses only the BasicBlock where RuntimeCall is.
- // Make it traverse the CFG.
- Instruction *CurrentI = &RuntimeCall;
- bool IsWorthIt = false;
- while ((CurrentI = CurrentI->getNextNode())) {
- // TODO: Once we detect the regions to be offloaded we should use the
- // alias analysis manager to check if CurrentI may modify one of
- // the offloaded regions.
- if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
- if (IsWorthIt)
- return CurrentI;
- return nullptr;
- }
- // FIXME: For now if we move it over anything without side effect
- // is worth it.
- IsWorthIt = true;
- }
- // Return end of BasicBlock.
- return RuntimeCall.getParent()->getTerminator();
- }
- /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
- bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
- Instruction &WaitMovementPoint) {
- // Create stack allocated handle (__tgt_async_info) at the beginning of the
- // function. Used for storing information of the async transfer, allowing to
- // wait on it later.
- auto &IRBuilder = OMPInfoCache.OMPBuilder;
- auto *F = RuntimeCall.getCaller();
- Instruction *FirstInst = &(F->getEntryBlock().front());
- AllocaInst *Handle = new AllocaInst(
- IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
- // Add "issue" runtime call declaration:
- // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
- // i8**, i8**, i64*, i64*)
- FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___tgt_target_data_begin_mapper_issue);
- // Change RuntimeCall call site for its asynchronous version.
- SmallVector<Value *, 16> Args;
- for (auto &Arg : RuntimeCall.args())
- Args.push_back(Arg.get());
- Args.push_back(Handle);
- CallInst *IssueCallsite =
- CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
- RuntimeCall.eraseFromParent();
- // Add "wait" runtime call declaration:
- // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
- FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___tgt_target_data_begin_mapper_wait);
- Value *WaitParams[2] = {
- IssueCallsite->getArgOperand(
- OffloadArray::DeviceIDArgNum), // device_id.
- Handle // handle to wait on.
- };
- CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
- return true;
- }
- static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
- bool GlobalOnly, bool &SingleChoice) {
- if (CurrentIdent == NextIdent)
- return CurrentIdent;
- // TODO: Figure out how to actually combine multiple debug locations. For
- // now we just keep an existing one if there is a single choice.
- if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
- SingleChoice = !CurrentIdent;
- return NextIdent;
- }
- return nullptr;
- }
- /// Return an `struct ident_t*` value that represents the ones used in the
- /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
- /// return a local `struct ident_t*`. For now, if we cannot find a suitable
- /// return value we create one from scratch. We also do not yet combine
- /// information, e.g., the source locations, see combinedIdentStruct.
- Value *
- getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
- Function &F, bool GlobalOnly) {
- bool SingleChoice = true;
- Value *Ident = nullptr;
- auto CombineIdentStruct = [&](Use &U, Function &Caller) {
- CallInst *CI = getCallIfRegularCall(U, &RFI);
- if (!CI || &F != &Caller)
- return false;
- Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
- /* GlobalOnly */ true, SingleChoice);
- return false;
- };
- RFI.foreachUse(SCC, CombineIdentStruct);
- if (!Ident || !SingleChoice) {
- // The IRBuilder uses the insertion block to get to the module, this is
- // unfortunate but we work around it for now.
- if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
- OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
- &F.getEntryBlock(), F.getEntryBlock().begin()));
- // Create a fallback location if non was found.
- // TODO: Use the debug locations of the calls instead.
- Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
- Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
- }
- return Ident;
- }
- /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
- /// \p ReplVal if given.
- bool deduplicateRuntimeCalls(Function &F,
- OMPInformationCache::RuntimeFunctionInfo &RFI,
- Value *ReplVal = nullptr) {
- auto *UV = RFI.getUseVector(F);
- if (!UV || UV->size() + (ReplVal != nullptr) < 2)
- return false;
- LLVM_DEBUG(
- dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
- << (ReplVal ? " with an existing value\n" : "\n") << "\n");
- assert((!ReplVal || (isa<Argument>(ReplVal) &&
- cast<Argument>(ReplVal)->getParent() == &F)) &&
- "Unexpected replacement value!");
- // TODO: Use dominance to find a good position instead.
- auto CanBeMoved = [this](CallBase &CB) {
- unsigned NumArgs = CB.getNumArgOperands();
- if (NumArgs == 0)
- return true;
- if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
- return false;
- for (unsigned u = 1; u < NumArgs; ++u)
- if (isa<Instruction>(CB.getArgOperand(u)))
- return false;
- return true;
- };
- if (!ReplVal) {
- for (Use *U : *UV)
- if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
- if (!CanBeMoved(*CI))
- continue;
- auto Remark = [&](OptimizationRemark OR) {
- auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
- return OR << "OpenMP runtime call "
- << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
- << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
- };
- emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
- CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
- ReplVal = CI;
- break;
- }
- if (!ReplVal)
- return false;
- }
- // If we use a call as a replacement value we need to make sure the ident is
- // valid at the new location. For now we just pick a global one, either
- // existing and used by one of the calls, or created from scratch.
- if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
- if (CI->getNumArgOperands() > 0 &&
- CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
- Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
- /* GlobalOnly */ true);
- CI->setArgOperand(0, Ident);
- }
- }
- bool Changed = false;
- auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
- CallInst *CI = getCallIfRegularCall(U, &RFI);
- if (!CI || CI == ReplVal || &F != &Caller)
- return false;
- assert(CI->getCaller() == &F && "Unexpected call!");
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "OpenMP runtime call "
- << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
- };
- emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
- CGUpdater.removeCallSite(*CI);
- CI->replaceAllUsesWith(ReplVal);
- CI->eraseFromParent();
- ++NumOpenMPRuntimeCallsDeduplicated;
- Changed = true;
- return true;
- };
- RFI.foreachUse(SCC, ReplaceAndDeleteCB);
- return Changed;
- }
- /// Collect arguments that represent the global thread id in \p GTIdArgs.
- void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) {
- // TODO: Below we basically perform a fixpoint iteration with a pessimistic
- // initialization. We could define an AbstractAttribute instead and
- // run the Attributor here once it can be run as an SCC pass.
- // Helper to check the argument \p ArgNo at all call sites of \p F for
- // a GTId.
- auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
- if (!F.hasLocalLinkage())
- return false;
- for (Use &U : F.uses()) {
- if (CallInst *CI = getCallIfRegularCall(U)) {
- Value *ArgOp = CI->getArgOperand(ArgNo);
- if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
- getCallIfRegularCall(
- *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
- continue;
- }
- return false;
- }
- return true;
- };
- // Helper to identify uses of a GTId as GTId arguments.
- auto AddUserArgs = [&](Value >Id) {
- for (Use &U : GTId.uses())
- if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
- if (CI->isArgOperand(&U))
- if (Function *Callee = CI->getCalledFunction())
- if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
- GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
- };
- // The argument users of __kmpc_global_thread_num calls are GTIds.
- OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
- OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
- GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
- if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
- AddUserArgs(*CI);
- return false;
- });
- // Transitively search for more arguments by looking at the users of the
- // ones we know already. During the search the GTIdArgs vector is extended
- // so we cannot cache the size nor can we use a range based for.
- for (unsigned u = 0; u < GTIdArgs.size(); ++u)
- AddUserArgs(*GTIdArgs[u]);
- }
- /// Kernel (=GPU) optimizations and utility functions
- ///
- ///{{
- /// Check if \p F is a kernel, hence entry point for target offloading.
- bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
- /// Cache to remember the unique kernel for a function.
- DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
- /// Find the unique kernel that will execute \p F, if any.
- Kernel getUniqueKernelFor(Function &F);
- /// Find the unique kernel that will execute \p I, if any.
- Kernel getUniqueKernelFor(Instruction &I) {
- return getUniqueKernelFor(*I.getFunction());
- }
- /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in
- /// the cases we can avoid taking the address of a function.
- bool rewriteDeviceCodeStateMachine();
- ///
- ///}}
- /// Emit a remark generically
- ///
- /// This template function can be used to generically emit a remark. The
- /// RemarkKind should be one of the following:
- /// - OptimizationRemark to indicate a successful optimization attempt
- /// - OptimizationRemarkMissed to report a failed optimization attempt
- /// - OptimizationRemarkAnalysis to provide additional information about an
- /// optimization attempt
- ///
- /// The remark is built using a callback function provided by the caller that
- /// takes a RemarkKind as input and returns a RemarkKind.
- template <typename RemarkKind,
- typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
- void emitRemark(Instruction *Inst, StringRef RemarkName,
- RemarkCallBack &&RemarkCB) const {
- Function *F = Inst->getParent()->getParent();
- auto &ORE = OREGetter(F);
- ORE.emit(
- [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
- }
- /// Emit a remark on a function. Since only OptimizationRemark is supporting
- /// this, it can't be made generic.
- void
- emitRemarkOnFunction(Function *F, StringRef RemarkName,
- function_ref<OptimizationRemark(OptimizationRemark &&)>
- &&RemarkCB) const {
- auto &ORE = OREGetter(F);
- ORE.emit([&]() {
- return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
- });
- }
- /// The underlying module.
- Module &M;
- /// The SCC we are operating on.
- SmallVectorImpl<Function *> &SCC;
- /// Callback to update the call graph, the first argument is a removed call,
- /// the second an optional replacement call.
- CallGraphUpdater &CGUpdater;
- /// Callback to get an OptimizationRemarkEmitter from a Function *
- OptimizationRemarkGetter OREGetter;
- /// OpenMP-specific information cache. Also Used for Attributor runs.
- OMPInformationCache &OMPInfoCache;
- /// Attributor instance.
- Attributor &A;
- /// Helper function to run Attributor on SCC.
- bool runAttributor() {
- if (SCC.empty())
- return false;
- registerAAs();
- ChangeStatus Changed = A.run();
- LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
- << " functions, result: " << Changed << ".\n");
- return Changed == ChangeStatus::CHANGED;
- }
- /// Populate the Attributor with abstract attribute opportunities in the
- /// function.
- void registerAAs() {
- if (SCC.empty())
- return;
- // Create CallSite AA for all Getters.
- for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
- auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
- auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
- auto CreateAA = [&](Use &U, Function &Caller) {
- CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
- if (!CI)
- return false;
- auto &CB = cast<CallBase>(*CI);
- IRPosition CBPos = IRPosition::callsite_function(CB);
- A.getOrCreateAAFor<AAICVTracker>(CBPos);
- return false;
- };
- GetterRFI.foreachUse(SCC, CreateAA);
- }
- }
- };
- Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
- if (!OMPInfoCache.ModuleSlice.count(&F))
- return nullptr;
- // Use a scope to keep the lifetime of the CachedKernel short.
- {
- Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
- if (CachedKernel)
- return *CachedKernel;
- // TODO: We should use an AA to create an (optimistic and callback
- // call-aware) call graph. For now we stick to simple patterns that
- // are less powerful, basically the worst fixpoint.
- if (isKernel(F)) {
- CachedKernel = Kernel(&F);
- return *CachedKernel;
- }
- CachedKernel = nullptr;
- if (!F.hasLocalLinkage()) {
- // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "[OMP100] Potentially unknown OpenMP target region caller";
- };
- emitRemarkOnFunction(&F, "OMP100", Remark);
- return nullptr;
- }
- }
- auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
- if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
- // Allow use in equality comparisons.
- if (Cmp->isEquality())
- return getUniqueKernelFor(*Cmp);
- return nullptr;
- }
- if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
- // Allow direct calls.
- if (CB->isCallee(&U))
- return getUniqueKernelFor(*CB);
- // Allow the use in __kmpc_kernel_prepare_parallel calls.
- if (Function *Callee = CB->getCalledFunction())
- if (Callee->getName() == "__kmpc_kernel_prepare_parallel")
- return getUniqueKernelFor(*CB);
- return nullptr;
- }
- // Disallow every other use.
- return nullptr;
- };
- // TODO: In the future we want to track more than just a unique kernel.
- SmallPtrSet<Kernel, 2> PotentialKernels;
- OMPInformationCache::foreachUse(F, [&](const Use &U) {
- PotentialKernels.insert(GetUniqueKernelForUse(U));
- });
- Kernel K = nullptr;
- if (PotentialKernels.size() == 1)
- K = *PotentialKernels.begin();
- // Cache the result.
- UniqueKernelMap[&F] = K;
- return K;
- }
- bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
- OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI =
- OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel];
- bool Changed = false;
- if (!KernelPrepareParallelRFI)
- return Changed;
- for (Function *F : SCC) {
- // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at
- // all.
- bool UnknownUse = false;
- bool KernelPrepareUse = false;
- unsigned NumDirectCalls = 0;
- SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
- OMPInformationCache::foreachUse(*F, [&](Use &U) {
- if (auto *CB = dyn_cast<CallBase>(U.getUser()))
- if (CB->isCallee(&U)) {
- ++NumDirectCalls;
- return;
- }
- if (isa<ICmpInst>(U.getUser())) {
- ToBeReplacedStateMachineUses.push_back(&U);
- return;
- }
- if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall(
- *U.getUser(), &KernelPrepareParallelRFI)) {
- KernelPrepareUse = true;
- ToBeReplacedStateMachineUses.push_back(&U);
- return;
- }
- UnknownUse = true;
- });
- // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
- // use.
- if (!KernelPrepareUse)
- continue;
- {
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Found a parallel region that is called in a target "
- "region but not part of a combined target construct nor "
- "nesed inside a target construct without intermediate "
- "code. This can lead to excessive register usage for "
- "unrelated target regions in the same translation unit "
- "due to spurious call edges assumed by ptxas.";
- };
- emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
- }
- // If this ever hits, we should investigate.
- // TODO: Checking the number of uses is not a necessary restriction and
- // should be lifted.
- if (UnknownUse || NumDirectCalls != 1 ||
- ToBeReplacedStateMachineUses.size() != 2) {
- {
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Parallel region is used in "
- << (UnknownUse ? "unknown" : "unexpected")
- << " ways; will not attempt to rewrite the state machine.";
- };
- emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
- }
- continue;
- }
- // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
- // up if the function is not called from a unique kernel.
- Kernel K = getUniqueKernelFor(*F);
- if (!K) {
- {
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Parallel region is not known to be called from a "
- "unique single target region, maybe the surrounding "
- "function has external linkage?; will not attempt to "
- "rewrite the state machine use.";
- };
- emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
- Remark);
- }
- continue;
- }
- // We now know F is a parallel body function called only from the kernel K.
- // We also identified the state machine uses in which we replace the
- // function pointer by a new global symbol for identification purposes. This
- // ensures only direct calls to the function are left.
- {
- auto RemarkParalleRegion = [&](OptimizationRemark OR) {
- return OR << "Specialize parallel region that is only reached from a "
- "single target region to avoid spurious call edges and "
- "excessive register usage in other target regions. "
- "(parallel region ID: "
- << ore::NV("OpenMPParallelRegion", F->getName())
- << ", kernel ID: "
- << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
- };
- emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
- RemarkParalleRegion);
- auto RemarkKernel = [&](OptimizationRemark OR) {
- return OR << "Target region containing the parallel region that is "
- "specialized. (parallel region ID: "
- << ore::NV("OpenMPParallelRegion", F->getName())
- << ", kernel ID: "
- << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
- };
- emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
- }
- Module &M = *F->getParent();
- Type *Int8Ty = Type::getInt8Ty(M.getContext());
- auto *ID = new GlobalVariable(
- M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
- UndefValue::get(Int8Ty), F->getName() + ".ID");
- for (Use *U : ToBeReplacedStateMachineUses)
- U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
- ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
- Changed = true;
- }
- return Changed;
- }
- /// Abstract Attribute for tracking ICV values.
- struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
- using Base = StateWrapper<BooleanState, AbstractAttribute>;
- AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- }
- /// Returns true if value is assumed to be tracked.
- bool isAssumedTracked() const { return getAssumed(); }
- /// Returns true if value is known to be tracked.
- bool isKnownTracked() const { return getAssumed(); }
- /// Create an abstract attribute biew for the position \p IRP.
- static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
- /// Return the value with which \p I can be replaced for specific \p ICV.
- virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
- const Instruction *I,
- Attributor &A) const {
- return None;
- }
- /// Return an assumed unique ICV value if a single candidate is found. If
- /// there cannot be one, return a nullptr. If it is not clear yet, return the
- /// Optional::NoneType.
- virtual Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const = 0;
- // Currently only nthreads is being tracked.
- // this array will only grow with time.
- InternalControlVar TrackableICVs[1] = {ICV_nthreads};
- /// See AbstractAttribute::getName()
- const std::string getName() const override { return "AAICVTracker"; }
- /// See AbstractAttribute::getIdAddr()
- const char *getIdAddr() const override { return &ID; }
- /// This function should return true if the type of the \p AA is AAICVTracker
- static bool classof(const AbstractAttribute *AA) {
- return (AA->getIdAddr() == &ID);
- }
- static const char ID;
- };
- struct AAICVTrackerFunction : public AAICVTracker {
- AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
- // FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerFunction"; }
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
- /// We don't manifest anything for this AA.
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
- // Map of ICV to their values at specific program point.
- EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
- Function *F = getAnchorScope();
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- for (InternalControlVar ICV : TrackableICVs) {
- auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
- auto &ValuesMap = ICVReplacementValuesMap[ICV];
- auto TrackValues = [&](Use &U, Function &) {
- CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
- if (!CI)
- return false;
- // FIXME: handle setters with more that 1 arguments.
- /// Track new value.
- if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
- HasChanged = ChangeStatus::CHANGED;
- return false;
- };
- auto CallCheck = [&](Instruction &I) {
- Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
- if (ReplVal.hasValue() &&
- ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
- HasChanged = ChangeStatus::CHANGED;
- return true;
- };
- // Track all changes of an ICV.
- SetterRFI.foreachUse(TrackValues, F);
- A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
- /* CheckBBLivenessOnly */ true);
- /// TODO: Figure out a way to avoid adding entry in
- /// ICVReplacementValuesMap
- Instruction *Entry = &F->getEntryBlock().front();
- if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
- ValuesMap.insert(std::make_pair(Entry, nullptr));
- }
- return HasChanged;
- }
- /// Hepler to check if \p I is a call and get the value for it if it is
- /// unique.
- Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
- InternalControlVar &ICV) const {
- const auto *CB = dyn_cast<CallBase>(I);
- if (!CB || CB->hasFnAttr("no_openmp") ||
- CB->hasFnAttr("no_openmp_routines"))
- return None;
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
- auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
- Function *CalledFunction = CB->getCalledFunction();
- // Indirect call, assume ICV changes.
- if (CalledFunction == nullptr)
- return nullptr;
- if (CalledFunction == GetterRFI.Declaration)
- return None;
- if (CalledFunction == SetterRFI.Declaration) {
- if (ICVReplacementValuesMap[ICV].count(I))
- return ICVReplacementValuesMap[ICV].lookup(I);
- return nullptr;
- }
- // Since we don't know, assume it changes the ICV.
- if (CalledFunction->isDeclaration())
- return nullptr;
- const auto &ICVTrackingAA =
- A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
- if (ICVTrackingAA.isAssumedTracked())
- return ICVTrackingAA.getUniqueReplacementValue(ICV);
- // If we don't know, assume it changes.
- return nullptr;
- }
- // We don't check unique value for a function, so return None.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return None;
- }
- /// Return the value with which \p I can be replaced for specific \p ICV.
- Optional<Value *> getReplacementValue(InternalControlVar ICV,
- const Instruction *I,
- Attributor &A) const override {
- const auto &ValuesMap = ICVReplacementValuesMap[ICV];
- if (ValuesMap.count(I))
- return ValuesMap.lookup(I);
- SmallVector<const Instruction *, 16> Worklist;
- SmallPtrSet<const Instruction *, 16> Visited;
- Worklist.push_back(I);
- Optional<Value *> ReplVal;
- while (!Worklist.empty()) {
- const Instruction *CurrInst = Worklist.pop_back_val();
- if (!Visited.insert(CurrInst).second)
- continue;
- const BasicBlock *CurrBB = CurrInst->getParent();
- // Go up and look for all potential setters/calls that might change the
- // ICV.
- while ((CurrInst = CurrInst->getPrevNode())) {
- if (ValuesMap.count(CurrInst)) {
- Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
- // Unknown value, track new.
- if (!ReplVal.hasValue()) {
- ReplVal = NewReplVal;
- break;
- }
- // If we found a new value, we can't know the icv value anymore.
- if (NewReplVal.hasValue())
- if (ReplVal != NewReplVal)
- return nullptr;
- break;
- }
- Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
- if (!NewReplVal.hasValue())
- continue;
- // Unknown value, track new.
- if (!ReplVal.hasValue()) {
- ReplVal = NewReplVal;
- break;
- }
- // if (NewReplVal.hasValue())
- // We found a new value, we can't know the icv value anymore.
- if (ReplVal != NewReplVal)
- return nullptr;
- }
- // If we are in the same BB and we have a value, we are done.
- if (CurrBB == I->getParent() && ReplVal.hasValue())
- return ReplVal;
- // Go through all predecessors and add terminators for analysis.
- for (const BasicBlock *Pred : predecessors(CurrBB))
- if (const Instruction *Terminator = Pred->getTerminator())
- Worklist.push_back(Terminator);
- }
- return ReplVal;
- }
- };
- struct AAICVTrackerFunctionReturned : AAICVTracker {
- AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
- // FIXME: come up with better string.
- const std::string getAsStr() const override {
- return "ICVTrackerFunctionReturned";
- }
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
- /// We don't manifest anything for this AA.
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
- // Map of ICV to their values at specific program point.
- EnumeratedArray<Optional<Value *>, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
- /// Return the value with which \p I can be replaced for specific \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ICVReplacementValuesMap[ICV];
- }
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::function(*getAnchorScope()));
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
- for (InternalControlVar ICV : TrackableICVs) {
- Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
- Optional<Value *> UniqueICVValue;
- auto CheckReturnInst = [&](Instruction &I) {
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(ICV, &I, A);
- // If we found a second ICV value there is no unique returned value.
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
- return false;
- UniqueICVValue = NewReplVal;
- return true;
- };
- if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
- /* CheckBBLivenessOnly */ true))
- UniqueICVValue = nullptr;
- if (UniqueICVValue == ReplVal)
- continue;
- ReplVal = UniqueICVValue;
- Changed = ChangeStatus::CHANGED;
- }
- return Changed;
- }
- };
- struct AAICVTrackerCallSite : AAICVTracker {
- AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- // We only initialize this AA for getters, so we need to know which ICV it
- // gets.
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- for (InternalControlVar ICV : TrackableICVs) {
- auto ICVInfo = OMPInfoCache.ICVs[ICV];
- auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
- if (Getter.Declaration == getAssociatedFunction()) {
- AssociatedICV = ICVInfo.Kind;
- return;
- }
- }
- /// Unknown ICV.
- indicatePessimisticFixpoint();
- }
- ChangeStatus manifest(Attributor &A) override {
- if (!ReplVal.hasValue() || !ReplVal.getValue())
- return ChangeStatus::UNCHANGED;
- A.changeValueAfterManifest(*getCtxI(), **ReplVal);
- A.deleteAfterManifest(*getCtxI());
- return ChangeStatus::CHANGED;
- }
- // FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
- InternalControlVar AssociatedICV;
- Optional<Value *> ReplVal;
- ChangeStatus updateImpl(Attributor &A) override {
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::function(*getAnchorScope()));
- // We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
- if (ReplVal == NewReplVal)
- return ChangeStatus::UNCHANGED;
- ReplVal = NewReplVal;
- return ChangeStatus::CHANGED;
- }
- // Return the value with which associated value can be replaced for specific
- // \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ReplVal;
- }
- };
- struct AAICVTrackerCallSiteReturned : AAICVTracker {
- AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
- // FIXME: come up with better string.
- const std::string getAsStr() const override {
- return "ICVTrackerCallSiteReturned";
- }
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
- /// We don't manifest anything for this AA.
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
- // Map of ICV to their values at specific program point.
- EnumeratedArray<Optional<Value *>, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
- /// Return the value with which associated value can be replaced for specific
- /// \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ICVReplacementValuesMap[ICV];
- }
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::returned(*getAssociatedFunction()));
- // We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
- for (InternalControlVar ICV : TrackableICVs) {
- Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getUniqueReplacementValue(ICV);
- if (ReplVal == NewReplVal)
- continue;
- ReplVal = NewReplVal;
- Changed = ChangeStatus::CHANGED;
- }
- return Changed;
- }
- };
- } // namespace
- const char AAICVTracker::ID = 0;
- AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
- Attributor &A) {
- AAICVTracker *AA = nullptr;
- switch (IRP.getPositionKind()) {
- case IRPosition::IRP_INVALID:
- case IRPosition::IRP_FLOAT:
- case IRPosition::IRP_ARGUMENT:
- case IRPosition::IRP_CALL_SITE_ARGUMENT:
- llvm_unreachable("ICVTracker can only be created for function position!");
- case IRPosition::IRP_RETURNED:
- AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
- break;
- case IRPosition::IRP_CALL_SITE_RETURNED:
- AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
- break;
- case IRPosition::IRP_CALL_SITE:
- AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
- break;
- case IRPosition::IRP_FUNCTION:
- AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
- break;
- }
- return *AA;
- }
- PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
- CGSCCAnalysisManager &AM,
- LazyCallGraph &CG, CGSCCUpdateResult &UR) {
- if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
- return PreservedAnalyses::all();
- if (DisableOpenMPOptimizations)
- return PreservedAnalyses::all();
- SmallVector<Function *, 16> SCC;
- // If there are kernels in the module, we have to run on all SCC's.
- bool SCCIsInteresting = !OMPInModule.getKernels().empty();
- for (LazyCallGraph::Node &N : C) {
- Function *Fn = &N.getFunction();
- SCC.push_back(Fn);
- // Do we already know that the SCC contains kernels,
- // or that OpenMP functions are called from this SCC?
- if (SCCIsInteresting)
- continue;
- // If not, let's check that.
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
- }
- if (!SCCIsInteresting || SCC.empty())
- return PreservedAnalyses::all();
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
- AnalysisGetter AG(FAM);
- auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
- return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
- };
- CallGraphUpdater CGUpdater;
- CGUpdater.initialize(CG, C, AM, UR);
- SetVector<Function *> Functions(SCC.begin(), SCC.end());
- BumpPtrAllocator Allocator;
- OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
- /*CGSCC*/ Functions, OMPInModule.getKernels());
- Attributor A(Functions, InfoCache, CGUpdater);
- OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- bool Changed = OMPOpt.run();
- if (Changed)
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
- }
- namespace {
- struct OpenMPOptLegacyPass : public CallGraphSCCPass {
- CallGraphUpdater CGUpdater;
- OpenMPInModule OMPInModule;
- static char ID;
- OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
- initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
- bool doInitialization(CallGraph &CG) override {
- // Disable the pass if there is no OpenMP (runtime call) in the module.
- containsOpenMP(CG.getModule(), OMPInModule);
- return false;
- }
- bool runOnSCC(CallGraphSCC &CGSCC) override {
- if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
- return false;
- if (DisableOpenMPOptimizations || skipSCC(CGSCC))
- return false;
- SmallVector<Function *, 16> SCC;
- // If there are kernels in the module, we have to run on all SCC's.
- bool SCCIsInteresting = !OMPInModule.getKernels().empty();
- for (CallGraphNode *CGN : CGSCC) {
- Function *Fn = CGN->getFunction();
- if (!Fn || Fn->isDeclaration())
- continue;
- SCC.push_back(Fn);
- // Do we already know that the SCC contains kernels,
- // or that OpenMP functions are called from this SCC?
- if (SCCIsInteresting)
- continue;
- // If not, let's check that.
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
- }
- if (!SCCIsInteresting || SCC.empty())
- return false;
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- CGUpdater.initialize(CG, CGSCC);
- // Maintain a map of functions to avoid rebuilding the ORE
- DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
- auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
- std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
- if (!ORE)
- ORE = std::make_unique<OptimizationRemarkEmitter>(F);
- return *ORE;
- };
- AnalysisGetter AG;
- SetVector<Function *> Functions(SCC.begin(), SCC.end());
- BumpPtrAllocator Allocator;
- OMPInformationCache InfoCache(
- *(Functions.back()->getParent()), AG, Allocator,
- /*CGSCC*/ Functions, OMPInModule.getKernels());
- Attributor A(Functions, InfoCache, CGUpdater);
- OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- return OMPOpt.run();
- }
- bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
- };
- } // end anonymous namespace
- void OpenMPInModule::identifyKernels(Module &M) {
- NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
- if (!MD)
- return;
- for (auto *Op : MD->operands()) {
- if (Op->getNumOperands() < 2)
- continue;
- MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
- if (!KindID || KindID->getString() != "kernel")
- continue;
- Function *KernelFn =
- mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
- if (!KernelFn)
- continue;
- ++NumOpenMPTargetRegionKernels;
- Kernels.insert(KernelFn);
- }
- }
- bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
- if (OMPInModule.isKnown())
- return OMPInModule;
- auto RecordFunctionsContainingUsesOf = [&](Function *F) {
- for (User *U : F->users())
- if (auto *I = dyn_cast<Instruction>(U))
- OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
- };
- // MSVC doesn't like long if-else chains for some reason and instead just
- // issues an error. Work around it..
- do {
- #define OMP_RTL(_Enum, _Name, ...) \
- if (Function *F = M.getFunction(_Name)) { \
- RecordFunctionsContainingUsesOf(F); \
- OMPInModule = true; \
- }
- #include "llvm/Frontend/OpenMP/OMPKinds.def"
- } while (false);
- // Identify kernels once. TODO: We should split the OMPInformationCache into a
- // module and an SCC part. The kernel information, among other things, could
- // go into the module part.
- if (OMPInModule.isKnown() && OMPInModule) {
- OMPInModule.identifyKernels(M);
- return true;
- }
- return OMPInModule = false;
- }
- char OpenMPOptLegacyPass::ID = 0;
- INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
- "OpenMP specific optimizations", false, false)
- INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
- INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
- "OpenMP specific optimizations", false, false)
- Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
|