12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617 |
- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the OpenMPIRBuilder class and helpers used as a convenient
- // way to create LLVM instructions for OpenMP directives.
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
- #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
- #include "llvm/Frontend/OpenMP/OMPConstants.h"
- #include "llvm/IR/DebugLoc.h"
- #include "llvm/IR/IRBuilder.h"
- #include "llvm/Support/Allocator.h"
- #include <forward_list>
- namespace llvm {
- class CanonicalLoopInfo;
- /// An interface to create LLVM-IR for OpenMP directives.
- ///
- /// Each OpenMP directive has a corresponding public generator method.
- class OpenMPIRBuilder {
- public:
- /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
- /// not have an effect on \p M (see initialize).
- ///
- /// Construction only initializes the member M from \p M and the internal
- /// Builder from the context of \p M.
- ///
- /// NOTE(review): this single-argument constructor is not `explicit`, so a
- /// Module& converts implicitly to an OpenMPIRBuilder -- confirm that this is
- /// intended.
- OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
- ~OpenMPIRBuilder();
- /// Initialize the internal state; this will put structure types and
- /// potentially other helpers into the underlying module. Must be called
- /// before any other method and only once!
- void initialize();
- /// Finalize the underlying module, e.g., by outlining regions.
- /// \param Fn The function to be finalized. If not used,
- /// all functions are finalized.
- void finalize(Function *Fn = nullptr);
- /// Add attributes known for \p FnID to \p Fn.
- void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
- /// Type used throughout for insertion points.
- using InsertPointTy = IRBuilder<>::InsertPoint;
- /// Callback type for variable finalization (think destructors).
- ///
- /// \param CodeGenIP is the insertion point at which the finalization code
- /// should be placed.
- ///
- /// A finalize callback knows about all objects that need finalization, e.g.
- /// destruction, when the scope of the currently generated construct is left
- /// at the time, and location, the callback is invoked.
- using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
- /// Bundles a finalization callback with the directive kind it was provided
- /// for and a cancellability flag. Objects of this type are what
- /// pushFinalizationCB() places on the finalization stack.
- struct FinalizationInfo {
- /// The finalization callback provided by the last in-flight invocation of
- /// createXXXX for the directive of kind DK.
- FinalizeCallbackTy FiniCB;
- /// The directive kind of the innermost directive that has an associated
- /// region which might require finalization when it is left.
- omp::Directive DK;
- /// Flag to indicate if the directive is cancellable.
- bool IsCancellable;
- };
- /// Push a finalization callback on the finalization stack.
- ///
- /// \p FI is appended to the end of FinalizationStack; popFinalizationCB()
- /// removes the most recently pushed entry again.
- ///
- /// \param FI The finalization information (callback, directive kind and
- /// cancellability flag) to push.
- ///
- /// NOTE: Temporary solution until Clang CG is gone.
- void pushFinalizationCB(const FinalizationInfo &FI) {
- FinalizationStack.push_back(FI);
- }
- /// Pop the last finalization callback from the finalization stack.
- ///
- /// NOTE(review): there is no emptiness check here; presumably every call must
- /// be paired with an earlier pushFinalizationCB() -- confirm against callers.
- ///
- /// NOTE: Temporary solution until Clang CG is gone.
- void popFinalizationCB() { FinalizationStack.pop_back(); }
- /// Callback type for body (=inner region) code generation
- ///
- /// The callback takes code locations as arguments, each describing a
- /// location at which code might need to be generated or a location that is
- /// the target of control transfer.
- ///
- /// \param AllocaIP is the insertion point at which new alloca instructions
- /// should be placed.
- /// \param CodeGenIP is the insertion point at which the body code should be
- /// placed.
- /// \param ContinuationBB is the basic block target to leave the body.
- ///
- /// Note that all blocks pointed to by the arguments have terminators.
- using BodyGenCallbackTy =
- function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationBB)>;
- // This is created primarily for the sections construct, as llvm::function_ref
- // (BodyGenCallbackTy) is not storable (as described in the comments of the
- // function_ref class, a function_ref holds a non-owning reference
- // to the callable).
- using StorableBodyGenCallbackTy =
- std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationBB)>;
- /// Callback type for loop body code generation.
- ///
- /// \param CodeGenIP is the insertion point where the loop's body code must be
- /// placed. This will be a dedicated BasicBlock with a
- /// conditional branch from the loop condition check and
- /// terminated with an unconditional branch to the loop
- /// latch.
- /// \param IndVar is the induction variable usable at the insertion point.
- using LoopBodyGenCallbackTy =
- function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
- /// Callback type for variable privatization (think copy & default
- /// constructor).
- ///
- /// \param AllocaIP is the insertion point at which new alloca instructions
- /// should be placed.
- /// \param CodeGenIP is the insertion point at which the privatization code
- /// should be placed.
- /// \param Original The value being copied/created, should not be used in the
- /// generated IR.
- /// \param Inner The equivalent of \p Original that should be used in the
- /// generated IR; this is equal to \p Original if the value is
- /// a pointer and can thus be passed directly, otherwise it is
- /// an equivalent but different value.
- /// \param ReplVal The replacement value, thus a copy or new created version
- /// of \p Inner.
- ///
- /// \returns The new insertion point where code generation continues and
- /// \p ReplVal the replacement value.
- using PrivatizeCallbackTy = function_ref<InsertPointTy(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
- Value &Inner, Value *&ReplVal)>;
- /// Description of a LLVM-IR insertion point (IP) and a debug/source location
- /// (filename, line, column, ...).
- ///
- /// The single-argument constructors are not `explicit`, so an IRBuilder or an
- /// InsertPointTy can be passed directly wherever a LocationDescription is
- /// expected.
- struct LocationDescription {
- /// Capture the saved insertion point and the current debug location of
- /// \p IRB.
- template <typename T, typename U>
- LocationDescription(const IRBuilder<T, U> &IRB)
- : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
- /// Use \p IP as insertion point; DL is left default-constructed.
- LocationDescription(const InsertPointTy &IP) : IP(IP) {}
- /// Use \p IP as insertion point and \p DL as debug location.
- LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
- : IP(IP), DL(DL) {}
- /// The insertion point of this location.
- InsertPointTy IP;
- /// The debug location of this location.
- DebugLoc DL;
- };
- /// Emitter methods for OpenMP directives.
- ///
- ///{
- /// Generator for '#omp barrier'
- ///
- /// \param Loc The location where the barrier directive was encountered.
- /// \param DK The kind of directive that caused the barrier.
- /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
- /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
- /// should be checked and acted upon.
- ///
- /// \returns The insertion point after the barrier.
- InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
- bool ForceSimpleCall = false,
- bool CheckCancelFlag = true);
- /// Generator for '#omp cancel'
- ///
- /// \param Loc The location where the directive was encountered.
- /// \param IfCondition The evaluated 'if' clause expression, if any.
- /// \param CanceledDirective The kind of directive that is canceled.
- ///
- /// \returns The insertion point after the barrier.
- InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
- omp::Directive CanceledDirective);
- /// Generator for '#omp parallel'
- ///
- /// \param Loc The insert and source location description.
- /// \param AllocaIP The insertion points to be used for alloca instructions.
- /// \param BodyGenCB Callback that will generate the region code.
- /// \param PrivCB Callback to copy a given variable (think copy constructor).
- /// \param FiniCB Callback to finalize variable copies.
- /// \param IfCondition The evaluated 'if' clause expression, if any.
- /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
- /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
- /// \param IsCancellable Flag to indicate a cancellable parallel region.
- ///
- /// \returns The insertion position *after* the parallel.
- IRBuilder<>::InsertPoint
- createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
- FinalizeCallbackTy FiniCB, Value *IfCondition,
- Value *NumThreads, omp::ProcBindKind ProcBind,
- bool IsCancellable);
- /// Generator for the control flow structure of an OpenMP canonical loop.
- ///
- /// This generator operates on the logical iteration space of the loop, i.e.
- /// the caller only has to provide a loop trip count of the loop as defined by
- /// base language semantics. The trip count is interpreted as an unsigned
- /// integer. The induction variable passed to \p BodyGenCB will be of the same
- /// type and run from 0 to \p TripCount - 1. It is up to the callback to
- /// convert the logical iteration variable to the loop counter variable in the
- /// loop body.
- ///
- /// \param Loc The insert and source location description. The insert
- /// location can be between two instructions or the end of a
- /// degenerate block (e.g. a BB under construction).
- /// \param BodyGenCB Callback that will generate the loop body code.
- /// \param TripCount Number of iterations the loop body is executed.
- /// \param Name Base name used to derive BB and instruction names.
- ///
- /// \returns An object representing the created control flow structure which
- /// can be used for loop-associated directives.
- CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
- LoopBodyGenCallbackTy BodyGenCB,
- Value *TripCount,
- const Twine &Name = "loop");
- /// Generator for the control flow structure of an OpenMP canonical loop.
- ///
- /// Instead of a logical iteration space, this allows specifying user-defined
- /// loop counter values using increment, upper- and lower bounds. To
- /// disambiguate the terminology when counting downwards, instead of lower
- /// bounds we use \p Start for the loop counter value in the first body
- /// iteration.
- ///
- /// Consider the following limitations:
- ///
- /// * A loop counter space over all integer values of its bit-width cannot be
- /// represented (e.g., using uint8_t, the loop trip count of 256 cannot be
- /// stored into an 8-bit integer):
- ///
- /// DO I = 0, 255, 1
- ///
- /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
- /// effectively counting downwards:
- ///
- /// for (uint8_t i = 100u; i > 0; i += 127u)
- ///
- ///
- /// TODO: May need to add additional parameters to represent:
- ///
- /// * Allow representing downcounting with unsigned integers.
- ///
- /// * Sign of the step and the comparison operator might disagree:
- ///
- /// for (int i = 0; i < 42; i -= 1u)
- ///
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the loop body code.
- /// \param Start Value of the loop counter for the first iteration.
- /// \param Stop Loop counter values past this will stop the loop.
- /// \param Step Loop counter increment after each iteration; negative
- /// means counting down.
- /// \param IsSigned Whether Start, Stop and Step are signed integers.
- /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
- /// counter.
- /// \param ComputeIP Insertion point for instructions computing the trip
- /// count. Can be used to ensure the trip count is available
- /// at the outermost loop of a loop nest. If not set,
- /// defaults to the preheader of the generated loop.
- /// \param Name Base name used to derive BB and instruction names.
- ///
- /// \returns An object representing the created control flow structure which
- /// can be used for loop-associated directives.
- CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
- LoopBodyGenCallbackTy BodyGenCB,
- Value *Start, Value *Stop, Value *Step,
- bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP = {},
- const Twine &Name = "loop");
- /// Collapse a loop nest into a single loop.
- ///
- /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
- /// that has the same number of innermost loop iterations as the original loop
- /// nest. The induction variables of the input loops are derived from the
- /// collapsed loop's induction variable. This is intended to be used to
- /// implement OpenMP's collapse clause. Before applying a directive,
- /// collapseLoops normalizes a loop nest to contain only a single loop and the
- /// directive's implementation does not need to handle multiple loops itself.
- /// This does not remove the need to handle all loop nest handling by
- /// directives, such as the ordered(<n>) clause or the simd schedule-clause
- /// modifier of the worksharing-loop directive.
- ///
- /// Example:
- /// \code
- /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
- /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
- /// body(i, j);
- /// \endcode
- ///
- /// After collapsing with Loops={i,j}, the loop is changed to
- /// \code
- /// for (int ij = 0; ij < 63; ++ij) {
- /// int i = ij / 9;
- /// int j = ij % 9;
- /// body(i, j);
- /// }
- /// \endcode
- ///
- /// In the current implementation, the following limitations apply:
- ///
- /// * All input loops have an induction variable of the same type.
- ///
- /// * The collapsed loop will have the same trip count integer type as the
- /// input loops. Therefore it is possible that the collapsed loop cannot
- /// represent all iterations of the input loops. For instance, assuming a
- /// 32 bit integer type, and two input loops both iterating 2^16 times, the
- /// theoretical trip count of the collapsed loop would be 2^32 iterations,
- /// which cannot be represented in a 32-bit integer. Behavior is undefined
- /// in this case.
- ///
- /// * The trip counts of every input loop must be available at \p ComputeIP.
- /// Non-rectangular loops are not yet supported.
- ///
- /// * At each nest level, code between a surrounding loop and its nested loop
- /// is hoisted into the loop body, and such code will be executed more
- /// often than before collapsing (or not at all if any inner loop iteration
- /// has a trip count of 0). This is permitted by the OpenMP specification.
- ///
- /// \param DL Debug location for instructions added for collapsing,
- /// such as instructions to compute/derive the input loop's
- /// induction variables.
- /// \param Loops Loops in the loop nest to collapse. Loops are specified
- /// from outermost-to-innermost and every control flow of a
- /// loop's body must pass through its directly nested loop.
- /// \param ComputeIP Where to insert the additional instructions that compute
- /// the collapsed trip count. If not set, defaults to before the generated
- /// loop.
- ///
- /// \returns The CanonicalLoopInfo object representing the collapsed loop.
- CanonicalLoopInfo *collapseLoops(DebugLoc DL,
- ArrayRef<CanonicalLoopInfo *> Loops,
- InsertPointTy ComputeIP);
- /// Modifies the canonical loop to be a statically-scheduled workshare loop.
- ///
- /// This takes \p CLI representing a canonical loop, such as the one
- /// created by \p createCanonicalLoop and emits additional instructions to
- /// turn it into a workshare loop. In particular, it calls to an OpenMP
- /// runtime function in the preheader to obtain the loop bounds to be used in
- /// the current thread, updates the relevant instructions in the canonical
- /// loop and calls to an OpenMP runtime finalization function after the loop.
- ///
- /// TODO: Workshare loops with static scheduling may contain up to two loops
- /// that fulfill the requirements of an OpenMP canonical loop. One for
- /// iterating over all iterations of a chunk and another one for iterating
- /// over all chunks that are executed on the same thread. Returning
- /// CanonicalLoopInfo objects representing them may eventually be useful for
- /// the apply clause planned in OpenMP 6.0, but currently whether these are
- /// canonical loops is irrelevant.
- ///
- /// \param DL Debug location for instructions added for the
- /// workshare-loop construct itself.
- /// \param CLI A descriptor of the canonical loop to workshare.
- /// \param AllocaIP An insertion point for Alloca instructions usable in the
- /// preheader of the loop.
- /// \param NeedsBarrier Indicates whether a barrier must be inserted after
- /// the loop.
- /// \param Chunk The size of loop chunk considered as a unit when
- /// scheduling. If \p nullptr, defaults to 1.
- ///
- /// \returns Point where to insert code after the workshare construct.
- InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
- /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
- ///
- /// This takes \p CLI representing a canonical loop, such as the one
- /// created by \p createCanonicalLoop and emits additional instructions to
- /// turn it into a workshare loop. In particular, it calls to an OpenMP
- /// runtime function in the preheader to obtain, and then in each iteration
- /// to update the loop counter.
- ///
- /// \param DL Debug location for instructions added for the
- /// workshare-loop construct itself.
- /// \param CLI A descriptor of the canonical loop to workshare.
- /// \param AllocaIP An insertion point for Alloca instructions usable in the
- /// preheader of the loop.
- /// \param SchedType Type of scheduling to be passed to the init function.
- /// \param NeedsBarrier Indicates whether a barrier must be inserted after
- /// the loop.
- /// \param Chunk The size of loop chunk considered as a unit when
- /// scheduling. If \p nullptr, defaults to 1.
- ///
- /// \returns Point where to insert code after the workshare construct.
- InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- omp::OMPScheduleType SchedType,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
- /// Modifies the canonical loop to be a workshare loop.
- ///
- /// This takes \p CLI representing a canonical loop, such as the one
- /// created by \p createCanonicalLoop and emits additional instructions to
- /// turn it into a workshare loop. In particular, it calls to an OpenMP
- /// runtime function in the preheader to obtain the loop bounds to be used in
- /// the current thread, updates the relevant instructions in the canonical
- /// loop and calls to an OpenMP runtime finalization function after the loop.
- ///
- /// \param DL Debug location for instructions added for the
- /// workshare-loop construct itself.
- /// \param CLI A descriptor of the canonical loop to workshare.
- /// \param AllocaIP An insertion point for Alloca instructions usable in the
- /// preheader of the loop.
- /// \param NeedsBarrier Indicates whether a barrier must be inserted after
- /// the loop.
- ///
- /// \returns Point where to insert code after the workshare construct.
- InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier);
- /// Tile a loop nest.
- ///
- /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
- /// \p Loops must be perfectly nested, from outermost to innermost loop
- /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
- /// of every loop and every tile size must be usable in the outermost
- /// loop's preheader. This implies that the loop nest is rectangular.
- ///
- /// Example:
- /// \code
- /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
- /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
- /// body(i, j);
- /// \endcode
- ///
- /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
- /// \code
- /// for (int i1 = 0; i1 < 3; ++i1)
- /// for (int j1 = 0; j1 < 2; ++j1)
- /// for (int i2 = 0; i2 < 5; ++i2)
- /// for (int j2 = 0; j2 < 7; ++j2)
- /// body(i1*5+i2, j1*7+j2);
- /// \endcode
- ///
- /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
- /// are the floor loops, and the loops i2 and j2 are the tile loops. Tiling also
- /// handles non-constant trip counts, non-constant tile sizes and trip counts
- /// that are not multiples of the tile size. In the latter case the tile loop
- /// of the last floor-loop iteration will have fewer iterations than specified
- /// as its tile size.
- ///
- ///
- /// \param DL Debug location for instructions added by tiling, for
- /// instance the floor- and tile trip count computation.
- /// \param Loops Loops to tile. The CanonicalLoopInfo objects are
- /// invalidated by this method, i.e. should not be used after
- /// tiling.
- /// \param TileSizes For each loop in \p Loops, the tile size for that
- /// dimension.
- ///
- /// \returns A list of generated loops. Contains twice as many loops as the
- /// input loop nest; the first half are the floor loops and the
- /// second half are the tile loops.
- std::vector<CanonicalLoopInfo *>
- tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
- ArrayRef<Value *> TileSizes);
- /// Fully unroll a loop.
- ///
- /// Instead of unrolling the loop immediately (and duplicating its body
- /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
- /// metadata.
- ///
- /// \param DL Debug location for instructions added by unrolling.
- /// \param Loop The loop to unroll. The loop will be invalidated.
- void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
- /// Fully or partially unroll a loop. How the loop is unrolled is determined
- /// using LLVM's LoopUnrollPass.
- ///
- /// \param DL Debug location for instructions added by unrolling.
- /// \param Loop The loop to unroll. The loop will be invalidated.
- void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
- /// Partially unroll a loop.
- ///
- /// The CanonicalLoopInfo of the unrolled loop for use with chained
- /// loop-associated directive can be requested using \p UnrolledCLI. Not
- /// needing the CanonicalLoopInfo allows more efficient code generation by
- /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
- /// A loop-associated directive applied to the unrolled loop needs to know the
- /// new trip count which means that if using a heuristically determined unroll
- /// factor (\p Factor == 0), that factor must be computed immediately. We are
- /// using the same logic as the LoopUnrollPass to derive the unroll factor,
- /// but which assumes that some canonicalization has taken place (e.g.
- /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
- /// better when the unrolled loop's CanonicalLoopInfo is not needed.
- ///
- /// \param DL Debug location for instructions added by unrolling.
- /// \param Loop The loop to unroll. The loop will be invalidated.
- /// \param Factor The factor to unroll the loop by. A factor of 0
- /// indicates that a heuristic should be used to determine
- /// the unroll-factor.
- /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
- /// partially unrolled loop. Otherwise, uses loop metadata
- /// to defer unrolling to the LoopUnrollPass.
- void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
- CanonicalLoopInfo **UnrolledCLI);
- /// Add metadata to simd-ize a loop.
- ///
- /// \param DL Debug location for instructions added by simd-ization.
- /// \param Loop The loop to simd-ize.
- void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop);
- /// Generator for '#omp flush'
- ///
- /// \param Loc The location where the flush directive was encountered
- void createFlush(const LocationDescription &Loc);
- /// Generator for '#omp taskwait'
- ///
- /// \param Loc The location where the taskwait directive was encountered.
- void createTaskwait(const LocationDescription &Loc);
- /// Generator for '#omp taskyield'
- ///
- /// \param Loc The location where the taskyield directive was encountered.
- void createTaskyield(const LocationDescription &Loc);
- /// Functions used to generate reductions. Such functions take two Values
- /// representing LHS and RHS of the reduction, respectively, and a reference
- /// to the value that is updated to refer to the reduction result.
- using ReductionGenTy =
- function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
- /// Functions used to generate atomic reductions. Such functions take two
- /// Values representing pointers to LHS and RHS of the reduction, as well as
- /// the element type of these pointers. They are expected to atomically
- /// update the LHS to the reduced value.
- using AtomicReductionGenTy =
- function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
- /// Information about an OpenMP reduction.
- ///
- /// NOTE: ReductionGen and AtomicReductionGen are function_ref objects, which
- /// do not own the callable they refer to. The callables handed to the
- /// constructor must therefore stay alive for as long as they may be invoked
- /// through this ReductionInfo.
- struct ReductionInfo {
- /// Store all arguments unchanged in the members of the same name.
- ///
- /// When assertions are enabled, this checks that \p Variable has a pointer
- /// type that is either opaque or has \p ElementType as its pointee type.
- ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
- ReductionGenTy ReductionGen,
- AtomicReductionGenTy AtomicReductionGen)
- : ElementType(ElementType), Variable(Variable),
- PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
- AtomicReductionGen(AtomicReductionGen) {
- assert(cast<PointerType>(Variable->getType())
- ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
- }
- /// Reduction element type, must match pointee type of variable.
- Type *ElementType;
- /// Reduction variable of pointer type.
- Value *Variable;
- /// Thread-private partial reduction variable.
- Value *PrivateVariable;
- /// Callback for generating the reduction body. The IR produced by this will
- /// be used to combine two values in a thread-safe context, e.g., under
- /// lock or within the same thread, and therefore need not be atomic.
- ReductionGenTy ReductionGen;
- /// Callback for generating the atomic reduction body, may be null. The IR
- /// produced by this will be used to atomically combine two values during
- /// reduction. If null, the implementation will use the non-atomic version
- /// along with the appropriate synchronization mechanisms.
- AtomicReductionGenTy AtomicReductionGen;
- };
- // TODO: provide atomic and non-atomic reduction generators for reduction
- // operators defined by the OpenMP specification.
- /// Generator for '#omp reduction'.
- ///
- /// Emits the IR instructing the runtime to perform the specific kind of
- /// reductions. Expects reduction variables to have been privatized and
- /// initialized to reduction-neutral values separately. Emits the calls to
- /// runtime functions as well as the reduction function and the basic blocks
- /// performing the reduction atomically and non-atomically.
- ///
- /// The code emitted for the following:
- ///
- /// \code
- /// type var_1;
- /// type var_2;
- /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
- /// /* body */;
- /// \endcode
- ///
- /// corresponds to the following sketch.
- ///
- /// \code
- /// void _outlined_par() {
- /// // N is the number of different reductions.
- /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
- /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
- /// _omp_reduction_func,
- /// _gomp_critical_user.reduction.var)) {
- /// case 1: {
- /// var_1 = var_1 <reduction-op> privatized_var_1;
- /// var_2 = var_2 <reduction-op> privatized_var_2;
- /// // ...
- /// __kmpc_end_reduce(...);
- /// break;
- /// }
- /// case 2: {
- /// _Atomic<ReductionOp>(var_1, privatized_var_1);
- /// _Atomic<ReductionOp>(var_2, privatized_var_2);
- /// // ...
- /// break;
- /// }
- /// default: break;
- /// }
- /// }
- ///
- /// void _omp_reduction_func(void **lhs, void **rhs) {
- /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
- /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
- /// // ...
- /// }
- /// \endcode
- ///
- /// \param Loc The location where the reduction was
- /// encountered. Must be within the associated
- /// directive and after the last local access to the
- /// reduction variables.
- /// \param AllocaIP An insertion point suitable for allocas usable
- /// in reductions.
- /// \param ReductionInfos A list of info on each reduction variable.
- /// \param IsNoWait A flag set if the reduction is marked as nowait. Defaults to false.
- InsertPointTy createReductions(const LocationDescription &Loc,
- InsertPointTy AllocaIP,
- ArrayRef<ReductionInfo> ReductionInfos,
- bool IsNoWait = false);
- ///}
- /// Snapshot the position at which the underlying IRBuilder currently inserts.
- InsertPointTy getInsertionPoint() {
- InsertPointTy CurrentIP = Builder.saveIP();
- return CurrentIP;
- }
- /// Point the underlying IRBuilder at \p Loc: restore its insertion point and
- /// set its current debug location.
- ///
- /// \returns true if the insertion point of \p Loc refers to a block.
- bool updateToLocation(const LocationDescription &Loc) {
- Builder.restoreIP(Loc.IP);
- Builder.SetCurrentDebugLocation(Loc.DL);
- const bool HasInsertBlock = (Loc.IP.getBlock() != nullptr);
- return HasInsertBlock;
- }
- /// Return the function declaration (as a FunctionCallee) for the runtime function identified by \p FnID. NOTE(review): \p M is presumably the module that holds the declaration; confirm.
- FunctionCallee getOrCreateRuntimeFunction(Module &M,
- omp::RuntimeFunction FnID);
- Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); // Function * variant; takes no Module argument.
- /// Return the (LLVM-IR) string describing the source location \p LocStr. \p SrcLocStrSize is passed by reference; presumably it receives the string's size (TODO confirm).
- Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
- /// Return the (LLVM-IR) string describing the default source location.
- Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
- /// Return the (LLVM-IR) string describing the source location identified by
- /// the arguments \p FunctionName, \p FileName, \p Line and \p Column.
- Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
- unsigned Line, unsigned Column,
- uint32_t &SrcLocStrSize);
- /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
- /// fallback if \p DL does not specify the function name (\p F defaults to nullptr).
- Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
- Function *F = nullptr);
- /// Return the (LLVM-IR) string describing the source location \p Loc.
- Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
- uint32_t &SrcLocStrSize);
- /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. \p Flags defaults to omp::IdentFlag(0) and \p Reserve2Flags to 0.
- /// TODO: Create an enum class for the Reserve2Flags.
- Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
- omp::IdentFlag Flags = omp::IdentFlag(0),
- unsigned Reserve2Flags = 0);
- /// Create a hidden global flag \p Name in the module with initial value
- /// \p Value.
- GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
- /// Generate control flow and cleanup for cancellation.
- ///
- /// \param CancelFlag Flag indicating if the cancellation is performed.
- /// \param CanceledDirective The kind of directive that is canceled.
- /// \param ExitCB Extra code to be generated in the exit block; defaults to a value-initialized (`{}`) callback.
- void emitCancelationCheckImpl(Value *CancelFlag,
- omp::Directive CanceledDirective,
- FinalizeCallbackTy ExitCB = {});
- /// Generate a barrier runtime call.
- ///
- /// \param Loc The location at which the request originated and is fulfilled.
- /// \param DK The directive which caused the barrier.
- /// \param ForceSimpleCall Flag to force a simple (i.e. non-cancellation) barrier.
- /// \param CheckCancelFlag Flag to indicate that a cancel barrier's return value
- /// should be checked and acted upon.
- ///
- /// \returns The insertion point after the barrier.
- InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
- omp::Directive DK, bool ForceSimpleCall,
- bool CheckCancelFlag);
- /// Generate a flush runtime call; nothing is returned to the caller.
- ///
- /// \param Loc The location at which the request originated and is fulfilled.
- void emitFlush(const LocationDescription &Loc);
- /// The finalization stack made up of finalize callbacks currently in-flight,
- /// wrapped into FinalizationInfo objects that also reference the finalization
- /// target block and the kind of cancellable directive. The last element is the most recent entry.
- SmallVector<FinalizationInfo, 8> FinalizationStack;
- /// Check whether the most recent entry on the finalization stack belongs to
- /// directive kind \p DK and is marked cancellable.
- bool isLastFinalizationInfoCancellable(omp::Directive DK) {
- // With nothing in flight there is no entry to inspect.
- if (FinalizationStack.empty())
- return false;
- const FinalizationInfo &Innermost = FinalizationStack.back();
- return Innermost.IsCancellable && Innermost.DK == DK;
- }
- /// Generate a taskwait runtime call; nothing is returned to the caller.
- ///
- /// \param Loc The location at which the request originated and is fulfilled.
- void emitTaskwaitImpl(const LocationDescription &Loc);
- /// Generate a taskyield runtime call; nothing is returned to the caller.
- ///
- /// \param Loc The location at which the request originated and is fulfilled.
- void emitTaskyieldImpl(const LocationDescription &Loc);
- /// Return the current thread ID as an IR value.
- ///
- /// \param Ident The ident (ident_t*) describing the query origin.
- Value *getOrCreateThreadID(Value *Ident);
- /// The underlying LLVM-IR module.
- Module &M;
- /// The LLVM-IR Builder used to create IR.
- IRBuilder<> Builder;
- /// Map to remember source location strings, keyed by string.
- StringMap<Constant *> SrcLocStrMap;
- /// Map to remember existing ident_t*, keyed by a (Constant *, uint64_t) pair. NOTE(review): presumably (source-location string, flags); confirm.
- DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
- /// Helper that contains information about regions we need to outline
- /// during finalization.
- struct OutlineInfo {
- using PostOutlineCBTy = std::function<void(Function &)>;
- PostOutlineCBTy PostOutlineCB; // Callback receiving a Function. NOTE(review): presumably run on the outlined function after outlining; confirm.
- BasicBlock *EntryBB, *ExitBB; // The blocks delimiting the region (see collectBlocks).
- SmallVector<Value *, 2> ExcludeArgsFromAggregate; // NOTE(review): presumably values kept out of the aggregate argument; confirm.
- /// Collect all blocks in between EntryBB and ExitBB in both the given
- /// vector and set.
- void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
- SmallVectorImpl<BasicBlock *> &BlockVector);
- /// Return the function that contains the region to be outlined (the parent of EntryBB).
- Function *getFunction() const { return EntryBB->getParent(); }
- };
- /// Collection of regions that need to be outlined during finalization.
- SmallVector<OutlineInfo, 16> OutlineInfos;
- /// Collection of owned canonical loop objects that eventually need to be
- /// freed.
- std::forward_list<CanonicalLoopInfo> LoopInfos;
- /// Add a new region that will be outlined later.
- ///
- /// \param OI The region description. It is moved into OutlineInfos, so the
- /// caller's object is left in a moved-from state.
- void addOutlineInfo(OutlineInfo &&OI) {
- // OI is a named rvalue reference and therefore an lvalue here; without
- // std::move, emplace_back would copy its std::function and SmallVector.
- OutlineInfos.emplace_back(std::move(OI));
- }
- /// An ordered map of auto-generated variables to their unique names. NOTE(review): the declared container is a StringMap; confirm that "ordered" still applies.
- /// It stores variables with the following names: 1) ".gomp_critical_user_" +
- /// <critical_section_name> + ".var" for "omp critical" directives; 2)
- /// <mangled_name_for_global_var> + ".cache." for the cache for threadprivate
- /// variables.
- StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
- /// Create the global variable holding the offload mappings information given in \p Mappings. NOTE(review): \p VarName is presumably the global's name; confirm.
- GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
- std::string VarName);
- /// Create the global variable holding the offload names information given in \p Names. NOTE(review): \p VarName is presumably the global's name; confirm.
- GlobalVariable *
- createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
- std::string VarName);
- struct MapperAllocas { // Bundle of three allocas passed to createMapperAllocas and emitMapperCall.
- AllocaInst *ArgsBase = nullptr; // All three members start out null.
- AllocaInst *Args = nullptr;
- AllocaInst *ArgSizes = nullptr;
- };
- /// Create the alloca instructions used in calls to mapper functions; \p MapperAllocas is passed by non-const reference.
- void createMapperAllocas(const LocationDescription &Loc,
- InsertPointTy AllocaIP, unsigned NumOperands,
- struct MapperAllocas &MapperAllocas); // 'struct' keyword is required: the parameter name hides the type name.
- /// Create the call for the target mapper function.
- /// \param Loc The source location description.
- /// \param MapperFunc Function to be called.
- /// \param SrcLocInfo Source location information global.
- /// \param MaptypesArg The argument types.
- /// \param MapnamesArg The argument names.
- /// \param MapperAllocas The AllocaInsts used for the call.
- /// \param DeviceID Device ID for the call.
- /// \param NumOperands Number of operands in the call.
- void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
- Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
- struct MapperAllocas &MapperAllocas, int64_t DeviceID, // 'struct' keyword is required: the parameter name hides the type name.
- unsigned NumOperands);
- public:
- /// Generator for __kmpc_copyprivate.
- ///
- /// \param Loc The source location description.
- /// \param BufSize Number of elements in the buffer.
- /// \param CpyBuf List of pointers to data to be copied.
- /// \param CpyFn Function to call for copying data.
- /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
- ///
- /// \return The insertion position *after* the CopyPrivate call.
- InsertPointTy createCopyPrivate(const LocationDescription &Loc,
- llvm::Value *BufSize, llvm::Value *CpyBuf,
- llvm::Value *CpyFn, llvm::Value *DidIt);
- /// Generator for '#omp single'.
- ///
- /// \param Loc The source location description.
- /// \param BodyGenCB Callback that will generate the region code.
- /// \param FiniCB Callback to finalize variable copies.
- /// \param DidIt Local variable used as a flag to indicate the 'single' thread.
- ///
- /// \returns The insertion position *after* the single call.
- InsertPointTy createSingle(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
- /// Generator for '#omp master'.
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the region code.
- /// \param FiniCB Callback to finalize variable copies.
- ///
- /// \returns The insertion position *after* the master region.
- InsertPointTy createMaster(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB);
- /// Generator for '#omp masked'.
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the region code.
- /// \param FiniCB Callback to finalize variable copies.
- /// \param Filter NOTE(review): previously undocumented; presumably the value of the directive's filter clause. Confirm.
- /// \returns The insertion position *after* the masked region.
- InsertPointTy createMasked(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB, Value *Filter);
- /// Generator for '#omp critical'.
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the region body code.
- /// \param FiniCB Callback to finalize variable copies.
- /// \param CriticalName Name of the lock used by the critical directive.
- /// \param HintInst Hint instruction for the hint clause associated with critical.
- ///
- /// \returns The insertion position *after* the critical region.
- InsertPointTy createCritical(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB,
- StringRef CriticalName, Value *HintInst);
- /// Generator for '#omp ordered depend (source | sink)'.
- ///
- /// \param Loc The insert and source location description.
- /// \param AllocaIP The insertion point to be used for alloca instructions.
- /// \param NumLoops The number of loops in the depend clause.
- /// \param StoreValues The values that will be stored in the vector address.
- /// \param Name The name of the alloca instruction.
- /// \param IsDependSource If true, depend source; otherwise, depend sink.
- ///
- /// \return The insertion position *after* the ordered.
- InsertPointTy createOrderedDepend(const LocationDescription &Loc,
- InsertPointTy AllocaIP, unsigned NumLoops,
- ArrayRef<llvm::Value *> StoreValues,
- const Twine &Name, bool IsDependSource);
- /// Generator for '#omp ordered [threads | simd]'.
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the region code.
- /// \param FiniCB Callback to finalize variable copies.
- /// \param IsThreads If true, with the threads clause or without a clause;
- /// otherwise, with the simd clause.
- ///
- /// \returns The insertion position *after* the ordered.
- InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB,
- bool IsThreads);
- /// Generator for '#omp sections'.
- ///
- /// \param Loc The insert and source location description.
- /// \param AllocaIP The insertion point to be used for alloca instructions.
- /// \param SectionCBs Callbacks that will generate the body of each section.
- /// \param PrivCB Callback to copy a given variable (think copy constructor).
- /// \param FiniCB Callback to finalize variable copies.
- /// \param IsCancellable Flag to indicate a cancellable parallel region.
- /// \param IsNowait If true, the barrier that ensures all sections are executed
- /// before moving forward will not be generated.
- /// \returns The insertion position *after* the sections.
- InsertPointTy createSections(const LocationDescription &Loc,
- InsertPointTy AllocaIP,
- ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
- PrivatizeCallbackTy PrivCB,
- FinalizeCallbackTy FiniCB, bool IsCancellable,
- bool IsNowait);
- /// Generator for '#omp section'.
- ///
- /// \param Loc The insert and source location description.
- /// \param BodyGenCB Callback that will generate the region body code.
- /// \param FiniCB Callback to finalize variable copies.
- /// \returns The insertion position *after* the section.
- InsertPointTy createSection(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB);
- /// Generate conditional branch and relevant BasicBlocks through which private
- /// threads copy the 'copyin' variables from Master copy to threadprivate
- /// copies.
- ///
- /// \param IP Insertion block for the copyin conditional.
- /// \param MasterAddr A pointer to the master variable.
- /// \param PrivateAddr A pointer to the threadprivate variable.
- /// \param IntPtrTy Pointer size type.
- /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
- /// and copy.in.end block (defaults to true).
- ///
- /// \returns The insertion point where copying operation to be emitted.
- InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
- Value *PrivateAddr,
- llvm::IntegerType *IntPtrTy,
- bool BranchtoEnd = true);
- /// Create a runtime call for kmpc_Alloc.
- ///
- /// \param Loc The insert and source location description.
- /// \param Size Size of allocated memory space.
- /// \param Allocator Allocator information instruction.
- /// \param Name Name of call Instruction for OMP_alloc; defaults to the empty string.
- ///
- /// \returns CallInst to the OMP_Alloc call
- CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
- Value *Allocator, std::string Name = "");
- /// Create a runtime call for kmpc_free.
- ///
- /// \param Loc The insert and source location description.
- /// \param Addr Address of memory space to be freed.
- /// \param Allocator Allocator information instruction.
- /// \param Name Name of call Instruction for OMP_Free; defaults to the empty string.
- ///
- /// \returns CallInst to the OMP_Free call
- CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
- Value *Allocator, std::string Name = "");
- /// Create a runtime call for kmpc_threadprivate_cached.
- ///
- /// \param Loc The insert and source location description.
- /// \param Pointer Pointer to data to be cached.
- /// \param Size Size of data to be cached.
- /// \param Name Name of call Instruction for callinst; defaults to an empty Twine.
- ///
- /// \returns CallInst to the thread private cache call.
- CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
- llvm::Value *Pointer,
- llvm::ConstantInt *Size,
- const llvm::Twine &Name = Twine(""));
- /// Create a runtime call for __tgt_interop_init.
- ///
- /// \param Loc The insert and source location description.
- /// \param InteropVar Variable to be allocated.
- /// \param InteropType Type of interop operation.
- /// \param Device Device to which offloading will occur.
- /// \param NumDependences Number of dependence variables.
- /// \param DependenceAddress Pointer to dependence variables.
- /// \param HaveNowaitClause Does a nowait clause exist?
- ///
- /// \returns CallInst to the __tgt_interop_init call
- CallInst *createOMPInteropInit(const LocationDescription &Loc,
- Value *InteropVar,
- omp::OMPInteropType InteropType, Value *Device,
- Value *NumDependences,
- Value *DependenceAddress,
- bool HaveNowaitClause);
- /// Create a runtime call for __tgt_interop_destroy.
- ///
- /// \param Loc The insert and source location description.
- /// \param InteropVar Interop variable. NOTE(review): the old text said "variable to be allocated", apparently copied from the init variant; confirm.
- /// \param Device Device to which offloading will occur.
- /// \param NumDependences Number of dependence variables.
- /// \param DependenceAddress Pointer to dependence variables.
- /// \param HaveNowaitClause Does a nowait clause exist?
- ///
- /// \returns CallInst to the __tgt_interop_destroy call
- CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
- Value *InteropVar, Value *Device,
- Value *NumDependences,
- Value *DependenceAddress,
- bool HaveNowaitClause);
- /// Create a runtime call for __tgt_interop_use.
- ///
- /// \param Loc The insert and source location description.
- /// \param InteropVar Interop variable. NOTE(review): the old text said "variable to be allocated", apparently copied from the init variant; confirm.
- /// \param Device Device to which offloading will occur.
- /// \param NumDependences Number of dependence variables.
- /// \param DependenceAddress Pointer to dependence variables.
- /// \param HaveNowaitClause Does a nowait clause exist?
- ///
- /// \returns CallInst to the __tgt_interop_use call
- CallInst *createOMPInteropUse(const LocationDescription &Loc,
- Value *InteropVar, Value *Device,
- Value *NumDependences, Value *DependenceAddress,
- bool HaveNowaitClause);
- /// The `omp target` interface
- ///
- /// For more information about the usage of this interface,
- /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
- ///
- ///{
- /// Create a runtime call for kmpc_target_init. Returns an InsertPointTy. NOTE(review): which position it denotes is not documented here; confirm.
- ///
- /// \param Loc The insert and source location description.
- /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
- bool RequiresFullRuntime);
- /// Create a runtime call for kmpc_target_deinit; nothing is returned to the caller.
- ///
- /// \param Loc The insert and source location description.
- /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
- bool RequiresFullRuntime);
- ///}
- /// Declarations for LLVM-IR types (simple, array, function and structure) are
- /// generated below. Their names are defined and used in OMPKinds.def. Here
- /// we provide the declarations, the initializeTypes function will provide the
- /// values.
- /// Every member generated by these macros starts out as nullptr.
- ///{
- #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
- #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
- ArrayType *VarName##Ty = nullptr; \
- PointerType *VarName##PtrTy = nullptr;
- #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
- FunctionType *VarName = nullptr; \
- PointerType *VarName##Ptr = nullptr;
- #define OMP_STRUCT_TYPE(VarName, StrName, ...) \
- StructType *VarName = nullptr; \
- PointerType *VarName##Ptr = nullptr;
- #include "llvm/Frontend/OpenMP/OMPKinds.def"
- ///}
- private:
- /// Create all simple and struct types exposed by the runtime and remember
- /// the llvm::PointerTypes of them for easy access later. Takes the module as \p M.
- void initializeTypes(Module &M);
- /// Common interface for generating entry calls for OMP Directives.
- /// If the directive has a region/body, it will set the insertion
- /// point to the body.
- ///
- /// \param OMPD Directive to generate entry blocks for.
- /// \param EntryCall Call to the entry OMP Runtime Function.
- /// \param ExitBB Block where the region ends.
- /// \param Conditional Indicate if the entry call result will be used
- /// to evaluate a conditional of whether a thread will execute
- /// body code or not. Defaults to false.
- ///
- /// \return The insertion position in exit block
- InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
- BasicBlock *ExitBB,
- bool Conditional = false);
- /// Common interface to finalize the region.
- ///
- /// \param OMPD Directive to generate exiting code for.
- /// \param FinIP Insertion point for emitting Finalization code and exit call.
- /// \param ExitCall Call to the ending OMP Runtime Function.
- /// \param HasFinalize Indicate if the directive will require finalization
- /// and has a finalization callback in the stack that
- /// should be called. Defaults to true.
- ///
- /// \return The insertion position in exit block
- InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
- InsertPointTy FinIP,
- Instruction *ExitCall,
- bool HasFinalize = true);
- /// Common interface to generate OMP inlined regions.
- ///
- /// \param OMPD Directive to generate inlined region for.
- /// \param EntryCall Call to the entry OMP Runtime Function.
- /// \param ExitCall Call to the ending OMP Runtime Function.
- /// \param BodyGenCB Body code generation callback.
- /// \param FiniCB Finalization Callback. Will be called when finalizing region.
- /// \param Conditional Indicate if the entry call result will be used
- /// to evaluate a conditional of whether a thread will execute
- /// body code or not. Defaults to false.
- /// \param HasFinalize Indicate if the directive will require finalization
- /// and has a finalization callback in the stack that
- /// should be called. Defaults to true.
- /// \param IsCancellable If HasFinalize is set to true, indicate if
- /// the directive should be cancellable. Defaults to false.
- /// \return The insertion point after the region
- InsertPointTy
- EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
- Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB, bool Conditional = false,
- bool HasFinalize = true, bool IsCancellable = false);
- /// Build a name from \p Parts joined with the given (platform-specific) separators; the result is returned as a std::string.
- /// \param Parts Different parts of the final name that need separation.
- /// \param FirstSeparator First separator used between the initial two
- /// parts of the name.
- /// \param Separator Separator used between all of the remaining consecutive
- /// parts of the name.
- static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
- StringRef FirstSeparator,
- StringRef Separator);
- /// Gets (if a variable with the given name already exists) or creates an
- /// internal global variable with the specified Name. The created variable has
- /// linkage CommonLinkage by default and is initialized by null value.
- /// \param Ty Type of the global variable. If it already exists, the type must be the same.
- /// \param Name Name of the variable.
- /// \param AddressSpace Defaults to 0. NOTE(review): presumably the address space of the created global; confirm.
- Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
- unsigned AddressSpace = 0);
- /// Returns the corresponding lock object for the specified critical region
- /// name. If the lock object does not exist it is created, otherwise the
- /// reference to the existing copy is returned.
- /// \param CriticalName Name of the critical region.
- /// \returns The lock object as a Value *.
- Value *getOMPCriticalRegionLock(StringRef CriticalName);
- /// Callback type for Atomic Expression update
- /// ex:
- /// \code{.cpp}
- /// unsigned x = 0;
- /// #pragma omp atomic update
- /// x = Expr(x_old); //Expr() is any legal operation
- /// \endcode
- ///
- /// \param XOld the value of the atomic memory address to use for update
- /// \param IRB reference to the IRBuilder to use
- /// Note: the alias itself is const-qualified, so `AtomicUpdateCallbackTy &` parameters are references to const.
- /// \returns Value to update X to.
- using AtomicUpdateCallbackTy =
- const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
- private:
- enum AtomicKind { Read, Write, Update, Capture }; // OpenMP atomic operation kinds; consumed by checkAndEmitFlushAfterAtomic.
- /// Determine whether to emit a flush or not.
- ///
- /// \param Loc The insert and source location description.
- /// \param AO The required atomic ordering.
- /// \param AK The OpenMP atomic operation kind used.
- ///
- /// \returns whether a flush was emitted or not
- bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
- AtomicOrdering AO, AtomicKind AK);
- /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
- /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
- /// Only Scalar data types.
- ///
- /// \param AllocIP Instruction to create AllocaInst before.
- /// \param X The target atomic pointer to be updated
- /// \param XElemTy The element type of the atomic pointer.
- /// \param Expr The value to update X with.
- /// \param AO Atomic ordering of the generated atomic
- /// instructions.
- /// \param RMWOp The binary operation used for update. If the
- /// operation is not supported by atomicRMW,
- /// or belongs to {FADD, FSUB, BAD_BINOP},
- /// then a `cmpExch` based atomic will be generated.
- /// \param UpdateOp Code generator for complex expressions that cannot be
- /// expressed through atomicrmw instruction.
- /// \param VolatileX true if \a X is volatile.
- /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
- /// update expression, false otherwise.
- /// (e.g. true for X = X BinOp Expr)
- ///
- /// \returns A pair of the old value of X before the update, and the value
- /// used for the update.
- std::pair<Value *, Value *>
- emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
- AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
- bool IsXBinopExpr);
- /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
- ///
- /// \returns The instruction
- Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
- AtomicRMWInst::BinOp RMWOp);
- public:
- /// A struct to pack relevant information while generating atomic ops; every member has a null/false default.
- struct AtomicOpValue {
- Value *Var = nullptr; // The operand value.
- Type *ElemTy = nullptr; // Element type. NOTE(review): presumably the pointee type of Var; confirm.
- bool IsSigned = false;
- bool IsVolatile = false;
- };
- /// Emit atomic Read for : V = X --- Only Scalar data types.
- ///
- /// \param Loc The insert and source location description.
- /// \param X The target pointer to be atomically read.
- /// \param V Memory address where to store the atomically read
- /// value.
- /// \param AO Atomic ordering of the generated atomic
- /// instructions.
- ///
- /// \return Insertion point after generated atomic read IR.
- InsertPointTy createAtomicRead(const LocationDescription &Loc,
- AtomicOpValue &X, AtomicOpValue &V,
- AtomicOrdering AO);
- /// Emit atomic write for : X = Expr --- Only Scalar data types.
- ///
- /// \param Loc The insert and source location description.
- /// \param X The target pointer to be atomically written to.
- /// \param Expr The value to store.
- /// \param AO Atomic ordering of the generated atomic
- /// instructions.
- ///
- /// \return Insertion point after generated atomic Write IR.
- InsertPointTy createAtomicWrite(const LocationDescription &Loc,
- AtomicOpValue &X, Value *Expr,
- AtomicOrdering AO);
- /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
- /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
- /// Only Scalar data types.
- ///
- /// \param Loc The insert and source location description.
- /// \param AllocIP Instruction to create AllocaInst before.
- /// \param X The target atomic pointer to be updated
- /// \param Expr The value to update X with.
- /// \param AO Atomic ordering of the generated atomic instructions.
- /// \param RMWOp The binary operation used for update. If the operation
- /// is not supported by atomicRMW, or belongs to
- /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
- /// atomic will be generated.
- /// \param UpdateOp Code generator for complex expressions that cannot be
- /// expressed through atomicrmw instruction.
- /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
- /// update expression, false otherwise.
- /// (e.g. true for X = X BinOp Expr)
- ///
- /// \return Insertion point after generated atomic update IR.
- InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
- Instruction *AllocIP, AtomicOpValue &X,
- Value *Expr, AtomicOrdering AO,
- AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp,
- bool IsXBinopExpr);
- /// Emit atomic update for constructs: --- Only Scalar data types
- /// V = X; X = X BinOp Expr ,
- /// X = X BinOp Expr; V = X,
- /// V = X; X = Expr BinOp X,
- /// X = Expr BinOp X; V = X,
- /// V = X; X = UpdateOp(X),
- /// X = UpdateOp(X); V = X,
- ///
- /// \param Loc The insert and source location description.
- /// \param AllocIP Instruction to create AllocaInst before.
- /// \param X The target atomic pointer to be updated
- /// \param V Memory address where to store captured value
- /// \param Expr The value to update X with.
- /// \param AO Atomic ordering of the generated atomic instructions
- /// \param RMWOp The binary operation used for update. If the
- /// operation is not supported by atomicRMW, or belongs to
- /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based
- /// atomic will be generated.
- /// \param UpdateOp Code generator for complex expressions that cannot be
- /// expressed through atomicrmw instruction.
- /// \param UpdateExpr true if X is an in place update of the form
- /// X = X BinOp Expr or X = Expr BinOp X
- /// \param IsPostfixUpdate true if original value of 'x' must be stored in
- /// 'v', not an updated one.
- /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
- /// update expression, false otherwise.
- /// (e.g. true for X = X BinOp Expr)
- ///
- /// \return Insertion point after generated atomic capture IR.
- InsertPointTy
- createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
- AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
- AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
- bool IsPostfixUpdate, bool IsXBinopExpr);
- /// Create the control flow structure of a canonical OpenMP loop.
- ///
- /// The emitted loop will be disconnected, i.e. no edge to the loop's
- /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
- /// IRBuilder location is not preserved.
- ///
- /// \param DL DebugLoc used for the instructions in the skeleton.
- /// \param TripCount Value to be used for the trip count.
- /// \param F Function in which to insert the BasicBlocks.
- /// \param PreInsertBefore Where to insert BBs that execute before the body,
- /// typically the body itself.
- /// \param PostInsertBefore Where to insert BBs that execute after the body.
- /// \param Name Base name used to derive BB
- /// and instruction names; defaults to an empty Twine (`{}`).
- ///
- /// \returns The CanonicalLoopInfo that represents the emitted loop.
- CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
- Function *F,
- BasicBlock *PreInsertBefore,
- BasicBlock *PostInsertBefore,
- const Twine &Name = {});
- };
- /// Class to represent the control flow structure of an OpenMP canonical loop.
- ///
- /// The control-flow structure is standardized for easy consumption by
- /// directives associated with loops. For instance, the worksharing-loop
- /// construct may change this control flow such that each loop iteration is
- /// executed on only one thread. The constraints of a canonical loop in brief
- /// are:
- ///
- /// * The number of loop iterations must have been computed before entering the
- /// loop.
- ///
- /// * Has an (unsigned) logical induction variable that starts at zero and
- /// increments by one.
- ///
- /// * The loop's CFG itself has no side-effects. The OpenMP specification
- /// itself allows side-effects, but the order in which they happen, including
- /// how often or whether at all, is unspecified. We expect that the frontend
- /// will emit those side-effect instructions somewhere (e.g. before the loop)
- /// such that the CanonicalLoopInfo itself can be side-effect free.
- ///
- /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
- /// execution of a loop body that satisfies these constraints. It does NOT
- /// represent arbitrary SESE regions that happen to contain a loop. Do not use
- /// CanonicalLoopInfo for such purposes.
- ///
- /// The control flow can be described as follows:
- ///
- ///     Preheader
- ///         |
- ///   /-> Header
- ///   |     |
- ///   |    Cond---\
- ///   |     |     |
- ///   |    Body   |
- ///   |    | |    |
- ///   |   <...>   |
- ///   |    | |    |
- ///   \--Latch    |
- ///               |
- ///              Exit
- ///               |
- ///             After
- ///
- /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
- /// including) and end at AfterIP (at the After's first instruction, excluding).
- /// That is, instructions in the Preheader and After blocks (except the
- /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
- /// side-effects. Typically, the Preheader is used to compute the loop's trip
- /// count. The instructions from BodyIP (at the Body block's first instruction,
- /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
- /// control and thus can have side-effects. The body block is the single entry
- /// point into the loop body, which may contain arbitrary control flow as long
- /// as all control paths eventually branch to the Latch block.
- ///
- /// TODO: Consider adding another standardized BasicBlock between Body CFG and
- /// Latch to guarantee that there is only a single edge to the latch. It would
- /// make loop transformations easier by not needing to consider multiple
- /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
- /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
- /// executes after each body iteration.
- ///
- /// There must be no loop-carried dependencies through llvm::Values. This is
- /// equivalent to requiring that the Latch has no PHINode and the Header's only
- /// PHINode is for the induction variable.
- ///
- /// All code in Header, Cond, Latch and Exit (plus the terminator of the
- /// Preheader) is CanonicalLoopInfo's responsibility and its build-up is checked
- /// by assertOK(). It is expected to not be modified unless explicitly
- /// modifying the CanonicalLoopInfo through a method that applies an OpenMP
- /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
- /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
- /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
- /// anymore as its underlying control flow may not exist anymore.
- /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
- /// may also return a new CanonicalLoopInfo that can be passed to other
- /// loop-associated construct implementing methods. These loop-transforming
- /// methods may either create a new CanonicalLoopInfo usually using
- /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
- /// modify one of the input CanonicalLoopInfo and return it as representing the
- /// modified loop. What is done is an implementation detail of the
- /// transformation-implementing method and callers should always assume that the
- /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
- /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
- /// created by createCanonicalLoop, such that transforming methods do not have
- /// to special case where the CanonicalLoopInfo originated from.
- ///
- /// Generally, methods consuming CanonicalLoopInfo do not need an
- /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
- /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
- /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
- /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
- /// any InsertPoint in the Preheader, After or Block can still be used after
- /// calling such a method.
- ///
- /// TODO: Provide mechanisms for exception handling and cancellation points.
- ///
- /// Defined outside OpenMPIRBuilder because nested classes cannot be
- /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
- class CanonicalLoopInfo {
- friend class OpenMPIRBuilder;
- private:
- /// Control blocks of the loop. Header doubles as the validity marker: it is
- /// the only member that isValid() inspects.
- BasicBlock *Header = nullptr;
- BasicBlock *Cond = nullptr;
- BasicBlock *Latch = nullptr;
- BasicBlock *Exit = nullptr;
- /// Add the control blocks of this loop to \p BBs.
- ///
- /// This does not include any block from the body, including the one returned
- /// by getBody().
- ///
- /// FIXME: This currently includes the Preheader and After blocks even though
- /// their content is (mostly) not under CanonicalLoopInfo's control.
- /// Re-evaluate whether this makes sense.
- void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
- public:
- /// Returns whether this object currently represents the IR of a loop. If
- /// returning false, it may have been consumed by a loop transformation or not
- /// been initialized. Do not use in this case.
- bool isValid() const { return Header != nullptr; }
- /// The preheader ensures that there is only a single edge entering the loop.
- /// Code that must be executed before any loop iteration can be emitted here,
- /// such as computing the loop trip count and begin lifetime markers. Code in
- /// the preheader is not considered part of the canonical loop.
- BasicBlock *getPreheader() const;
- /// The header is the entry for each iteration. In the canonical control flow,
- /// it only contains the PHINode for the induction variable.
- BasicBlock *getHeader() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Header;
- }
- /// The condition block computes whether there is another loop iteration. If
- /// yes, branches to the body; otherwise to the exit block.
- BasicBlock *getCond() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Cond;
- }
- /// The body block is the single entry for a loop iteration and not controlled
- /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
- /// eventually branch to the \p Latch block.
- ///
- /// The body is not stored; it is looked up as the first successor of the
- /// condition block's terminating branch.
- BasicBlock *getBody() const {
- assert(isValid() && "Requires a valid canonical loop");
- return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
- }
- /// Reaching the latch indicates the end of the loop body code. In the
- /// canonical control flow, it only contains the increment of the induction
- /// variable.
- BasicBlock *getLatch() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Latch;
- }
- /// Reaching the exit indicates no more iterations are being executed.
- BasicBlock *getExit() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Exit;
- }
- /// The after block is intended for clean-up code such as lifetime end
- /// markers. It is separate from the exit block to ensure, analogous to the
- /// preheader, it having just a single entry edge and being free from PHI
- /// nodes should there be multiple loop exits (such as from break
- /// statements/cancellations).
- ///
- /// The after block is not stored; it is the single successor of the exit
- /// block.
- BasicBlock *getAfter() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Exit->getSingleSuccessor();
- }
- /// Returns the llvm::Value containing the number of loop iterations. It must
- /// be valid in the preheader and always interpreted as an unsigned integer of
- /// any bit-width.
- ///
- /// The value is read back from the IR: it is operand 1 of the compare that
- /// must be the first instruction of the condition block.
- Value *getTripCount() const {
- assert(isValid() && "Requires a valid canonical loop");
- Instruction *CmpI = &Cond->front();
- assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
- return CmpI->getOperand(1);
- }
- /// Returns the instruction representing the current logical induction
- /// variable. Always unsigned, always starting at 0 with an increment of one.
- ///
- /// This is the PHINode that must be the first instruction of the header.
- Instruction *getIndVar() const {
- assert(isValid() && "Requires a valid canonical loop");
- Instruction *IndVarPHI = &Header->front();
- assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
- return IndVarPHI;
- }
- /// Return the type of the induction variable (and the trip count).
- Type *getIndVarType() const {
- assert(isValid() && "Requires a valid canonical loop");
- return getIndVar()->getType();
- }
- /// Return the insertion point for user code before the loop, i.e. at the
- /// last instruction of the preheader.
- OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
- assert(isValid() && "Requires a valid canonical loop");
- BasicBlock *Preheader = getPreheader();
- return {Preheader, std::prev(Preheader->end())};
- }
- /// Return the insertion point for user code in the body, i.e. the beginning
- /// of the body block.
- OpenMPIRBuilder::InsertPointTy getBodyIP() const {
- assert(isValid() && "Requires a valid canonical loop");
- BasicBlock *Body = getBody();
- return {Body, Body->begin()};
- }
- /// Return the insertion point for user code after the loop, i.e. the
- /// beginning of the after block.
- OpenMPIRBuilder::InsertPointTy getAfterIP() const {
- assert(isValid() && "Requires a valid canonical loop");
- BasicBlock *After = getAfter();
- return {After, After->begin()};
- }
- /// Return the function that contains this loop, i.e. the parent of the
- /// header block.
- Function *getFunction() const {
- assert(isValid() && "Requires a valid canonical loop");
- return Header->getParent();
- }
- /// Consistency self-check.
- void assertOK() const;
- /// Invalidate this loop. That is, the underlying IR does not fulfill the
- /// requirements of an OpenMP canonical loop anymore.
- void invalidate();
- };
- } // end namespace llvm
- #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif
|