OMPIRBuilder.h 110 KB


  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the OpenMPIRBuilder class and helpers used as a convenient
  15. // way to create LLVM instructions for OpenMP directives.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  19. #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  20. #include "llvm/Analysis/MemorySSAUpdater.h"
  21. #include "llvm/Frontend/OpenMP/OMPConstants.h"
  22. #include "llvm/IR/DebugLoc.h"
  23. #include "llvm/IR/IRBuilder.h"
  24. #include "llvm/Support/Allocator.h"
  25. #include <forward_list>
  26. #include <map>
  27. #include <optional>
  28. namespace llvm {
  29. class CanonicalLoopInfo;
  30. struct TargetRegionEntryInfo;
  31. class OffloadEntriesInfoManager;
  32. /// Move the instruction after an InsertPoint to the beginning of another
  33. /// BasicBlock.
  34. ///
  35. /// The instructions after \p IP are moved to the beginning of \p New which must
  36. /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
  37. /// \p New will be added such that there is no semantic change. Otherwise, the
  38. /// \p IP insert block remains degenerate and it is up to the caller to insert a
  39. /// terminator.
  40. void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
  41. bool CreateBranch);
  42. /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
  43. /// insert location will stick to after the instruction before the insertion
  44. /// point (instead of moving with the instruction the InsertPoint stores
  45. /// internally).
  46. void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
  47. /// Split a BasicBlock at an InsertPoint, even if the block is degenerate
  48. /// (missing the terminator).
  49. ///
  50. /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
  51. /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
  52. /// is true, a branch to the new successor will new created such that
  53. /// semantically there is no change; otherwise the block of the insertion point
  54. /// remains degenerate and it is the caller's responsibility to insert a
  55. /// terminator. Returns the new successor block.
  56. BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
  57. llvm::Twine Name = {});
  58. /// Split a BasicBlock at \p Builder's insertion point, even if the block is
  59. /// degenerate (missing the terminator). Its new insert location will stick to
  60. /// after the instruction before the insertion point (instead of moving with the
  61. /// instruction the InsertPoint stores internally).
  62. BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
  63. llvm::Twine Name = {});
  64. /// Split a BasicBlock at \p Builder's insertion point, even if the block is
  65. /// degenerate (missing the terminator). Its new insert location will stick to
  66. /// after the instruction before the insertion point (instead of moving with the
  67. /// instruction the InsertPoint stores internally).
  68. BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
  69. /// Like splitBB, but reuses the current block's name for the new name.
  70. BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
  71. llvm::Twine Suffix = ".split");
  72. /// Captures attributes that affect generating LLVM-IR using the
  73. /// OpenMPIRBuilder and related classes. Note that not all attributes are
  74. /// required for all classes or functions. In some use cases the configuration
  75. /// is not necessary at all, because because the only functions that are called
  76. /// are ones that are not dependent on the configuration.
  77. class OpenMPIRBuilderConfig {
  78. public:
  79. /// Flag for specifying if the compilation is done for embedded device code
  80. /// or host code.
  81. std::optional<bool> IsEmbedded;
  82. /// Flag for specifying if the compilation is done for an offloading target,
  83. /// like GPU.
  84. std::optional<bool> IsTargetCodegen;
  85. /// Flag for specifying weather a requires unified_shared_memory
  86. /// directive is present or not.
  87. std::optional<bool> HasRequiresUnifiedSharedMemory;
  88. // Flag for specifying if offloading is mandatory.
  89. std::optional<bool> OpenMPOffloadMandatory;
  90. /// First separator used between the initial two parts of a name.
  91. std::optional<StringRef> FirstSeparator;
  92. /// Separator used between all of the rest consecutive parts of s name
  93. std::optional<StringRef> Separator;
  94. OpenMPIRBuilderConfig() {}
  95. OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen,
  96. bool HasRequiresUnifiedSharedMemory,
  97. bool OpenMPOffloadMandatory)
  98. : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen),
  99. HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
  100. OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
  101. // Getters functions that assert if the required values are not present.
  102. bool isEmbedded() const {
  103. assert(IsEmbedded.has_value() && "IsEmbedded is not set");
  104. return *IsEmbedded;
  105. }
  106. bool isTargetCodegen() const {
  107. assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
  108. return *IsTargetCodegen;
  109. }
  110. bool hasRequiresUnifiedSharedMemory() const {
  111. assert(HasRequiresUnifiedSharedMemory.has_value() &&
  112. "HasUnifiedSharedMemory is not set");
  113. return *HasRequiresUnifiedSharedMemory;
  114. }
  115. bool openMPOffloadMandatory() const {
  116. assert(OpenMPOffloadMandatory.has_value() &&
  117. "OpenMPOffloadMandatory is not set");
  118. return *OpenMPOffloadMandatory;
  119. }
  120. // Returns the FirstSeparator if set, otherwise use the default
  121. // separator depending on isTargetCodegen
  122. StringRef firstSeparator() const {
  123. if (FirstSeparator.has_value())
  124. return *FirstSeparator;
  125. if (isTargetCodegen())
  126. return "_";
  127. return ".";
  128. }
  129. // Returns the Separator if set, otherwise use the default
  130. // separator depending on isTargetCodegen
  131. StringRef separator() const {
  132. if (Separator.has_value())
  133. return *Separator;
  134. if (isTargetCodegen())
  135. return "$";
  136. return ".";
  137. }
  138. void setIsEmbedded(bool Value) { IsEmbedded = Value; }
  139. void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; }
  140. void setHasRequiresUnifiedSharedMemory(bool Value) {
  141. HasRequiresUnifiedSharedMemory = Value;
  142. }
  143. void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
  144. void setSeparator(StringRef S) { Separator = S; }
  145. };
  146. /// An interface to create LLVM-IR for OpenMP directives.
  147. ///
  148. /// Each OpenMP directive has a corresponding public generator method.
  149. class OpenMPIRBuilder {
  150. public:
  151. /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  152. /// not have an effect on \p M (see initialize)
  153. OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
  154. ~OpenMPIRBuilder();
  /// Set up the internal state. This puts structure types, and potentially
  /// other helpers, into the underlying module. It must be called exactly
  /// once, and before any other method.
  void initialize();
  159. void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
  160. /// Finalize the underlying module, e.g., by outlining regions.
  161. /// \param Fn The function to be finalized. If not used,
  162. /// all functions are finalized.
  163. void finalize(Function *Fn = nullptr);
  164. /// Add attributes known for \p FnID to \p Fn.
  165. void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
  166. /// Type used throughout for insertion points.
  167. using InsertPointTy = IRBuilder<>::InsertPoint;
  168. /// Get the create a name using the platform specific separators.
  169. /// \param Parts parts of the final name that needs separation
  170. /// The created name has a first separator between the first and second part
  171. /// and a second separator between all other parts.
  172. /// E.g. with FirstSeparator "$" and Separator "." and
  173. /// parts: "p1", "p2", "p3", "p4"
  174. /// The resulting name is "p1$p2.p3.p4"
  175. /// The separators are retrieved from the OpenMPIRBuilderConfig.
  176. std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
  177. /// Callback type for variable finalization (think destructors).
  178. ///
  179. /// \param CodeGenIP is the insertion point at which the finalization code
  180. /// should be placed.
  181. ///
  182. /// A finalize callback knows about all objects that need finalization, e.g.
  183. /// destruction, when the scope of the currently generated construct is left
  184. /// at the time, and location, the callback is invoked.
  185. using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
  186. struct FinalizationInfo {
  187. /// The finalization callback provided by the last in-flight invocation of
  188. /// createXXXX for the directive of kind DK.
  189. FinalizeCallbackTy FiniCB;
  190. /// The directive kind of the innermost directive that has an associated
  191. /// region which might require finalization when it is left.
  192. omp::Directive DK;
  193. /// Flag to indicate if the directive is cancellable.
  194. bool IsCancellable;
  195. };
  196. /// Push a finalization callback on the finalization stack.
  197. ///
  198. /// NOTE: Temporary solution until Clang CG is gone.
  199. void pushFinalizationCB(const FinalizationInfo &FI) {
  200. FinalizationStack.push_back(FI);
  201. }
  202. /// Pop the last finalization callback from the finalization stack.
  203. ///
  204. /// NOTE: Temporary solution until Clang CG is gone.
  205. void popFinalizationCB() { FinalizationStack.pop_back(); }
  206. /// Callback type for body (=inner region) code generation
  207. ///
  208. /// The callback takes code locations as arguments, each describing a
  209. /// location where additional instructions can be inserted.
  210. ///
  211. /// The CodeGenIP may be in the middle of a basic block or point to the end of
  212. /// it. The basic block may have a terminator or be degenerate. The callback
  213. /// function may just insert instructions at that position, but also split the
  214. /// block (without the Before argument of BasicBlock::splitBasicBlock such
  215. /// that the identify of the split predecessor block is preserved) and insert
  216. /// additional control flow, including branches that do not lead back to what
  217. /// follows the CodeGenIP. Note that since the callback is allowed to split
  218. /// the block, callers must assume that InsertPoints to positions in the
  219. /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
  220. /// such InsertPoints need to be preserved, it can split the block itself
  221. /// before calling the callback.
  222. ///
  223. /// AllocaIP and CodeGenIP must not point to the same position.
  224. ///
  225. /// \param AllocaIP is the insertion point at which new alloca instructions
  226. /// should be placed. The BasicBlock it is pointing to must
  227. /// not be split.
  228. /// \param CodeGenIP is the insertion point at which the body code should be
  229. /// placed.
  230. using BodyGenCallbackTy =
  231. function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
  232. // This is created primarily for sections construct as llvm::function_ref
  233. // (BodyGenCallbackTy) is not storable (as described in the comments of
  234. // function_ref class - function_ref contains non-ownable reference
  235. // to the callable.
  236. using StorableBodyGenCallbackTy =
  237. std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
  238. /// Callback type for loop body code generation.
  239. ///
  240. /// \param CodeGenIP is the insertion point where the loop's body code must be
  241. /// placed. This will be a dedicated BasicBlock with a
  242. /// conditional branch from the loop condition check and
  243. /// terminated with an unconditional branch to the loop
  244. /// latch.
  245. /// \param IndVar is the induction variable usable at the insertion point.
  246. using LoopBodyGenCallbackTy =
  247. function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
  248. /// Callback type for variable privatization (think copy & default
  249. /// constructor).
  250. ///
  251. /// \param AllocaIP is the insertion point at which new alloca instructions
  252. /// should be placed.
  253. /// \param CodeGenIP is the insertion point at which the privatization code
  254. /// should be placed.
  255. /// \param Original The value being copied/created, should not be used in the
  256. /// generated IR.
  257. /// \param Inner The equivalent of \p Original that should be used in the
  258. /// generated IR; this is equal to \p Original if the value is
  259. /// a pointer and can thus be passed directly, otherwise it is
  260. /// an equivalent but different value.
  261. /// \param ReplVal The replacement value, thus a copy or new created version
  262. /// of \p Inner.
  263. ///
  264. /// \returns The new insertion point where code generation continues and
  265. /// \p ReplVal the replacement value.
  266. using PrivatizeCallbackTy = function_ref<InsertPointTy(
  267. InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
  268. Value &Inner, Value *&ReplVal)>;
  269. /// Description of a LLVM-IR insertion point (IP) and a debug/source location
  270. /// (filename, line, column, ...).
  271. struct LocationDescription {
  272. LocationDescription(const IRBuilderBase &IRB)
  273. : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
  274. LocationDescription(const InsertPointTy &IP) : IP(IP) {}
  275. LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
  276. : IP(IP), DL(DL) {}
  277. InsertPointTy IP;
  278. DebugLoc DL;
  279. };
  280. /// Emitter methods for OpenMP directives.
  281. ///
  282. ///{
  283. /// Generator for '#omp barrier'
  284. ///
  285. /// \param Loc The location where the barrier directive was encountered.
  286. /// \param DK The kind of directive that caused the barrier.
  287. /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  288. /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  289. /// should be checked and acted upon.
  290. ///
  291. /// \returns The insertion point after the barrier.
  292. InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
  293. bool ForceSimpleCall = false,
  294. bool CheckCancelFlag = true);
  295. /// Generator for '#omp cancel'
  296. ///
  297. /// \param Loc The location where the directive was encountered.
  298. /// \param IfCondition The evaluated 'if' clause expression, if any.
  299. /// \param CanceledDirective The kind of directive that is cancled.
  300. ///
  301. /// \returns The insertion point after the barrier.
  302. InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
  303. omp::Directive CanceledDirective);
  304. /// Generator for '#omp parallel'
  305. ///
  306. /// \param Loc The insert and source location description.
  307. /// \param AllocaIP The insertion points to be used for alloca instructions.
  308. /// \param BodyGenCB Callback that will generate the region code.
  309. /// \param PrivCB Callback to copy a given variable (think copy constructor).
  310. /// \param FiniCB Callback to finalize variable copies.
  311. /// \param IfCondition The evaluated 'if' clause expression, if any.
  312. /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
  313. /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
  314. /// \param IsCancellable Flag to indicate a cancellable parallel region.
  315. ///
  316. /// \returns The insertion position *after* the parallel.
  317. IRBuilder<>::InsertPoint
  318. createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
  319. BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
  320. FinalizeCallbackTy FiniCB, Value *IfCondition,
  321. Value *NumThreads, omp::ProcBindKind ProcBind,
  322. bool IsCancellable);
  323. /// Generator for the control flow structure of an OpenMP canonical loop.
  324. ///
  325. /// This generator operates on the logical iteration space of the loop, i.e.
  326. /// the caller only has to provide a loop trip count of the loop as defined by
  327. /// base language semantics. The trip count is interpreted as an unsigned
  328. /// integer. The induction variable passed to \p BodyGenCB will be of the same
  329. /// type and run from 0 to \p TripCount - 1. It is up to the callback to
  330. /// convert the logical iteration variable to the loop counter variable in the
  331. /// loop body.
  332. ///
  333. /// \param Loc The insert and source location description. The insert
  334. /// location can be between two instructions or the end of a
  335. /// degenerate block (e.g. a BB under construction).
  336. /// \param BodyGenCB Callback that will generate the loop body code.
  337. /// \param TripCount Number of iterations the loop body is executed.
  338. /// \param Name Base name used to derive BB and instruction names.
  339. ///
  340. /// \returns An object representing the created control flow structure which
  341. /// can be used for loop-associated directives.
  342. CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  343. LoopBodyGenCallbackTy BodyGenCB,
  344. Value *TripCount,
  345. const Twine &Name = "loop");
  346. /// Generator for the control flow structure of an OpenMP canonical loop.
  347. ///
  348. /// Instead of a logical iteration space, this allows specifying user-defined
  349. /// loop counter values using increment, upper- and lower bounds. To
  350. /// disambiguate the terminology when counting downwards, instead of lower
  351. /// bounds we use \p Start for the loop counter value in the first body
  352. /// iteration.
  353. ///
  354. /// Consider the following limitations:
  355. ///
  356. /// * A loop counter space over all integer values of its bit-width cannot be
  357. /// represented. E.g using uint8_t, its loop trip count of 256 cannot be
  358. /// stored into an 8 bit integer):
  359. ///
  360. /// DO I = 0, 255, 1
  361. ///
  362. /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
  363. /// effectively counting downwards:
  364. ///
  365. /// for (uint8_t i = 100u; i > 0; i += 127u)
  366. ///
  367. ///
  368. /// TODO: May need to add additional parameters to represent:
  369. ///
  370. /// * Allow representing downcounting with unsigned integers.
  371. ///
  372. /// * Sign of the step and the comparison operator might disagree:
  373. ///
  374. /// for (int i = 0; i < 42; i -= 1u)
  375. ///
  376. //
  377. /// \param Loc The insert and source location description.
  378. /// \param BodyGenCB Callback that will generate the loop body code.
  379. /// \param Start Value of the loop counter for the first iterations.
  380. /// \param Stop Loop counter values past this will stop the loop.
  381. /// \param Step Loop counter increment after each iteration; negative
  382. /// means counting down.
  383. /// \param IsSigned Whether Start, Stop and Step are signed integers.
  384. /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
  385. /// counter.
  386. /// \param ComputeIP Insertion point for instructions computing the trip
  387. /// count. Can be used to ensure the trip count is available
  388. /// at the outermost loop of a loop nest. If not set,
  389. /// defaults to the preheader of the generated loop.
  390. /// \param Name Base name used to derive BB and instruction names.
  391. ///
  392. /// \returns An object representing the created control flow structure which
  393. /// can be used for loop-associated directives.
  394. CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  395. LoopBodyGenCallbackTy BodyGenCB,
  396. Value *Start, Value *Stop, Value *Step,
  397. bool IsSigned, bool InclusiveStop,
  398. InsertPointTy ComputeIP = {},
  399. const Twine &Name = "loop");
  400. /// Collapse a loop nest into a single loop.
  401. ///
  402. /// Merges loops of a loop nest into a single CanonicalLoopNest representation
  403. /// that has the same number of innermost loop iterations as the origin loop
  404. /// nest. The induction variables of the input loops are derived from the
  405. /// collapsed loop's induction variable. This is intended to be used to
  406. /// implement OpenMP's collapse clause. Before applying a directive,
  407. /// collapseLoops normalizes a loop nest to contain only a single loop and the
  408. /// directive's implementation does not need to handle multiple loops itself.
  409. /// This does not remove the need to handle all loop nest handling by
  410. /// directives, such as the ordered(<n>) clause or the simd schedule-clause
  411. /// modifier of the worksharing-loop directive.
  412. ///
  413. /// Example:
  414. /// \code
  415. /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
  416. /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
  417. /// body(i, j);
  418. /// \endcode
  419. ///
  420. /// After collapsing with Loops={i,j}, the loop is changed to
  421. /// \code
  422. /// for (int ij = 0; ij < 63; ++ij) {
  423. /// int i = ij / 9;
  424. /// int j = ij % 9;
  425. /// body(i, j);
  426. /// }
  427. /// \endcode
  428. ///
  429. /// In the current implementation, the following limitations apply:
  430. ///
  431. /// * All input loops have an induction variable of the same type.
  432. ///
  433. /// * The collapsed loop will have the same trip count integer type as the
  434. /// input loops. Therefore it is possible that the collapsed loop cannot
  435. /// represent all iterations of the input loops. For instance, assuming a
  436. /// 32 bit integer type, and two input loops both iterating 2^16 times, the
  437. /// theoretical trip count of the collapsed loop would be 2^32 iteration,
  438. /// which cannot be represented in an 32-bit integer. Behavior is undefined
  439. /// in this case.
  440. ///
  441. /// * The trip counts of every input loop must be available at \p ComputeIP.
  442. /// Non-rectangular loops are not yet supported.
  443. ///
  444. /// * At each nest level, code between a surrounding loop and its nested loop
  445. /// is hoisted into the loop body, and such code will be executed more
  446. /// often than before collapsing (or not at all if any inner loop iteration
  447. /// has a trip count of 0). This is permitted by the OpenMP specification.
  448. ///
  449. /// \param DL Debug location for instructions added for collapsing,
  450. /// such as instructions to compute/derive the input loop's
  451. /// induction variables.
  452. /// \param Loops Loops in the loop nest to collapse. Loops are specified
  453. /// from outermost-to-innermost and every control flow of a
  454. /// loop's body must pass through its directly nested loop.
  455. /// \param ComputeIP Where additional instruction that compute the collapsed
  456. /// trip count. If not set, defaults to before the generated
  457. /// loop.
  458. ///
  459. /// \returns The CanonicalLoopInfo object representing the collapsed loop.
  460. CanonicalLoopInfo *collapseLoops(DebugLoc DL,
  461. ArrayRef<CanonicalLoopInfo *> Loops,
  462. InsertPointTy ComputeIP);
  463. private:
  464. /// Modifies the canonical loop to be a statically-scheduled workshare loop.
  465. ///
  466. /// This takes a \p LoopInfo representing a canonical loop, such as the one
  467. /// created by \p createCanonicalLoop and emits additional instructions to
  468. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  469. /// runtime function in the preheader to obtain the loop bounds to be used in
  470. /// the current thread, updates the relevant instructions in the canonical
  471. /// loop and calls to an OpenMP runtime finalization function after the loop.
  472. ///
  473. /// \param DL Debug location for instructions added for the
  474. /// workshare-loop construct itself.
  475. /// \param CLI A descriptor of the canonical loop to workshare.
  476. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  477. /// preheader of the loop.
  478. /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  479. /// the loop.
  480. ///
  481. /// \returns Point where to insert code after the workshare construct.
  482. InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  483. InsertPointTy AllocaIP,
  484. bool NeedsBarrier);
  485. /// Modifies the canonical loop a statically-scheduled workshare loop with a
  486. /// user-specified chunk size.
  487. ///
  488. /// \param DL Debug location for instructions added for the
  489. /// workshare-loop construct itself.
  490. /// \param CLI A descriptor of the canonical loop to workshare.
  491. /// \param AllocaIP An insertion point for Alloca instructions usable in
  492. /// the preheader of the loop.
  493. /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
  494. /// loop.
  495. /// \param ChunkSize The user-specified chunk size.
  496. ///
  497. /// \returns Point where to insert code after the workshare construct.
  498. InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
  499. CanonicalLoopInfo *CLI,
  500. InsertPointTy AllocaIP,
  501. bool NeedsBarrier,
  502. Value *ChunkSize);
  503. /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
  504. ///
  505. /// This takes a \p LoopInfo representing a canonical loop, such as the one
  506. /// created by \p createCanonicalLoop and emits additional instructions to
  507. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  508. /// runtime function in the preheader to obtain, and then in each iteration
  509. /// to update the loop counter.
  510. ///
  511. /// \param DL Debug location for instructions added for the
  512. /// workshare-loop construct itself.
  513. /// \param CLI A descriptor of the canonical loop to workshare.
  514. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  515. /// preheader of the loop.
  516. /// \param SchedType Type of scheduling to be passed to the init function.
  517. /// \param NeedsBarrier Indicates whether a barrier must be insterted after
  518. /// the loop.
  519. /// \param Chunk The size of loop chunk considered as a unit when
  520. /// scheduling. If \p nullptr, defaults to 1.
  521. ///
  522. /// \returns Point where to insert code after the workshare construct.
  523. InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  524. InsertPointTy AllocaIP,
  525. omp::OMPScheduleType SchedType,
  526. bool NeedsBarrier,
  527. Value *Chunk = nullptr);
  528. /// Create alternative version of the loop to support if clause
  529. ///
  530. /// OpenMP if clause can require to generate second loop. This loop
  531. /// will be executed when if clause condition is not met. createIfVersion
  532. /// adds branch instruction to the copied loop if \p ifCond is not met.
  533. ///
  534. /// \param Loop Original loop which should be versioned.
  535. /// \param IfCond Value which corresponds to if clause condition
  536. /// \param VMap Value to value map to define relation between
  537. /// original and copied loop values and loop blocks.
  538. /// \param NamePrefix Optional name prefix for if.then if.else blocks.
  539. void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
  540. ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
  541. public:
  542. /// Modifies the canonical loop to be a workshare loop.
  543. ///
  544. /// This takes a \p LoopInfo representing a canonical loop, such as the one
  545. /// created by \p createCanonicalLoop and emits additional instructions to
  546. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  547. /// runtime function in the preheader to obtain the loop bounds to be used in
  548. /// the current thread, updates the relevant instructions in the canonical
  549. /// loop and calls to an OpenMP runtime finalization function after the loop.
  550. ///
  551. /// The concrete transformation is done by applyStaticWorkshareLoop,
  552. /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
  553. /// on the value of \p SchedKind and \p ChunkSize.
  554. ///
  555. /// \param DL Debug location for instructions added for the
  556. /// workshare-loop construct itself.
  557. /// \param CLI A descriptor of the canonical loop to workshare.
  558. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  559. /// preheader of the loop.
  560. /// \param NeedsBarrier Indicates whether a barrier must be insterted after
  561. /// the loop.
  562. /// \param SchedKind Scheduling algorithm to use.
  563. /// \param ChunkSize The chunk size for the inner loop.
  564. /// \param HasSimdModifier Whether the simd modifier is present in the
  565. /// schedule clause.
  566. /// \param HasMonotonicModifier Whether the monotonic modifier is present in
  567. /// the schedule clause.
  568. /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
  569. /// present in the schedule clause.
  570. /// \param HasOrderedClause Whether the (parameterless) ordered clause is
  571. /// present.
  572. ///
  573. /// \returns Point where to insert code after the workshare construct.
  574. InsertPointTy applyWorkshareLoop(
  575. DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
  576. bool NeedsBarrier,
  577. llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
  578. Value *ChunkSize = nullptr, bool HasSimdModifier = false,
  579. bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
  580. bool HasOrderedClause = false);
  581. /// Tile a loop nest.
  582. ///
  583. /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
  584. /// \p Loops must be perfectly nested, from outermost to innermost loop
  585. /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
  586. /// of every loop and every tile size must be usable in the outermost
  587. /// loop's preheader. This implies that the loop nest is rectangular.
  588. ///
  589. /// Example:
  590. /// \code
  591. /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
  592. /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
  593. /// body(i, j);
  594. /// \endcode
  595. ///
  596. /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
  597. /// \code
  598. /// for (int i1 = 0; i1 < 3; ++i1)
  599. /// for (int j1 = 0; j1 < 2; ++j1)
  600. /// for (int i2 = 0; i2 < 5; ++i2)
  601. /// for (int j2 = 0; j2 < 7; ++j2)
  602. /// body(i1*5+i2, j1*7+j2);
  603. /// \endcode
  604. ///
  605. /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
  606. /// are referred to as the floor loops, and the loops i2 and j2 as the tile
  607. /// loops. Tiling also handles non-constant trip counts, non-constant tile
  608. /// sizes and trip counts that are not multiples of the tile size. In the
  609. /// latter case the tile loop of the last floor-loop iteration will have fewer
  610. /// iterations than specified as its tile size.
  611. ///
  612. ///
  613. /// \param DL Debug location for instructions added by tiling, for
  614. /// instance the floor- and tile trip count computation.
  615. /// \param Loops Loops to tile. The CanonicalLoopInfo objects are
  616. /// invalidated by this method, i.e. should not be used after
  617. /// tiling.
  618. /// \param TileSizes For each loop in \p Loops, the tile size for that
  619. /// dimension.
  620. ///
  621. /// \returns A list of generated loops. Contains twice as many loops as the
  622. /// input loop nest; the first half are the floor loops and the
  623. /// second half are the tile loops.
  624. std::vector<CanonicalLoopInfo *>
  625. tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
  626. ArrayRef<Value *> TileSizes);
  627. /// Fully unroll a loop.
  628. ///
  629. /// Instead of unrolling the loop immediately (and duplicating its body
  630. /// instructions), the unrolling is deferred to LLVM's LoopUnrollPass by
  631. /// adding loop metadata.
  632. ///
  633. /// \param DL Debug location for instructions added by unrolling.
  634. /// \param Loop The loop to unroll. The loop will be invalidated.
  635. void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
  636. /// Fully or partially unroll a loop. Whether and how the loop is unrolled is
  637. /// decided by LLVM's LoopUnrollPass.
  638. ///
  639. /// \param DL Debug location for instructions added by unrolling.
  640. /// \param Loop The loop to unroll. The loop will be invalidated.
  641. void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
  642. /// Partially unroll a loop.
  643. ///
  644. /// The CanonicalLoopInfo of the unrolled loop for use with chained
  645. /// loop-associated directives can be requested using \p UnrolledCLI. Not
  646. /// needing the CanonicalLoopInfo allows more efficient code generation by
  647. /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  648. /// A loop-associated directive applied to the unrolled loop needs to know the
  649. /// new trip count which means that if using a heuristically determined unroll
  650. /// factor (\p Factor == 0), that factor must be computed immediately. We are
  651. /// using the same logic as the LoopUnrollPass to derive the unroll factor,
  652. /// but that logic assumes that some canonicalization has taken place (e.g.
  653. /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
  654. /// better when the unrolled loop's CanonicalLoopInfo is not needed.
  655. ///
  656. /// \param DL Debug location for instructions added by unrolling.
  657. /// \param Loop The loop to unroll. The loop will be invalidated.
  658. /// \param Factor The factor to unroll the loop by. A factor of 0
  659. /// indicates that a heuristic should be used to determine
  660. /// the unroll-factor.
  661. /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  662. /// partially unrolled loop. Otherwise, uses loop metadata
  663. /// to defer unrolling to the LoopUnrollPass.
  664. void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
  665. CanonicalLoopInfo **UnrolledCLI);
  666. /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
  667. /// is cloned. The metadata which prevents vectorization is added to
  668. /// the cloned loop. The cloned loop is executed when \p IfCond evaluates
  669. /// to false.
  670. ///
  671. /// \param Loop The loop to simd-ize.
  672. /// \param AlignedVars The map which contains pairs of the pointer
  673. /// and its corresponding alignment.
  674. /// \param IfCond The value which corresponds to the if clause
  675. /// condition.
  676. /// \param Order The enum to map order clause.
  677. /// \param Simdlen The Simdlen length to apply to the simd loop.
  678. /// \param Safelen The Safelen length to apply to the simd loop.
  679. void applySimd(CanonicalLoopInfo *Loop,
  680. MapVector<Value *, Value *> AlignedVars, Value *IfCond,
  681. omp::OrderKind Order, ConstantInt *Simdlen,
  682. ConstantInt *Safelen);
  683. /// Generator for '#omp flush'.
  684. ///
  685. /// \param Loc The location where the flush directive was encountered.
  686. void createFlush(const LocationDescription &Loc);
  687. /// Generator for '#omp taskwait'.
  688. ///
  689. /// \param Loc The location where the taskwait directive was encountered.
  690. void createTaskwait(const LocationDescription &Loc);
  691. /// Generator for '#omp taskyield'.
  692. ///
  693. /// \param Loc The location where the taskyield directive was encountered.
  694. void createTaskyield(const LocationDescription &Loc);
  695. /// A struct to pack the relevant information for an OpenMP depend clause.
       ///
       /// All members have default initializers so that a default-constructed
       /// DependData never carries indeterminate pointers.
  696. struct DependData {
       /// Kind of the dependence; DepUnknown until set.
  697. omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
       /// Type associated with \c DepVal (null until set).
  698. Type *DepValueType = nullptr;
       /// Value the dependence refers to (null until set).
  699. Value *DepVal = nullptr;
  700. explicit DependData() = default;
  701. DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
  702. Value *DepVal)
  703. : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
  704. };
  705. /// Generator for `#omp task`.
  706. ///
  707. /// \param Loc The location where the task construct was encountered.
  708. /// \param AllocaIP The insertion point to be used for alloca instructions.
  709. /// \param BodyGenCB Callback that will generate the region code.
  710. /// \param Tied True if the task is tied, false if the task is untied.
  711. /// \param Final i1 value which is `true` if the task is final, `false` if the
  712. /// task is not final.
  713. /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
  714. /// task is generated, and the encountering thread must
  715. /// suspend the current task region, for which execution
  716. /// cannot be resumed until execution of the structured
  717. /// block that is associated with the generated task is
  718. /// completed.
       /// \param Dependencies DependData entries for the task; empty by default.
       /// NOTE(review): presumably one entry per item of the
       /// task's depend clauses -- confirm.
  719. InsertPointTy createTask(const LocationDescription &Loc,
  720. InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
  721. bool Tied = true, Value *Final = nullptr,
  722. Value *IfCondition = nullptr,
  723. SmallVector<DependData> Dependencies = {});
  724. /// Generator for the taskgroup construct.
  725. ///
  726. /// \param Loc The location where the taskgroup construct was encountered.
  727. /// \param AllocaIP The insertion point to be used for alloca instructions.
  728. /// \param BodyGenCB Callback that will generate the region code.
  729. InsertPointTy createTaskgroup(const LocationDescription &Loc,
  730. InsertPointTy AllocaIP,
  731. BodyGenCallbackTy BodyGenCB);
  732. /// Functions used to generate reductions. Such functions take an insertion
  733. /// point, two Values representing LHS and RHS of the reduction, respectively,
  734. /// and a reference to the value that is updated to refer to the reduction
       /// result. They return an insertion point.
  735. using ReductionGenTy =
  736. function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
  737. /// Functions used to generate atomic reductions. Such functions take an
  738. /// insertion point, the element type of the LHS/RHS pointers, and two Values
  739. /// representing pointers to LHS and RHS of the reduction. They are expected
  740. /// to atomically update the LHS to the reduced value.
  741. using AtomicReductionGenTy =
  742. function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
  743. /// Information about an OpenMP reduction.
       ///
       /// NOTE(review): the two generator members are function_ref objects, which
       /// are presumably non-owning; the referenced callables would then have to
       /// outlive this ReductionInfo -- confirm.
  744. struct ReductionInfo {
  745. ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
  746. ReductionGenTy ReductionGen,
  747. AtomicReductionGenTy AtomicReductionGen)
  748. : ElementType(ElementType), Variable(Variable),
  749. PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
  750. AtomicReductionGen(AtomicReductionGen) {
       // Sanity check: \p Variable must have pointer type and that pointer must
       // either be opaque or point to \p ElementType.
  751. assert(cast<PointerType>(Variable->getType())
  752. ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
  753. }
  754. /// Reduction element type, must match pointee type of variable.
  755. Type *ElementType;
  756. /// Reduction variable of pointer type.
  757. Value *Variable;
  758. /// Thread-private partial reduction variable.
  759. Value *PrivateVariable;
  760. /// Callback for generating the reduction body. The IR produced by this will
  761. /// be used to combine two values in a thread-safe context, e.g., under
  762. /// lock or within the same thread, and therefore need not be atomic.
  763. ReductionGenTy ReductionGen;
  764. /// Callback for generating the atomic reduction body, may be null. The IR
  765. /// produced by this will be used to atomically combine two values during
  766. /// reduction. If null, the implementation will use the non-atomic version
  767. /// along with the appropriate synchronization mechanisms.
  768. AtomicReductionGenTy AtomicReductionGen;
  769. };
  770. // TODO: provide atomic and non-atomic reduction generators for reduction
  771. // operators defined by the OpenMP specification.
  772. /// Generator for '#omp reduction'.
  773. ///
  774. /// Emits the IR instructing the runtime to perform the specific kind of
  775. /// reductions. Expects reduction variables to have been privatized and
  776. /// initialized to reduction-neutral values separately. Emits the calls to
  777. /// runtime functions as well as the reduction function and the basic blocks
  778. /// performing the reduction atomically and non-atomically.
  779. ///
  780. /// The code emitted for the following:
  781. ///
  782. /// \code
  783. /// type var_1;
  784. /// type var_2;
  785. /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
  786. /// /* body */;
  787. /// \endcode
  788. ///
  789. /// corresponds to the following sketch.
  790. ///
  791. /// \code
  792. /// void _outlined_par() {
  793. /// // N is the number of different reductions.
  794. /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
  795. /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
  796. /// _omp_reduction_func,
  797. /// _gomp_critical_user.reduction.var)) {
  798. /// case 1: {
  799. /// var_1 = var_1 <reduction-op> privatized_var_1;
  800. /// var_2 = var_2 <reduction-op> privatized_var_2;
  801. /// // ...
  802. /// __kmpc_end_reduce(...);
  803. /// break;
  804. /// }
  805. /// case 2: {
  806. /// _Atomic<ReductionOp>(var_1, privatized_var_1);
  807. /// _Atomic<ReductionOp>(var_2, privatized_var_2);
  808. /// // ...
  809. /// break;
  810. /// }
  811. /// default: break;
  812. /// }
  813. /// }
  814. ///
  815. /// void _omp_reduction_func(void **lhs, void **rhs) {
  816. /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
  817. /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
  818. /// // ...
  819. /// }
  820. /// \endcode
  821. ///
  822. /// \param Loc The location where the reduction was
  823. /// encountered. Must be within the associated
  824. /// directive and after the last local access to the
  825. /// reduction variables.
  826. /// \param AllocaIP An insertion point suitable for allocas usable
  827. /// in reductions.
  828. /// \param ReductionInfos A list of info on each reduction variable.
  829. /// \param IsNoWait A flag set if the reduction is marked as nowait.
  830. InsertPointTy createReductions(const LocationDescription &Loc,
  831. InsertPointTy AllocaIP,
  832. ArrayRef<ReductionInfo> ReductionInfos,
  833. bool IsNoWait = false);
  834. ///}
  835. /// Return the insertion point currently used by the underlying IRBuilder,
       /// as reported by Builder.saveIP().
  836. InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
  837. /// Update the internal location to \p Loc: the builder's insertion point is
       /// restored from \p Loc.IP and its current debug location is set to
       /// \p Loc.DL.
       ///
       /// \returns true if the insertion point of \p Loc has a (non-null) block.
  838. bool updateToLocation(const LocationDescription &Loc) {
       // NOTE(review): keep this order; restoring the insertion point may itself
       // touch the builder's debug location -- confirm against IRBuilderBase.
  839. Builder.restoreIP(Loc.IP);
  840. Builder.SetCurrentDebugLocation(Loc.DL);
  841. return Loc.IP.getBlock() != nullptr;
  842. }
  843. /// Return the function declaration for the runtime function with \p FnID.
       /// NOTE(review): presumably the declaration is looked up or created in
       /// module \p M -- confirm.
  844. FunctionCallee getOrCreateRuntimeFunction(Module &M,
  845. omp::RuntimeFunction FnID);
       /// Variant of getOrCreateRuntimeFunction that returns a Function pointer and
       /// takes no module argument. NOTE(review): presumably uses the builder's own
       /// module -- confirm.
  846. Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
  847. /// Return the (LLVM-IR) string describing the source location \p LocStr.
       /// \p SrcLocStrSize is passed by non-const reference. NOTE(review):
       /// presumably it receives the size of the returned string -- confirm.
  848. Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
  849. /// Return the (LLVM-IR) string describing the default source location.
       /// \p SrcLocStrSize is passed by non-const reference. NOTE(review):
       /// presumably it receives the size of the returned string -- confirm.
  850. Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
  851. /// Return the (LLVM-IR) string describing the source location identified by
  852. /// the arguments \p FunctionName, \p FileName, \p Line and \p Column.
       /// \p SrcLocStrSize is passed by non-const reference. NOTE(review):
       /// presumably it receives the size of the returned string -- confirm.
  853. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
  854. unsigned Line, unsigned Column,
  855. uint32_t &SrcLocStrSize);
  856. /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
  857. /// fallback if \p DL does not specify the function name. \p F may be null
       /// (the default).
  858. Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
  859. Function *F = nullptr);
  860. /// Return the (LLVM-IR) string describing the source location \p Loc.
       /// \p SrcLocStrSize is passed by non-const reference. NOTE(review):
       /// presumably it receives the size of the returned string -- confirm.
  861. Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
  862. uint32_t &SrcLocStrSize);
  863. /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  864. /// TODO: Create an enum class for the Reserve2Flags.
  865. Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
  866. omp::IdentFlag Flags = omp::IdentFlag(0),
  867. unsigned Reserve2Flags = 0);
  868. /// Create a hidden global flag named \p Name in the module, with initial
  869. /// value \p Value.
  870. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
  871. /// Create an offloading section struct used to register this global at
  872. /// runtime.
  873. ///
  874. /// Type struct __tgt_offload_entry{
  875. /// void *addr; // Pointer to the offload entry info.
  876. /// // (function or global)
  877. /// char *name; // Name of the function or global.
  878. /// size_t size; // Size of the entry info (0 if it is a function).
  879. /// int32_t flags;
  880. /// int32_t reserved;
  881. /// };
  882. ///
  883. /// \param Addr The pointer to the global being registered.
  884. /// \param Name The symbol name associated with the global.
  885. /// \param Size The size in bytes of the global (0 for functions).
  886. /// \param Flags Flags associated with the entry.
  887. /// \param SectionName The section this entry will be placed at; defaults to
       /// "omp_offloading_entries".
  888. void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
  889. int32_t Flags,
  890. StringRef SectionName = "omp_offloading_entries");
  891. /// Generate control flow and cleanup for cancellation.
  892. ///
  893. /// \param CancelFlag Flag indicating if the cancellation is performed.
  894. /// \param CanceledDirective The kind of directive that is canceled.
  895. /// \param ExitCB Extra code to be generated in the exit block; empty by
       /// default.
  896. void emitCancelationCheckImpl(Value *CancelFlag,
  897. omp::Directive CanceledDirective,
  898. FinalizeCallbackTy ExitCB = {});
  899. /// Generate a target region entry call.
  900. ///
  901. /// \param Loc The location at which the request originated and is fulfilled.
  902. /// \param Return Return value of the created function returned by reference.
       /// \param Ident NOTE(review): not documented in the original; presumably the
       /// ident_t* value describing the source location -- confirm.
  903. /// \param DeviceID Identifier for the device via the 'device' clause.
  904. /// \param NumTeams Number of teams for the region via the 'num_teams' clause
  905. /// or 0 if unspecified and -1 if there is no 'teams' clause.
  906. /// \param NumThreads Number of threads via the 'thread_limit' clause.
  907. /// \param HostPtr Pointer to the host-side pointer of the target kernel.
  908. /// \param KernelArgs Array of arguments to the kernel.
  909. InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
  910. Value *Ident, Value *DeviceID, Value *NumTeams,
  911. Value *NumThreads, Value *HostPtr,
  912. ArrayRef<Value *> KernelArgs);
  913. /// Generate a barrier runtime call.
  914. ///
  915. /// \param Loc The location at which the request originated and is fulfilled.
  916. /// \param DK The directive which caused the barrier.
  917. /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  918. /// \param CheckCancelFlag Flag to indicate that the return value of a cancel
  919. /// barrier should be checked and acted upon.
  920. ///
  921. /// \returns The insertion point after the barrier.
  922. InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
  923. omp::Directive DK, bool ForceSimpleCall,
  924. bool CheckCancelFlag);
  925. /// Generate a flush runtime call.
  926. ///
  927. /// \param Loc The location at which the request originated and is fulfilled.
  928. void emitFlush(const LocationDescription &Loc);
  929. /// The finalization stack made up of finalize callbacks currently in-flight,
  930. /// wrapped into FinalizationInfo objects that also reference the finalization
  931. /// target block and the kind of cancellable directive.
  932. SmallVector<FinalizationInfo, 8> FinalizationStack;
  933. /// Check whether the innermost (most recently pushed) finalization entry is
  934. /// cancellable and belongs to a directive of kind \p DK. An empty stack
       /// yields false.
  935. bool isLastFinalizationInfoCancellable(omp::Directive DK) {
  936. if (FinalizationStack.empty()) return false;
  937. const auto &Innermost = FinalizationStack.back();
  938. return Innermost.IsCancellable && Innermost.DK == DK;
  939. }
  940. /// Generate a taskwait runtime call.
  941. ///
  942. /// \param Loc The location at which the request originated and is fulfilled.
  943. void emitTaskwaitImpl(const LocationDescription &Loc);
  944. /// Generate a taskyield runtime call.
  945. ///
  946. /// \param Loc The location at which the request originated and is fulfilled.
  947. void emitTaskyieldImpl(const LocationDescription &Loc);
  948. /// Return a Value holding the current thread ID.
  949. ///
  950. /// \param Ident The ident (ident_t*) describing the query origin.
  951. Value *getOrCreateThreadID(Value *Ident);
  952. /// The OpenMPIRBuilder configuration.
  953. OpenMPIRBuilderConfig Config;
  954. /// The underlying LLVM-IR module (held by reference).
  955. Module &M;
  956. /// The LLVM-IR Builder used to create IR.
  957. IRBuilder<> Builder;
  958. /// Map to remember source location strings.
  959. StringMap<Constant *> SrcLocStrMap;
  960. /// Map to remember existing ident_t*, keyed by a (Constant *, uint64_t) pair.
  961. DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
  962. /// Helper that contains information about regions we need to outline
  963. /// during finalization.
  964. struct OutlineInfo {
  965. using PostOutlineCBTy = std::function<void(Function &)>;
       /// Callback receiving a Function &. NOTE(review): presumably invoked with
       /// the outlined function once outlining is done -- confirm.
  966. PostOutlineCBTy PostOutlineCB;
       /// Blocks delimiting the region. NOTE(review): these pointers have no
       /// default initializers, so they must be assigned before use.
  967. BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
  968. SmallVector<Value *, 2> ExcludeArgsFromAggregate;
  969. /// Collect all blocks in between EntryBB and ExitBB in both the given
  970. /// vector and set.
  971. void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
  972. SmallVectorImpl<BasicBlock *> &BlockVector);
  973. /// Return the function that contains the region to be outlined, i.e. the
       /// parent of EntryBB.
  974. Function *getFunction() const { return EntryBB->getParent(); }
  975. };
  976. /// Collection of regions that need to be outlined during finalization.
  977. SmallVector<OutlineInfo, 16> OutlineInfos;
  978. /// Collection of owned canonical loop objects that eventually need to be
  979. /// freed.
  980. std::forward_list<CanonicalLoopInfo> LoopInfos;
  981. /// Add a new region that will be outlined later.
       ///
       /// \param OI Description of the region. It is moved into OutlineInfos, so
       /// the caller's object is left in a moved-from state.
  982. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(std::move(OI)); }
  983. /// An ordered map of auto-generated variables to their unique names.
  984. /// It stores variables with the following names: 1) ".gomp_critical_user_" +
  985. /// <critical_section_name> + ".var" for "omp critical" directives; 2)
  986. /// <mangled_name_for_global_var> + ".cache." for the cache of threadprivate
  987. /// variables.
       /// NOTE(review): the container is a StringMap; confirm that "ordered" above
       /// is still accurate.
  988. StringMap<Constant*, BumpPtrAllocator> InternalVars;
  989. /// Create the global variable holding the offload mappings information.
       ///
       /// \param Mappings The mapping values to store.
       /// \param VarName NOTE(review): presumably the name given to the created
       /// global variable -- confirm.
  990. GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
  991. std::string VarName);
  992. /// Create the global variable holding the offload names information.
       ///
       /// \param Names The name constants to store.
       /// \param VarName NOTE(review): presumably the name given to the created
       /// global variable -- confirm.
  993. GlobalVariable *
  994. createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
  995. std::string VarName);
  996. struct MapperAllocas {
       // Bundle of the alloca instructions used for calls to mapper functions
       // (filled by createMapperAllocas, consumed by emitMapperCall). All
       // members start out null.
  997. AllocaInst *ArgsBase = nullptr;
  998. AllocaInst *Args = nullptr;
  999. AllocaInst *ArgSizes = nullptr;
  1000. };
  1001. /// Create the alloca instructions used in calls to mapper functions.
        ///
        /// \param Loc The source location description.
        /// \param AllocaIP The insertion point to be used for alloca instructions.
        /// \param NumOperands Number of operands in the call.
        /// \param MapperAllocas Receives the created allocas (passed by reference).
  1002. void createMapperAllocas(const LocationDescription &Loc,
  1003. InsertPointTy AllocaIP, unsigned NumOperands,
  1004. struct MapperAllocas &MapperAllocas);
  1005. /// Create the call for the target mapper function.
        ///
  1006. /// \param Loc The source location description.
  1007. /// \param MapperFunc Function to be called.
  1008. /// \param SrcLocInfo Source location information global.
  1009. /// \param MaptypesArg The argument types.
  1010. /// \param MapnamesArg The argument names.
  1011. /// \param MapperAllocas The AllocaInsts used for the call.
  1012. /// \param DeviceID Device ID for the call.
  1013. /// \param NumOperands Number of operands in the call.
  1014. void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
  1015. Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
  1016. struct MapperAllocas &MapperAllocas, int64_t DeviceID,
  1017. unsigned NumOperands);
  1018. /// Container for the arguments used to pass data to the runtime library.
        /// Every member defaults to nullptr.
  1019. struct TargetDataRTArgs {
  1020. explicit TargetDataRTArgs() {}
  1021. /// The array of base pointers passed to the runtime library.
  1022. Value *BasePointersArray = nullptr;
  1023. /// The array of section pointers passed to the runtime library.
  1024. Value *PointersArray = nullptr;
  1025. /// The array of sizes passed to the runtime library.
  1026. Value *SizesArray = nullptr;
  1027. /// The array of map types passed to the runtime library for the beginning
  1028. /// of the region or for the entire region if there are no separate map
  1029. /// types for the region end.
  1030. Value *MapTypesArray = nullptr;
  1031. /// The array of map types passed to the runtime library for the end of the
  1032. /// region, or nullptr if there are no separate map types for the region
  1033. /// end.
  1034. Value *MapTypesArrayEnd = nullptr;
  1035. /// The array of user-defined mappers passed to the runtime library.
  1036. Value *MappersArray = nullptr;
  1037. /// The array of original declaration names of mapped pointers sent to the
  1038. /// runtime library for debugging.
  1039. Value *MapNamesArray = nullptr;
  1040. };
  1041. /// Class that keeps the information that should be kept throughout
  1042. /// a 'target data' region.
        ///
        /// The query methods are const-qualified so that they can be called through
        /// const references; none of them modifies the object.
  1043. class TargetDataInfo {
  1044. /// Set to true if device pointer information have to be obtained.
  1045. bool RequiresDevicePointerInfo = false;
  1046. /// Set to true if Clang emits separate runtime calls for the beginning and
  1047. /// end of the region. These calls might have separate map type arrays.
  1048. bool SeparateBeginEndCalls = false;
  1049. public:
        /// The runtime argument arrays of the region.
  1050. TargetDataRTArgs RTArgs;
  1051. /// Indicate whether any user-defined mapper exists.
  1052. bool HasMapper = false;
  1053. /// The total number of pointers passed to the runtime library.
  1054. unsigned NumberOfPtrs = 0u;
  1055. explicit TargetDataInfo() {}
  1056. explicit TargetDataInfo(bool RequiresDevicePointerInfo,
  1057. bool SeparateBeginEndCalls)
  1058. : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
  1059. SeparateBeginEndCalls(SeparateBeginEndCalls) {}
  1060. /// Clear information about the data arrays. The two flags passed at
        /// construction are left untouched.
  1061. void clearArrayInfo() {
  1062. RTArgs = TargetDataRTArgs();
  1063. HasMapper = false;
  1064. NumberOfPtrs = 0u;
  1065. }
  1066. /// Return true if the current target data information has valid arrays:
        /// base pointers, pointers, sizes and map types arrays are all set, the
        /// mappers array is set whenever HasMapper is true, and NumberOfPtrs is
        /// non-zero.
  1067. bool isValid() const {
  1068. return RTArgs.BasePointersArray && RTArgs.PointersArray &&
  1069. RTArgs.SizesArray && RTArgs.MapTypesArray &&
  1070. (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
  1071. }
        /// Return the RequiresDevicePointerInfo flag.
  1072. bool requiresDevicePointerInfo() const { return RequiresDevicePointerInfo; }
        /// Return the SeparateBeginEndCalls flag.
  1073. bool separateBeginEndCalls() const { return SeparateBeginEndCalls; }
  1074. };
  1075. /// Emit the arguments to be passed to the runtime library based on the
  1076. /// arrays of base pointers, pointers, sizes, map types, and mappers. If
  1077. /// \p ForEndCall is true, emit map types to be passed for the end of the
  1078. /// region instead of the beginning.
        /// NOTE(review): \p EmitDebug is not described in the original; presumably
        /// it controls emission of the debug-only map names argument -- confirm.
  1079. void emitOffloadingArraysArgument(IRBuilderBase &Builder,
  1080. OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
  1081. OpenMPIRBuilder::TargetDataInfo &Info,
  1082. bool EmitDebug = false,
  1083. bool ForEndCall = false);
  1084. /// Creates offloading entry for the provided entry ID \a ID, address \a
  1085. /// Addr, size \a Size, and flags \a Flags. The trailing
        /// GlobalValue::LinkageTypes parameter is unnamed in this declaration.
        /// NOTE(review): presumably it is the linkage of the entry -- confirm.
  1086. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
  1087. int32_t Flags, GlobalValue::LinkageTypes);
  1088. /// The kinds of errors that can occur when emitting the offload entries and
  1089. /// metadata. Values of this type are handed to the error-report callback
        /// (EmitMetadataErrorReportFunctionTy).
  1090. enum EmitMetadataErrorKind {
  1091. EMIT_MD_TARGET_REGION_ERROR,
  1092. EMIT_MD_DECLARE_TARGET_ERROR,
  1093. EMIT_MD_GLOBAL_VAR_LINK_ERROR
  1094. };
  1095. /// Callback function type used to report metadata emission errors. It
        /// receives the error kind and a TargetRegionEntryInfo (by value).
  1096. using EmitMetadataErrorReportFunctionTy =
  1097. std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
  1098. /// Emit the offloading entries and metadata so that the device codegen side
  1099. /// can easily figure out what to emit. The produced metadata looks like
  1100. /// this:
  1101. ///
  1102. /// !omp_offload.info = !{!1, ...}
  1103. ///
  1104. /// We only generate metadata for functions that contain target regions.
  1105. void createOffloadEntriesAndInfoMetadata(
  1106. OffloadEntriesInfoManager &OffloadEntriesInfoManager,
  1107. EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
  1108. public:
  1109. /// Generator for __kmpc_copyprivate.
  1110. ///
  1111. /// \param Loc The source location description.
  1112. /// \param BufSize Number of elements in the buffer.
  1113. /// \param CpyBuf List of pointers to data to be copied.
  1114. /// \param CpyFn Function to call for copying data.
  1115. /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
  1116. ///
  1117. /// \return The insertion position *after* the CopyPrivate call.
  1118. InsertPointTy createCopyPrivate(const LocationDescription &Loc,
  1119. llvm::Value *BufSize, llvm::Value *CpyBuf,
  1120. llvm::Value *CpyFn, llvm::Value *DidIt);
  1121. /// Generator for '#omp single'.
  1122. ///
  1123. /// \param Loc The source location description.
  1124. /// \param BodyGenCB Callback that will generate the region code.
  1125. /// \param FiniCB Callback to finalize variable copies.
  1126. /// \param IsNowait If false, a barrier is emitted.
  1127. /// \param DidIt Local variable used as a flag to indicate 'single' thread.
  1128. ///
  1129. /// \returns The insertion position *after* the single call.
  1130. InsertPointTy createSingle(const LocationDescription &Loc,
  1131. BodyGenCallbackTy BodyGenCB,
  1132. FinalizeCallbackTy FiniCB, bool IsNowait,
  1133. llvm::Value *DidIt);
  1134. /// Generator for '#omp master'.
  1135. ///
  1136. /// \param Loc The insert and source location description.
  1137. /// \param BodyGenCB Callback that will generate the region code.
  1138. /// \param FiniCB Callback to finalize variable copies.
  1139. ///
  1140. /// \returns The insertion position *after* the master.
  1141. InsertPointTy createMaster(const LocationDescription &Loc,
  1142. BodyGenCallbackTy BodyGenCB,
  1143. FinalizeCallbackTy FiniCB);
  1144. /// Generator for '#omp masked'.
  1145. ///
  1146. /// \param Loc The insert and source location description.
  1147. /// \param BodyGenCB Callback that will generate the region code.
  1148. /// \param FiniCB Callback to finalize variable copies.
        /// \param Filter NOTE(review): not documented in the original; presumably the
        /// value of the 'filter' clause selecting the executing thread
        /// -- confirm.
  1149. ///
  1150. /// \returns The insertion position *after* the masked.
  1151. InsertPointTy createMasked(const LocationDescription &Loc,
  1152. BodyGenCallbackTy BodyGenCB,
  1153. FinalizeCallbackTy FiniCB, Value *Filter);
  1154. /// Generator for '#omp critical'.
  1155. ///
  1156. /// \param Loc The insert and source location description.
  1157. /// \param BodyGenCB Callback that will generate the region body code.
  1158. /// \param FiniCB Callback to finalize variable copies.
  1159. /// \param CriticalName Name of the lock used by the critical directive.
  1160. /// \param HintInst Hint instruction for the hint clause associated with the
        /// critical directive.
  1161. ///
  1162. /// \returns The insertion position *after* the critical.
  1163. InsertPointTy createCritical(const LocationDescription &Loc,
  1164. BodyGenCallbackTy BodyGenCB,
  1165. FinalizeCallbackTy FiniCB,
  1166. StringRef CriticalName, Value *HintInst);
  1167. /// Generator for '#omp ordered depend (source | sink)'.
  1168. ///
  1169. /// \param Loc The insert and source location description.
  1170. /// \param AllocaIP The insertion point to be used for alloca instructions.
  1171. /// \param NumLoops The number of loops in depend clause.
  1172. /// \param StoreValues The values that will be stored in the vector address.
  1173. /// \param Name The name of alloca instruction.
  1174. /// \param IsDependSource If true, depend source; otherwise, depend sink.
  1175. ///
  1176. /// \return The insertion position *after* the ordered.
  1177. InsertPointTy createOrderedDepend(const LocationDescription &Loc,
  1178. InsertPointTy AllocaIP, unsigned NumLoops,
  1179. ArrayRef<llvm::Value *> StoreValues,
  1180. const Twine &Name, bool IsDependSource);
  1181. /// Generator for '#omp ordered [threads | simd]'.
  1182. ///
  1183. /// \param Loc The insert and source location description.
  1184. /// \param BodyGenCB Callback that will generate the region code.
  1185. /// \param FiniCB Callback to finalize variable copies.
  1186. /// \param IsThreads If true, with threads clause or without clause;
  1187. /// otherwise, with simd clause.
  1188. ///
  1189. /// \returns The insertion position *after* the ordered.
  1190. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
  1191. BodyGenCallbackTy BodyGenCB,
  1192. FinalizeCallbackTy FiniCB,
  1193. bool IsThreads);
  1194. /// Generator for '#omp sections'.
  1195. ///
  1196. /// \param Loc The insert and source location description.
  1197. /// \param AllocaIP The insertion point to be used for alloca instructions.
  1198. /// \param SectionCBs Callbacks that will generate body of each section.
  1199. /// \param PrivCB Callback to copy a given variable (think copy constructor).
  1200. /// \param FiniCB Callback to finalize variable copies.
  1201. /// \param IsCancellable Flag to indicate a cancellable parallel region.
  1202. /// \param IsNowait If true, the barrier that ensures all sections are executed
  1203. /// before moving forward will not be generated.
  1204. /// \returns The insertion position *after* the sections.
  1205. InsertPointTy createSections(const LocationDescription &Loc,
  1206. InsertPointTy AllocaIP,
  1207. ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
  1208. PrivatizeCallbackTy PrivCB,
  1209. FinalizeCallbackTy FiniCB, bool IsCancellable,
  1210. bool IsNowait);
  1211. /// Generator for '#omp section'.
  1212. ///
  1213. /// \param Loc The insert and source location description.
  1214. /// \param BodyGenCB Callback that will generate the region body code.
  1215. /// \param FiniCB Callback to finalize variable copies.
  1216. /// \returns The insertion position *after* the section.
  1217. InsertPointTy createSection(const LocationDescription &Loc,
  1218. BodyGenCallbackTy BodyGenCB,
  1219. FinalizeCallbackTy FiniCB);
  1220. /// Generate conditional branch and relevant BasicBlocks through which private
  1221. /// threads copy the 'copyin' variables from Master copy to threadprivate
  1222. /// copies.
  1223. ///
  1224. /// \param IP insertion block for copyin conditional
  1225. /// \param MasterAddr a pointer to the master variable
  1226. /// \param PrivateAddr a pointer to the threadprivate variable
  1227. /// \param IntPtrTy Pointer size type
  1228. /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  1229. /// and copy.in.end block
  1230. ///
  1231. /// \returns The insertion point where copying operation to be emitted.
  1232. InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
  1233. Value *PrivateAddr,
  1234. llvm::IntegerType *IntPtrTy,
  1235. bool BranchtoEnd = true);
  1236. /// Create a runtime call for kmpc_Alloc
  1237. ///
  1238. /// \param Loc The insert and source location description.
  1239. /// \param Size Size of allocated memory space
  1240. /// \param Allocator Allocator information instruction
  1241. /// \param Name Name of call Instruction for OMP_alloc
  1242. ///
  1243. /// \returns CallInst to the OMP_Alloc call
  1244. CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
  1245. Value *Allocator, std::string Name = "");
  1246. /// Create a runtime call for kmpc_free
  1247. ///
  1248. /// \param Loc The insert and source location description.
  1249. /// \param Addr Address of memory space to be freed
  1250. /// \param Allocator Allocator information instruction
  1251. /// \param Name Name of call Instruction for OMP_Free
  1252. ///
  1253. /// \returns CallInst to the OMP_Free call
  1254. CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
  1255. Value *Allocator, std::string Name = "");
  1256. /// Create a runtime call for kmpc_threadprivate_cached
  1257. ///
  1258. /// \param Loc The insert and source location description.
  1259. /// \param Pointer pointer to data to be cached
  1260. /// \param Size size of data to be cached
  1261. /// \param Name Name of call Instruction for callinst
  1262. ///
  1263. /// \returns CallInst to the thread private cache call.
  1264. CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
  1265. llvm::Value *Pointer,
  1266. llvm::ConstantInt *Size,
  1267. const llvm::Twine &Name = Twine(""));
  1268. /// Create a runtime call for __tgt_interop_init
  1269. ///
  1270. /// \param Loc The insert and source location description.
  1271. /// \param InteropVar variable to be allocated
  1272. /// \param InteropType type of interop operation
  1273. /// \param Device device to which offloading will occur
  1274. /// \param NumDependences number of dependence variables
  1275. /// \param DependenceAddress pointer to dependence variables
  1276. /// \param HaveNowaitClause does nowait clause exist
  1277. ///
  1278. /// \returns CallInst to the __tgt_interop_init call
  1279. CallInst *createOMPInteropInit(const LocationDescription &Loc,
  1280. Value *InteropVar,
  1281. omp::OMPInteropType InteropType, Value *Device,
  1282. Value *NumDependences,
  1283. Value *DependenceAddress,
  1284. bool HaveNowaitClause);
  1285. /// Create a runtime call for __tgt_interop_destroy
  1286. ///
  1287. /// \param Loc The insert and source location description.
  1288. /// \param InteropVar variable to be allocated
  1289. /// \param Device device to which offloading will occur
  1290. /// \param NumDependences number of dependence variables
  1291. /// \param DependenceAddress pointer to dependence variables
  1292. /// \param HaveNowaitClause does nowait clause exist
  1293. ///
  1294. /// \returns CallInst to the __tgt_interop_destroy call
  1295. CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
  1296. Value *InteropVar, Value *Device,
  1297. Value *NumDependences,
  1298. Value *DependenceAddress,
  1299. bool HaveNowaitClause);
  1300. /// Create a runtime call for __tgt_interop_use
  1301. ///
  1302. /// \param Loc The insert and source location description.
  1303. /// \param InteropVar variable to be allocated
  1304. /// \param Device device to which offloading will occur
  1305. /// \param NumDependences number of dependence variables
  1306. /// \param DependenceAddress pointer to dependence variables
  1307. /// \param HaveNowaitClause does nowait clause exist
  1308. ///
  1309. /// \returns CallInst to the __tgt_interop_use call
  1310. CallInst *createOMPInteropUse(const LocationDescription &Loc,
  1311. Value *InteropVar, Value *Device,
  1312. Value *NumDependences, Value *DependenceAddress,
  1313. bool HaveNowaitClause);
  1314. /// The `omp target` interface
  1315. ///
  1316. /// For more information about the usage of this interface,
  1317. /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
  1318. ///
  1319. ///{
  1320. /// Create a runtime call for kmpc_target_init
  1321. ///
  1322. /// \param Loc The insert and source location description.
  1323. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  1324. InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
  1325. /// Create a runtime call for kmpc_target_deinit
  1326. ///
  1327. /// \param Loc The insert and source location description.
  1328. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  1329. void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
  1330. ///}
  1331. private:
  1332. // Sets the function attributes expected for the outlined function
  1333. void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
  1334. int32_t NumTeams,
  1335. int32_t NumThreads);
  1336. // Creates the function ID/Address for the given outlined function.
  1337. // In the case of an embedded device function the address of the function is
  1338. // used, in the case of a non-offload function a constant is created.
  1339. Constant *createOutlinedFunctionID(Function *OutlinedFn,
  1340. StringRef EntryFnIDName);
  1341. // Creates the region entry address for the outlined function
  1342. Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
  1343. StringRef EntryFnName);
  1344. public:
  1345. /// Functions used to generate a function with the given name.
  1346. using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
  1347. /// Create a unique name for the entry function using the source location
  1348. /// information of the current target region. The name will be something like:
  1349. ///
  1350. /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
  1351. ///
  1352. /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  1353. /// mangled name of the function that encloses the target region and BB is the
  1354. /// line number of the target region. CC is a count added when more than one
  1355. /// region is located at the same location.
  1356. ///
  1357. /// If this target outline function is not an offload entry, we don't need to
  1358. /// register it. This may happen if it is guarded by an if clause that is
  1359. /// false at compile time, or no target archs have been specified.
  1360. ///
  1361. /// The created target region ID is used by the runtime library to identify
  1362. /// the current target region, so it only has to be unique and not
  1363. /// necessarily point to anything. It could be the pointer to the outlined
  1364. /// function that implements the target region, but we aren't using that so
  1365. /// that the compiler doesn't need to keep that, and could therefore inline
  1366. /// the host function if proven worthwhile during optimization. On the other
  1367. /// hand, if emitting code for the device, the ID has to be the function
  1368. /// address so that it can be retrieved from the offloading entry and launched
  1369. /// by the runtime library. We also mark the outlined function to have
  1370. /// external linkage in case we are emitting code for the device, because
  1371. /// these functions will be entry points to the device.
  1372. /// \param InfoManager The info manager keeping track of the offload entries
  1373. /// \param EntryInfo The entry information about the function
  1374. /// \param GenerateFunctionCallback The callback function to generate the code
  1375. /// \param NumTeams Default number of teams
  1376. /// \param NumThreads Default number of threads
  1377. /// \param IsOffloadEntry Whether the outlined function is an offload entry
  1378. /// \param OutlinedFn Pointer to the outlined function (passed by reference)
  1379. /// \param OutlinedFnID ID of the outlined function (passed by reference)
  1380. void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
  1381. TargetRegionEntryInfo &EntryInfo,
  1382. FunctionGenCallback &GenerateFunctionCallback,
  1383. int32_t NumTeams, int32_t NumThreads,
  1384. bool IsOffloadEntry, Function *&OutlinedFn,
  1385. Constant *&OutlinedFnID);
  1386. /// Registers the given function and sets up the attributes of the function.
  1387. /// Returns the FunctionID.
  1388. ///
  1389. /// \param InfoManager The info manager keeping track of the offload entries
  1390. /// \param EntryInfo The entry information about the function
  1391. /// \param OutlinedFunction Pointer to the outlined function
  1392. /// \param EntryFnName Name of the outlined function
  1393. /// \param EntryFnIDName Name of the ID to be created
  1394. /// \param NumTeams Default number of teams
  1395. /// \param NumThreads Default number of threads
  1396. Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
  1397. TargetRegionEntryInfo &EntryInfo,
  1398. Function *OutlinedFunction,
  1399. StringRef EntryFnName,
  1400. StringRef EntryFnIDName,
  1401. int32_t NumTeams, int32_t NumThreads);
  1402. /// Declarations for LLVM-IR types (simple, array, function and structure) are
  1403. /// generated below. Their names are defined and used in OpenMPKinds.def. Here
  1404. /// we provide the declarations, the initializeTypes function will provide the
  1405. /// values.
  1406. ///
  1407. ///{
  1408. #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
  1409. #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
  1410. ArrayType *VarName##Ty = nullptr; \
  1411. PointerType *VarName##PtrTy = nullptr;
  1412. #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
  1413. FunctionType *VarName = nullptr; \
  1414. PointerType *VarName##Ptr = nullptr;
  1415. #define OMP_STRUCT_TYPE(VarName, StrName, ...) \
  1416. StructType *VarName = nullptr; \
  1417. PointerType *VarName##Ptr = nullptr;
  1418. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  1419. ///}
  1420. private:
  1421. /// Create all simple and struct types exposed by the runtime and remember
  1422. /// the llvm::PointerTypes of them for easy access later.
  1423. void initializeTypes(Module &M);
  1424. /// Common interface for generating entry calls for OMP Directives.
  1425. /// If the directive has a region/body, it will set the insertion
  1426. /// point to the body.
  1427. ///
  1428. /// \param OMPD Directive to generate entry blocks for
  1429. /// \param EntryCall Call to the entry OMP Runtime Function
  1430. /// \param ExitBB block where the region ends.
  1431. /// \param Conditional indicate if the entry call result will be used
  1432. /// to evaluate a conditional of whether a thread will execute
  1433. /// body code or not.
  1434. ///
  1435. /// \return The insertion position in exit block
  1436. InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
  1437. BasicBlock *ExitBB,
  1438. bool Conditional = false);
  1439. /// Common interface to finalize the region
  1440. ///
  1441. /// \param OMPD Directive to generate exiting code for
  1442. /// \param FinIP Insertion point for emitting Finalization code and exit call
  1443. /// \param ExitCall Call to the ending OMP Runtime Function
  1444. /// \param HasFinalize indicate if the directive will require finalization
  1445. /// and has a finalization callback in the stack that
  1446. /// should be called.
  1447. ///
  1448. /// \return The insertion position in exit block
  1449. InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
  1450. InsertPointTy FinIP,
  1451. Instruction *ExitCall,
  1452. bool HasFinalize = true);
  1453. /// Common Interface to generate OMP inlined regions
  1454. ///
  1455. /// \param OMPD Directive to generate inlined region for
  1456. /// \param EntryCall Call to the entry OMP Runtime Function
  1457. /// \param ExitCall Call to the ending OMP Runtime Function
  1458. /// \param BodyGenCB Body code generation callback.
  1459. /// \param FiniCB Finalization Callback. Will be called when finalizing region
  1460. /// \param Conditional indicate if the entry call result will be used
  1461. /// to evaluate a conditional of whether a thread will execute
  1462. /// body code or not.
  1463. /// \param HasFinalize indicate if the directive will require finalization
  1464. /// and has a finalization callback in the stack that
  1465. /// should be called.
  1466. /// \param IsCancellable if HasFinalize is set to true, indicate if the
  1467. /// directive should be cancellable.
  1468. /// \return The insertion point after the region
  1469. InsertPointTy
  1470. EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
  1471. Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
  1472. FinalizeCallbackTy FiniCB, bool Conditional = false,
  1473. bool HasFinalize = true, bool IsCancellable = false);
  1474. /// Get the platform-specific name separator.
  1475. /// \param Parts different parts of the final name that needs separation
  1476. /// \param FirstSeparator First separator used between the initial two
  1477. /// parts of the name.
  1478. /// \param Separator separator used between all of the rest consecutive
  1479. /// parts of the name
  1480. static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
  1481. StringRef FirstSeparator,
  1482. StringRef Separator);
  1483. /// Returns corresponding lock object for the specified critical region
  1484. /// name. If the lock object does not exist it is created, otherwise the
  1485. /// reference to the existing copy is returned.
  1486. /// \param CriticalName Name of the critical region.
  1487. ///
  1488. Value *getOMPCriticalRegionLock(StringRef CriticalName);
  1489. /// Callback type for Atomic Expression update
  1490. /// ex:
  1491. /// \code{.cpp}
  1492. /// unsigned x = 0;
  1493. /// #pragma omp atomic update
  1494. /// x = Expr(x_old); //Expr() is any legal operation
  1495. /// \endcode
  1496. ///
  1497. /// \param XOld the value of the atomic memory address to use for update
  1498. /// \param IRB reference to the IRBuilder to use
  1499. ///
  1500. /// \returns Value to update X to.
  1501. using AtomicUpdateCallbackTy =
  1502. const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
  1503. private:
  1504. enum AtomicKind { Read, Write, Update, Capture, Compare };
  1505. /// Determine whether to emit flush or not
  1506. ///
  1507. /// \param Loc The insert and source location description.
  1508. /// \param AO The required atomic ordering
  1509. /// \param AK The OpenMP atomic operation kind used.
  1510. ///
  1511. /// \returns whether a flush was emitted or not
  1512. bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
  1513. AtomicOrdering AO, AtomicKind AK);
  1514. /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
  1515. /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  1516. /// Only Scalar data types.
  1517. ///
  1518. /// \param AllocaIP The insertion point to be used for alloca
  1519. /// instructions.
  1520. /// \param X The target atomic pointer to be updated
  1521. /// \param XElemTy The element type of the atomic pointer.
  1522. /// \param Expr The value to update X with.
  1523. /// \param AO Atomic ordering of the generated atomic
  1524. /// instructions.
  1525. /// \param RMWOp The binary operation used for update. If
  1526. /// operation is not supported by atomicRMW,
  1527. /// or belong to {FADD, FSUB, BAD_BINOP}.
  1528. /// Then a `cmpExch` based atomic will be generated.
  1529. /// \param UpdateOp Code generator for complex expressions that cannot be
  1530. /// expressed through atomicrmw instruction.
  1531. /// \param VolatileX true if \a X is volatile.
  1532. /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  1533. /// update expression, false otherwise.
  1534. /// (e.g. true for X = X BinOp Expr)
  1535. ///
  1536. /// \returns A pair of the old value of X before the update, and the value
  1537. /// used for the update.
  1538. std::pair<Value *, Value *>
  1539. emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
  1540. AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1541. AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
  1542. bool IsXBinopExpr);
  1543. /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
  1544. ///
  1545. /// \returns The instruction
  1546. Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
  1547. AtomicRMWInst::BinOp RMWOp);
  1548. public:
  1549. /// A struct to pack relevant information while generating atomic ops.
  1550. struct AtomicOpValue {
  1551. Value *Var = nullptr; ///< Value operated on; the createAtomic* docs describe X/V as pointers/addresses.
  1552. Type *ElemTy = nullptr; ///< Element type for \c Var (presumably its pointee type; confirm).
  1553. bool IsSigned = false; ///< Signedness flag; defaults to false.
  1554. bool IsVolatile = false; ///< Volatility flag; defaults to false.
  1555. };
  1556. /// Emit atomic Read for : V = X --- Only Scalar data types.
  1557. ///
  1558. /// \param Loc The insert and source location description.
  1559. /// \param X The target pointer to be atomically read
  1560. /// \param V Memory address where to store atomically read
  1561. /// value
  1562. /// \param AO Atomic ordering of the generated atomic
  1563. /// instructions.
  1564. ///
  1565. /// \return Insertion point after generated atomic read IR.
  1566. InsertPointTy createAtomicRead(const LocationDescription &Loc,
  1567. AtomicOpValue &X, AtomicOpValue &V,
  1568. AtomicOrdering AO);
  1569. /// Emit atomic write for : X = Expr --- Only Scalar data types.
  1570. ///
  1571. /// \param Loc The insert and source location description.
  1572. /// \param X The target pointer to be atomically written to
  1573. /// \param Expr The value to store.
  1574. /// \param AO Atomic ordering of the generated atomic
  1575. /// instructions.
  1576. ///
  1577. /// \return Insertion point after generated atomic Write IR.
  1578. InsertPointTy createAtomicWrite(const LocationDescription &Loc,
  1579. AtomicOpValue &X, Value *Expr,
  1580. AtomicOrdering AO);
  1581. /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
  1582. /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  1583. /// Only Scalar data types.
  1584. ///
  1585. /// \param Loc The insert and source location description.
  1586. /// \param AllocaIP The insertion point to be used for alloca instructions.
  1587. /// \param X The target atomic pointer to be updated
  1588. /// \param Expr The value to update X with.
  1589. /// \param AO Atomic ordering of the generated atomic instructions.
  1590. /// \param RMWOp The binary operation used for update. If operation
  1591. /// is not supported by atomicRMW, or belong to
  1592. /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
  1593. /// atomic will be generated.
  1594. /// \param UpdateOp Code generator for complex expressions that cannot be
  1595. /// expressed through atomicrmw instruction.
  1596. /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  1597. /// update expression, false otherwise.
  1598. /// (e.g. true for X = X BinOp Expr)
  1599. ///
  1600. /// \return Insertion point after generated atomic update IR.
  1601. InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
  1602. InsertPointTy AllocaIP, AtomicOpValue &X,
  1603. Value *Expr, AtomicOrdering AO,
  1604. AtomicRMWInst::BinOp RMWOp,
  1605. AtomicUpdateCallbackTy &UpdateOp,
  1606. bool IsXBinopExpr);
  1607. /// Emit atomic update for constructs: --- Only Scalar data types
  1608. /// V = X; X = X BinOp Expr ,
  1609. /// X = X BinOp Expr; V = X,
  1610. /// V = X; X = Expr BinOp X,
  1611. /// X = Expr BinOp X; V = X,
  1612. /// V = X; X = UpdateOp(X),
  1613. /// X = UpdateOp(X); V = X,
  1614. ///
  1615. /// \param Loc The insert and source location description.
  1616. /// \param AllocaIP The insertion point to be used for alloca instructions.
  1617. /// \param X The target atomic pointer to be updated
  1618. /// \param V Memory address where to store captured value
  1619. /// \param Expr The value to update X with.
  1620. /// \param AO Atomic ordering of the generated atomic instructions
  1621. /// \param RMWOp The binary operation used for update. If
  1622. /// operation is not supported by atomicRMW, or belong to
  1623. /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
  1624. /// atomic will be generated.
  1625. /// \param UpdateOp Code generator for complex expressions that cannot be
  1626. /// expressed through atomicrmw instruction.
  1627. /// \param UpdateExpr true if X is an in place update of the form
  1628. /// X = X BinOp Expr or X = Expr BinOp X
  1629. /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
  1630. /// update expression, false otherwise.
  1631. /// (e.g. true for X = X BinOp Expr)
  1632. /// \param IsPostfixUpdate true if original value of 'x' must be stored in
  1633. /// 'v', not an updated one.
  1634. ///
  1635. /// \return Insertion point after generated atomic capture IR.
  1636. InsertPointTy
  1637. createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
  1638. AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
  1639. AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1640. AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
  1641. bool IsPostfixUpdate, bool IsXBinopExpr);
  1642. /// Emit atomic compare for constructs: --- Only scalar data types
  1643. /// cond-expr-stmt:
  1644. /// x = x ordop expr ? expr : x;
  1645. /// x = expr ordop x ? expr : x;
  1646. /// x = x == e ? d : x;
  1647. /// x = e == x ? d : x; (this one is not in the spec)
  1648. /// cond-update-stmt:
  1649. /// if (x ordop expr) { x = expr; }
  1650. /// if (expr ordop x) { x = expr; }
  1651. /// if (x == e) { x = d; }
  1652. /// if (e == x) { x = d; } (this one is not in the spec)
  1653. /// conditional-update-capture-atomic:
  1654. /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
  1655. /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
  1656. /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  1657. /// IsFailOnly=true)
  1658. /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
  1659. /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  1660. /// IsFailOnly=true)
  1661. ///
  1662. /// \param Loc The insert and source location description.
  1663. /// \param X The target atomic pointer to be updated.
  1664. /// \param V Memory address where to store captured value (for
  1665. /// compare capture only).
  1666. /// \param R Memory address where to store comparison result
  1667. /// (for compare capture with '==' only).
  1668. /// \param E The expected value ('e') for forms that use an
  1669. /// equality comparison or an expression ('expr') for
  1670. /// forms that use 'ordop' (logically an atomic maximum or
  1671. /// minimum).
  1672. /// \param D The desired value for forms that use an equality
  1673. /// comparison. For forms that use 'ordop', it should be
  1674. /// \p nullptr.
  1675. /// \param AO Atomic ordering of the generated atomic instructions.
  1676. /// \param Op Atomic compare operation. It can only be ==, <, or >.
  1677. /// \param IsXBinopExpr True if the conditional statement is in the form where
  1678. /// x is on LHS. It only matters for < or >.
  1679. /// \param IsPostfixUpdate True if original value of 'x' must be stored in
  1680. /// 'v', not an updated one (for compare capture
  1681. /// only).
  1682. /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
  1683. /// only when the comparison fails. This is only valid for
  1684. /// the case the comparison is '=='.
  1685. ///
  1686. /// \return Insertion point after generated atomic capture IR.
  1687. InsertPointTy
  1688. createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
  1689. AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
  1690. AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
  1691. bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
  1692. /// Create the control flow structure of a canonical OpenMP loop.
  1693. ///
  1694. /// The emitted loop will be disconnected, i.e. no edge to the loop's
  1695. /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
  1696. /// IRBuilder location is not preserved.
  1697. ///
  1698. /// \param DL DebugLoc used for the instructions in the skeleton.
  1699. /// \param TripCount Value to be used for the trip count.
  1700. /// \param F Function in which to insert the BasicBlocks.
  1701. /// \param PreInsertBefore Where to insert BBs that execute before the body,
  1702. /// typically the body itself.
  1703. /// \param PostInsertBefore Where to insert BBs that execute after the body.
  1704. /// \param Name Base name used to derive BB
  1705. /// and instruction names.
  1706. ///
  1707. /// \returns The CanonicalLoopInfo that represents the emitted loop.
  1708. CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
  1709. Function *F,
  1710. BasicBlock *PreInsertBefore,
  1711. BasicBlock *PostInsertBefore,
  1712. const Twine &Name = {});
  1713. /// OMP Offload Info Metadata name string
  1714. const std::string ompOffloadInfoName = "omp_offload.info";
  1715. /// Loads all the offload entries information from the host IR
  1716. /// metadata. This function is only meant to be used with device code
  1717. /// generation.
  1718. ///
  1719. /// \param M Module to load Metadata info from. Module passed may be
  1720. /// loaded from bitcode file, i.e., different from OpenMPIRBuilder::M module.
  1721. /// \param OffloadEntriesInfoManager Initialize Offload Entry information.
  1722. void
  1723. loadOffloadInfoMetadata(Module &M,
  1724. OffloadEntriesInfoManager &OffloadEntriesInfoManager);
  1725. /// Gets (if a variable with the given name already exists) or creates an
  1726. /// internal global variable with the specified Name. The created variable has
  1727. /// linkage CommonLinkage by default and is initialized by null value.
  1728. /// \param Ty Type of the global variable. If it already exists the type
  1729. /// must be the same.
  1730. /// \param Name Name of the variable.
  1731. GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
  1732. unsigned AddressSpace = 0);
  1733. };
  1734. /// Data structure to contain the information needed to uniquely identify
  1735. /// a target entry.
  1736. struct TargetRegionEntryInfo {
  1737. std::string ParentName; ///< Name of the function that encloses the target region.
  1738. unsigned DeviceID; ///< Device part of the ID that is unique to the file.
  1739. unsigned FileID; ///< File part of the ID that is unique to the file.
  1740. unsigned Line; ///< Line number of the target region.
  1741. unsigned Count; ///< Count used when several regions share the same location.
  1742. TargetRegionEntryInfo() // Empty parent name, all numeric fields zero.
  1743. : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {}
  1744. TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
  1745. unsigned FileID, unsigned Line, unsigned Count = 0)
  1746. : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
  1747. Count(Count) {}
  1748. static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
  1749. StringRef ParentName,
  1750. unsigned DeviceID, unsigned FileID,
  1751. unsigned Line, unsigned Count); // Defined out of line; presumably writes the entry name into Name.
  1752. bool operator<(const TargetRegionEntryInfo &RHS) const { // Lexicographic over all fields; std::tie compares by reference, so no string copies.
  1753. return std::tie(ParentName, DeviceID, FileID, Line, Count) <
  1754. std::tie(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
  1755. RHS.Count);
  1756. }
  1757. };
  1758. /// Class that manages information about offload code regions and data
  1759. class OffloadEntriesInfoManager {
  1760. /// Builder configuration; replaced wholesale by setConfig().
  1761. OpenMPIRBuilderConfig Config;
  1762. unsigned OffloadingEntriesNum = 0; ///< Number of entries registered so far.
  1763. public:
  1764. void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
  1765. /// Base class of the entries info.
  1766. class OffloadEntryInfo {
  1767. public:
  1768. /// Kind of a given entry.
  1769. enum OffloadingEntryInfoKinds : unsigned {
  1770. /// Entry is a target region.
  1771. OffloadingEntryInfoTargetRegion = 0,
  1772. /// Entry is a declare target variable.
  1773. OffloadingEntryInfoDeviceGlobalVar = 1,
  1774. /// Invalid entry info.
  1775. OffloadingEntryInfoInvalid = ~0u
  1776. };
  1777. protected:
  1778. OffloadEntryInfo() = delete; // A kind must always be supplied.
  1779. explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} // Order stays ~0u, so isValid() is false.
  1780. explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
  1781. uint32_t Flags)
  1782. : Flags(Flags), Order(Order), Kind(Kind) {}
  1783. ~OffloadEntryInfo() = default; // Protected, non-virtual destructor.
  1784. public:
  1785. bool isValid() const { return Order != ~0u; } // ~0u is the default "no order assigned" value.
  1786. unsigned getOrder() const { return Order; }
  1787. OffloadingEntryInfoKinds getKind() const { return Kind; }
  1788. uint32_t getFlags() const { return Flags; }
  1789. void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
  1790. Constant *getAddress() const { return cast_or_null<Constant>(Addr); } // A null handle is passed through as null.
  1791. void setAddress(Constant *V) {
  1792. assert(!Addr.pointsToAliveValue() && "Address has been set before!"); // Refuse to overwrite a live address.
  1793. Addr = V;
  1794. }
  1795. static bool classof(const OffloadEntryInfo *Info) { return true; } // Every entry is an OffloadEntryInfo.
  1796. private:
  1797. /// Address of the entity that has to be mapped for offloading.
  1798. WeakTrackingVH Addr;
  1799. /// Flags associated with the entry (e.g. the target region entry kind).
  1800. uint32_t Flags = 0u;
  1801. /// Order this entry was emitted; ~0u until an order is assigned.
  1802. unsigned Order = ~0u;
  1803. OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; ///< Kind of this entry; invalid unless set by a constructor.
  1804. };
  1805. /// Return true if there are no entries defined.
  1806. bool empty() const;
  1807. /// Return number of entries defined so far.
  1808. unsigned size() const { return OffloadingEntriesNum; }
  1809. OffloadEntriesInfoManager() : Config() {}
  1810. //
  1811. // Target region entries related.
  1812. //
  1813. /// Kind of the target registry entry.
  1814. enum OMPTargetRegionEntryKind : uint32_t {
  1815. /// Mark the entry as target region.
  1816. OMPTargetRegionEntryTargetRegion = 0x0,
  1817. /// Mark the entry as a global constructor.
  1818. OMPTargetRegionEntryCtor = 0x02,
  1819. /// Mark the entry as a global destructor.
  1820. OMPTargetRegionEntryDtor = 0x04,
  1821. };
  1822. /// Target region entries info.
  1823. class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
  1824. /// Address that can be used as the ID of the entry.
  1825. Constant *ID = nullptr;
  1826. public:
  1827. OffloadEntryInfoTargetRegion() // Kind-only base init; ID stays null.
  1828. : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
  1829. explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
  1830. Constant *ID,
  1831. OMPTargetRegionEntryKind Flags)
  1832. : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
  1833. ID(ID) {
  1834. setAddress(Addr); // The address is stored through the base-class setter.
  1835. }
  1836. Constant *getID() const { return ID; }
  1837. void setID(Constant *V) {
  1838. assert(!ID && "ID has been set before!"); // Asserts the ID was not set before.
  1839. ID = V;
  1840. }
  1841. static bool classof(const OffloadEntryInfo *Info) { // Matches entries whose kind is target region.
  1842. return Info->getKind() == OffloadingEntryInfoTargetRegion;
  1843. }
  1844. };
  1845. /// Initialize target region entry.
  1846. /// This is ONLY needed for DEVICE compilation.
  1847. void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
  1848. unsigned Order);
  1849. /// Register target region entry.
  1850. void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
  1851. Constant *Addr, Constant *ID,
  1852. OMPTargetRegionEntryKind Flags);
  1853. /// Return true if a target region entry with the provided information
  1854. /// exists.
  1855. bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
  1856. bool IgnoreAddressId = false) const;
  1857. // Return the Name based on \a EntryInfo using the next available Count.
  1858. void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
  1859. const TargetRegionEntryInfo &EntryInfo);
  1860. /// brief Applies action \a Action on all registered entries.
  1861. typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
  1862. const OffloadEntryInfoTargetRegion &)>
  1863. OffloadTargetRegionEntryInfoActTy;
  1864. void
  1865. actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
  //
  // Device global variable entries related.
  //

  /// Kind of a device global variable entry.
  enum OMPTargetGlobalVarEntryKind : uint32_t {
    OMPTargetGlobalVarEntryTo = 0x0,   ///< Entry is a 'to' declare target.
    OMPTargetGlobalVarEntryLink = 0x1, ///< Entry is a declare target 'link'.
  };
  1876. /// Device global variable entries info.
  1877. class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
  1878. /// Type of the global variable.
  1879. int64_t VarSize;
  1880. GlobalValue::LinkageTypes Linkage;
  1881. public:
  1882. OffloadEntryInfoDeviceGlobalVar()
  1883. : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
  1884. explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
  1885. OMPTargetGlobalVarEntryKind Flags)
  1886. : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
  1887. explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
  1888. int64_t VarSize,
  1889. OMPTargetGlobalVarEntryKind Flags,
  1890. GlobalValue::LinkageTypes Linkage)
  1891. : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
  1892. VarSize(VarSize), Linkage(Linkage) {
  1893. setAddress(Addr);
  1894. }
  1895. int64_t getVarSize() const { return VarSize; }
  1896. void setVarSize(int64_t Size) { VarSize = Size; }
  1897. GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
  1898. void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
  1899. static bool classof(const OffloadEntryInfo *Info) {
  1900. return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
  1901. }
  1902. };
  1903. /// Initialize device global variable entry.
  1904. /// This is ONLY used for DEVICE compilation.
  1905. void initializeDeviceGlobalVarEntryInfo(StringRef Name,
  1906. OMPTargetGlobalVarEntryKind Flags,
  1907. unsigned Order);
  1908. /// Register device global variable entry.
  1909. void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
  1910. int64_t VarSize,
  1911. OMPTargetGlobalVarEntryKind Flags,
  1912. GlobalValue::LinkageTypes Linkage);
  1913. /// Checks if the variable with the given name has been registered already.
  1914. bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
  1915. return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
  1916. }
  1917. /// Applies action \a Action on all registered entries.
  1918. typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
  1919. OffloadDeviceGlobalVarEntryInfoActTy;
  1920. void actOnDeviceGlobalVarEntriesInfo(
  1921. const OffloadDeviceGlobalVarEntryInfoActTy &Action);
  1922. private:
  1923. /// Return the count of entries at a particular source location.
  1924. unsigned
  1925. getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
  1926. /// Update the count of entries at a particular source location.
  1927. void
  1928. incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
  1929. static TargetRegionEntryInfo
  1930. getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
  1931. return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
  1932. EntryInfo.FileID, EntryInfo.Line, 0);
  1933. }
  1934. // Count of entries at a location.
  1935. std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
  1936. // Storage for target region entries kind.
  1937. typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
  1938. OffloadEntriesTargetRegionTy;
  1939. OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
  1940. /// Storage for device global variable entries kind. The storage is to be
  1941. /// indexed by mangled name.
  1942. typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
  1943. OffloadEntriesDeviceGlobalVarTy;
  1944. OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
  1945. };
  /// Class to represent the control flow structure of an OpenMP canonical loop.
  1947. ///
  1948. /// The control-flow structure is standardized for easy consumption by
  1949. /// directives associated with loops. For instance, the worksharing-loop
  1950. /// construct may change this control flow such that each loop iteration is
  1951. /// executed on only one thread. The constraints of a canonical loop in brief
  1952. /// are:
  1953. ///
  1954. /// * The number of loop iterations must have been computed before entering the
  1955. /// loop.
  1956. ///
  1957. /// * Has an (unsigned) logical induction variable that starts at zero and
  1958. /// increments by one.
  1959. ///
  1960. /// * The loop's CFG itself has no side-effects. The OpenMP specification
  1961. /// itself allows side-effects, but the order in which they happen, including
  1962. /// how often or whether at all, is unspecified. We expect that the frontend
  1963. /// will emit those side-effect instructions somewhere (e.g. before the loop)
  1964. /// such that the CanonicalLoopInfo itself can be side-effect free.
  1965. ///
  /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
  /// execution of a loop body that satisfies these constraints. It does NOT
  /// represent arbitrary SESE regions that happen to contain a loop. Do not use
  /// CanonicalLoopInfo for such purposes.
  1970. ///
  1971. /// The control flow can be described as follows:
  1972. ///
  ///     Preheader
  ///        |
  ///  /-> Header
  ///  |     |
  ///  |    Cond---\
  ///  |     |     |
  ///  |    Body   |
  ///  |    | |    |
  ///  |   <...>   |
  ///  |    | |    |
  ///   \--Latch   |
  ///              |
  ///             Exit
  ///              |
  ///            After
  1988. ///
  1989. /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
  1990. /// including) and end at AfterIP (at the After's first instruction, excluding).
  1991. /// That is, instructions in the Preheader and After blocks (except the
  1992. /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
  1993. /// side-effects. Typically, the Preheader is used to compute the loop's trip
  1994. /// count. The instructions from BodyIP (at the Body block's first instruction,
  1995. /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
  1996. /// control and thus can have side-effects. The body block is the single entry
  1997. /// point into the loop body, which may contain arbitrary control flow as long
  1998. /// as all control paths eventually branch to the Latch block.
  1999. ///
  /// TODO: Consider adding another standardized BasicBlock between Body CFG and
  /// Latch to guarantee that there is only a single edge to the latch. It would
  /// make loop transformations easier by not needing to consider multiple
  /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
  /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
  /// executes after each body iteration.
  2006. ///
  /// There must be no loop-carried dependencies through llvm::Values. This is
  /// equivalent to requiring that the Latch has no PHINode and that the
  /// Header's only PHINode is for the induction variable.
  2010. ///
  2011. /// All code in Header, Cond, Latch and Exit (plus the terminator of the
  2012. /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
  2013. /// by assertOK(). They are expected to not be modified unless explicitly
  /// modifying the CanonicalLoopInfo through a method that applies an OpenMP
  2015. /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
  2016. /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
  2017. /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
  2018. /// anymore as its underlying control flow may not exist anymore.
  2019. /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
  2020. /// may also return a new CanonicalLoopInfo that can be passed to other
  2021. /// loop-associated construct implementing methods. These loop-transforming
  2022. /// methods may either create a new CanonicalLoopInfo usually using
  2023. /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
  2024. /// modify one of the input CanonicalLoopInfo and return it as representing the
  2025. /// modified loop. What is done is an implementation detail of
  2026. /// transformation-implementing method and callers should always assume that the
  2027. /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
  2028. /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
  2029. /// created by createCanonicalLoop, such that transforming methods do not have
  2030. /// to special case where the CanonicalLoopInfo originated from.
  2031. ///
  2032. /// Generally, methods consuming CanonicalLoopInfo do not need an
  2033. /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
  2034. /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
  2035. /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
  2036. /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
  2037. /// any InsertPoint in the Preheader, After or Block can still be used after
  2038. /// calling such a method.
  2039. ///
  2040. /// TODO: Provide mechanisms for exception handling and cancellation points.
  2041. ///
  2042. /// Defined outside OpenMPIRBuilder because nested classes cannot be
  2043. /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
  2044. class CanonicalLoopInfo {
  2045. friend class OpenMPIRBuilder;
  2046. private:
  2047. BasicBlock *Header = nullptr;
  2048. BasicBlock *Cond = nullptr;
  2049. BasicBlock *Latch = nullptr;
  2050. BasicBlock *Exit = nullptr;
  2051. /// Add the control blocks of this loop to \p BBs.
  2052. ///
  2053. /// This does not include any block from the body, including the one returned
  2054. /// by getBody().
  2055. ///
  2056. /// FIXME: This currently includes the Preheader and After blocks even though
  2057. /// their content is (mostly) not under CanonicalLoopInfo's control.
  2058. /// Re-evaluated whether this makes sense.
  2059. void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
  2060. /// Sets the number of loop iterations to the given value. This value must be
  2061. /// valid in the condition block (i.e., defined in the preheader) and is
  2062. /// interpreted as an unsigned integer.
  2063. void setTripCount(Value *TripCount);
  2064. /// Replace all uses of the canonical induction variable in the loop body with
  2065. /// a new one.
  2066. ///
  2067. /// The intended use case is to update the induction variable for an updated
  2068. /// iteration space such that it can stay normalized in the 0...tripcount-1
  2069. /// range.
  2070. ///
  2071. /// The \p Updater is called with the (presumable updated) current normalized
  2072. /// induction variable and is expected to return the value that uses of the
  2073. /// pre-updated induction values should use instead, typically dependent on
  2074. /// the new induction variable. This is a lambda (instead of e.g. just passing
  2075. /// the new value) to be able to distinguish the uses of the pre-updated
  2076. /// induction variable and uses of the induction varible to compute the
  2077. /// updated induction variable value.
  2078. void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
  2079. public:
  2080. /// Returns whether this object currently represents the IR of a loop. If
  2081. /// returning false, it may have been consumed by a loop transformation or not
  2082. /// been intialized. Do not use in this case;
  2083. bool isValid() const { return Header; }
  2084. /// The preheader ensures that there is only a single edge entering the loop.
  2085. /// Code that must be execute before any loop iteration can be emitted here,
  2086. /// such as computing the loop trip count and begin lifetime markers. Code in
  2087. /// the preheader is not considered part of the canonical loop.
  2088. BasicBlock *getPreheader() const;
  2089. /// The header is the entry for each iteration. In the canonical control flow,
  2090. /// it only contains the PHINode for the induction variable.
  2091. BasicBlock *getHeader() const {
  2092. assert(isValid() && "Requires a valid canonical loop");
  2093. return Header;
  2094. }
  2095. /// The condition block computes whether there is another loop iteration. If
  2096. /// yes, branches to the body; otherwise to the exit block.
  2097. BasicBlock *getCond() const {
  2098. assert(isValid() && "Requires a valid canonical loop");
  2099. return Cond;
  2100. }
  2101. /// The body block is the single entry for a loop iteration and not controlled
  2102. /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
  2103. /// eventually branch to the \p Latch block.
  2104. BasicBlock *getBody() const {
  2105. assert(isValid() && "Requires a valid canonical loop");
  2106. return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
  2107. }
  2108. /// Reaching the latch indicates the end of the loop body code. In the
  2109. /// canonical control flow, it only contains the increment of the induction
  2110. /// variable.
  2111. BasicBlock *getLatch() const {
  2112. assert(isValid() && "Requires a valid canonical loop");
  2113. return Latch;
  2114. }
  2115. /// Reaching the exit indicates no more iterations are being executed.
  2116. BasicBlock *getExit() const {
  2117. assert(isValid() && "Requires a valid canonical loop");
  2118. return Exit;
  2119. }
  2120. /// The after block is intended for clean-up code such as lifetime end
  2121. /// markers. It is separate from the exit block to ensure, analogous to the
  2122. /// preheader, it having just a single entry edge and being free from PHI
  2123. /// nodes should there be multiple loop exits (such as from break
  2124. /// statements/cancellations).
  2125. BasicBlock *getAfter() const {
  2126. assert(isValid() && "Requires a valid canonical loop");
  2127. return Exit->getSingleSuccessor();
  2128. }
  2129. /// Returns the llvm::Value containing the number of loop iterations. It must
  2130. /// be valid in the preheader and always interpreted as an unsigned integer of
  2131. /// any bit-width.
  2132. Value *getTripCount() const {
  2133. assert(isValid() && "Requires a valid canonical loop");
  2134. Instruction *CmpI = &Cond->front();
  2135. assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  2136. return CmpI->getOperand(1);
  2137. }
  2138. /// Returns the instruction representing the current logical induction
  2139. /// variable. Always unsigned, always starting at 0 with an increment of one.
  2140. Instruction *getIndVar() const {
  2141. assert(isValid() && "Requires a valid canonical loop");
  2142. Instruction *IndVarPHI = &Header->front();
  2143. assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
  2144. return IndVarPHI;
  2145. }
  2146. /// Return the type of the induction variable (and the trip count).
  2147. Type *getIndVarType() const {
  2148. assert(isValid() && "Requires a valid canonical loop");
  2149. return getIndVar()->getType();
  2150. }
  2151. /// Return the insertion point for user code before the loop.
  2152. OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
  2153. assert(isValid() && "Requires a valid canonical loop");
  2154. BasicBlock *Preheader = getPreheader();
  2155. return {Preheader, std::prev(Preheader->end())};
  2156. };
  2157. /// Return the insertion point for user code in the body.
  2158. OpenMPIRBuilder::InsertPointTy getBodyIP() const {
  2159. assert(isValid() && "Requires a valid canonical loop");
  2160. BasicBlock *Body = getBody();
  2161. return {Body, Body->begin()};
  2162. };
  2163. /// Return the insertion point for user code after the loop.
  2164. OpenMPIRBuilder::InsertPointTy getAfterIP() const {
  2165. assert(isValid() && "Requires a valid canonical loop");
  2166. BasicBlock *After = getAfter();
  2167. return {After, After->begin()};
  2168. };
  2169. Function *getFunction() const {
  2170. assert(isValid() && "Requires a valid canonical loop");
  2171. return Header->getParent();
  2172. }
  2173. /// Consistency self-check.
  2174. void assertOK() const;
  2175. /// Invalidate this loop. That is, the underlying IR does not fulfill the
  2176. /// requirements of an OpenMP canonical loop anymore.
  2177. void invalidate();
  2178. };
  2179. } // end namespace llvm
  2180. #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  2181. #ifdef __GNUC__
  2182. #pragma GCC diagnostic pop
  2183. #endif