OMPIRBuilder.h 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the OpenMPIRBuilder class and helpers used as a convenient
  15. // way to create LLVM instructions for OpenMP directives.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  19. #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  20. #include "llvm/Frontend/OpenMP/OMPConstants.h"
  21. #include "llvm/IR/DebugLoc.h"
  22. #include "llvm/IR/IRBuilder.h"
  23. #include "llvm/Support/Allocator.h"
  24. #include <forward_list>
  25. namespace llvm {
  26. class CanonicalLoopInfo;
  27. /// An interface to create LLVM-IR for OpenMP directives.
  28. ///
  29. /// Each OpenMP directive has a corresponding public generator method.
  30. class OpenMPIRBuilder {
  31. public:
  32. /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  33. /// not have an effect on \p M (see initialize).
  ///
  /// \param M Module used to initialize the member M; its context
  ///          (M.getContext()) is used to construct the internal Builder.
  34. OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
  35. ~OpenMPIRBuilder();
  36. /// Initialize the internal state; this will put structure types and
  37. /// potentially other helpers into the underlying module. Must be called
  38. /// before any other method and only once!
  39. void initialize();
  40. /// Finalize the underlying module, e.g., by outlining regions.
  41. /// \param Fn The function to be finalized. If not used,
  42. /// all functions are finalized.
  43. void finalize(Function *Fn = nullptr);
  44. /// Add attributes known for \p FnID to \p Fn.
  45. void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
  46. /// Type used throughout for insertion points.
  47. using InsertPointTy = IRBuilder<>::InsertPoint;
  48. /// Callback type for variable finalization (think destructors).
  49. ///
  50. /// \param CodeGenIP is the insertion point at which the finalization code
  51. /// should be placed.
  52. ///
  53. /// A finalize callback knows about all objects that need finalization, e.g.
  54. /// destruction, when the scope of the currently generated construct is left
  55. /// at the time, and location, the callback is invoked.
  56. using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
  /// Bundle describing one pending finalization: the callback to invoke, the
  /// directive kind it belongs to, and whether that directive is cancellable.
  /// This is the element type that pushFinalizationCB appends to
  /// FinalizationStack.
  57. struct FinalizationInfo {
  58. /// The finalization callback provided by the last in-flight invocation of
  59. /// createXXXX for the directive of kind DK.
  60. FinalizeCallbackTy FiniCB;
  61. /// The directive kind of the innermost directive that has an associated
  62. /// region which might require finalization when it is left.
  63. omp::Directive DK;
  64. /// Flag to indicate whether the directive of kind DK is cancellable.
  65. bool IsCancellable;
  66. };
  67. /// Push a finalization callback on the finalization stack.
  68. ///
  /// \param FI The finalization info (callback, directive kind, cancellable
  ///           flag) that is appended to FinalizationStack via push_back.
  ///
  69. /// NOTE: Temporary solution until Clang CG is gone.
  70. void pushFinalizationCB(const FinalizationInfo &FI) {
  71. FinalizationStack.push_back(FI);
  72. }
  73. /// Pop the last finalization callback from the finalization stack.
  74. ///
  /// No emptiness check is performed in this function before calling
  /// pop_back on FinalizationStack.
  /// NOTE(review): presumably each call must be matched by an earlier
  /// pushFinalizationCB -- confirm against the container type.
  ///
  75. /// NOTE: Temporary solution until Clang CG is gone.
  76. void popFinalizationCB() { FinalizationStack.pop_back(); }
  77. /// Callback type for body (=inner region) code generation
  78. ///
  79. /// The callback takes code locations as arguments, each describing a
  80. /// location at which code might need to be generated or a location that is
  81. /// the target of control transfer.
  82. ///
  83. /// \param AllocaIP is the insertion point at which new alloca instructions
  84. /// should be placed.
  85. /// \param CodeGenIP is the insertion point at which the body code should be
  86. /// placed.
  87. /// \param ContinuationBB is the basic block target to leave the body.
  88. ///
  89. /// Note that all blocks pointed to by the arguments have terminators.
  90. using BodyGenCallbackTy =
  91. function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
  92. BasicBlock &ContinuationBB)>;
  93. // This is created primarily for the sections construct, as llvm::function_ref
  94. // (BodyGenCallbackTy) is not storable (as described in the comments of the
  95. // function_ref class: function_ref contains a non-owning reference
  96. // to the callable).
  97. using StorableBodyGenCallbackTy =
  98. std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
  99. BasicBlock &ContinuationBB)>;
  100. /// Callback type for loop body code generation.
  101. ///
  102. /// \param CodeGenIP is the insertion point where the loop's body code must be
  103. /// placed. This will be a dedicated BasicBlock with a
  104. /// conditional branch from the loop condition check and
  105. /// terminated with an unconditional branch to the loop
  106. /// latch.
  107. /// \param IndVar is the induction variable usable at the insertion point.
  108. using LoopBodyGenCallbackTy =
  109. function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
  110. /// Callback type for variable privatization (think copy & default
  111. /// constructor).
  112. ///
  113. /// \param AllocaIP is the insertion point at which new alloca instructions
  114. /// should be placed.
  115. /// \param CodeGenIP is the insertion point at which the privatization code
  116. /// should be placed.
  117. /// \param Original The value being copied/created, should not be used in the
  118. /// generated IR.
  119. /// \param Inner The equivalent of \p Original that should be used in the
  120. /// generated IR; this is equal to \p Original if the value is
  121. /// a pointer and can thus be passed directly, otherwise it is
  122. /// an equivalent but different value.
  123. /// \param ReplVal The replacement value, thus a copy or new created version
  124. /// of \p Inner.
  125. ///
  126. /// \returns The new insertion point where code generation continues and
  127. /// \p ReplVal the replacement value.
  128. using PrivatizeCallbackTy = function_ref<InsertPointTy(
  129. InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
  130. Value &Inner, Value *&ReplVal)>;
  131. /// Description of a LLVM-IR insertion point (IP) and a debug/source location
  132. /// (filename, line, column, ...).
  ///
  /// None of the constructors is marked explicit, so an IRBuilder or an
  /// InsertPointTy converts implicitly to a LocationDescription.
  133. struct LocationDescription {
  /// Capture the insertion point (IRB.saveIP()) and the current debug
  /// location (IRB.getCurrentDebugLocation()) of the builder \p IRB.
  134. template <typename T, typename U>
  135. LocationDescription(const IRBuilder<T, U> &IRB)
  136. : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
  /// Use \p IP as the insertion point; DL is not set by this constructor
  /// and is therefore default-initialized.
  137. LocationDescription(const InsertPointTy &IP) : IP(IP) {}
  /// Use \p IP as the insertion point and \p DL as the debug location.
  138. LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
  139. : IP(IP), DL(DL) {}
  /// The LLVM-IR insertion point.
  140. InsertPointTy IP;
  /// The debug/source location associated with IP.
  141. DebugLoc DL;
  142. };
  143. /// Emitter methods for OpenMP directives.
  144. ///
  145. ///{
  146. /// Generator for '#omp barrier'
  147. ///
  148. /// \param Loc The location where the barrier directive was encountered.
  149. /// \param DK The kind of directive that caused the barrier.
  150. /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  151. /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  152. /// should be checked and acted upon.
  153. ///
  154. /// \returns The insertion point after the barrier.
  155. InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
  156. bool ForceSimpleCall = false,
  157. bool CheckCancelFlag = true);
  158. /// Generator for '#omp cancel'
  159. ///
  160. /// \param Loc The location where the directive was encountered.
  161. /// \param IfCondition The evaluated 'if' clause expression, if any.
  162. /// \param CanceledDirective The kind of directive that is canceled.
  163. ///
  164. /// \returns The insertion point after the cancel.
  165. InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
  166. omp::Directive CanceledDirective);
  167. /// Generator for '#omp parallel'
  168. ///
  169. /// \param Loc The insert and source location description.
  170. /// \param AllocaIP The insertion points to be used for alloca instructions.
  171. /// \param BodyGenCB Callback that will generate the region code.
  172. /// \param PrivCB Callback to copy a given variable (think copy constructor).
  173. /// \param FiniCB Callback to finalize variable copies.
  174. /// \param IfCondition The evaluated 'if' clause expression, if any.
  175. /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
  176. /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
  177. /// \param IsCancellable Flag to indicate a cancellable parallel region.
  178. ///
  179. /// \returns The insertion position *after* the parallel.
  180. IRBuilder<>::InsertPoint
  181. createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
  182. BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
  183. FinalizeCallbackTy FiniCB, Value *IfCondition,
  184. Value *NumThreads, omp::ProcBindKind ProcBind,
  185. bool IsCancellable);
  186. /// Generator for the control flow structure of an OpenMP canonical loop.
  187. ///
  188. /// This generator operates on the logical iteration space of the loop, i.e.
  189. /// the caller only has to provide a loop trip count of the loop as defined by
  190. /// base language semantics. The trip count is interpreted as an unsigned
  191. /// integer. The induction variable passed to \p BodyGenCB will be of the same
  192. /// type and run from 0 to \p TripCount - 1. It is up to the callback to
  193. /// convert the logical iteration variable to the loop counter variable in the
  194. /// loop body.
  195. ///
  196. /// \param Loc The insert and source location description. The insert
  197. /// location can be between two instructions or the end of a
  198. /// degenerate block (e.g. a BB under construction).
  199. /// \param BodyGenCB Callback that will generate the loop body code.
  200. /// \param TripCount Number of iterations the loop body is executed.
  201. /// \param Name Base name used to derive BB and instruction names.
  202. ///
  203. /// \returns An object representing the created control flow structure which
  204. /// can be used for loop-associated directives.
  205. CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  206. LoopBodyGenCallbackTy BodyGenCB,
  207. Value *TripCount,
  208. const Twine &Name = "loop");
  209. /// Generator for the control flow structure of an OpenMP canonical loop.
  210. ///
  211. /// Instead of a logical iteration space, this allows specifying user-defined
  212. /// loop counter values using increment, upper- and lower bounds. To
  213. /// disambiguate the terminology when counting downwards, instead of lower
  214. /// bounds we use \p Start for the loop counter value in the first body
  215. /// iteration.
  216. ///
  217. /// Consider the following limitations:
  218. ///
  219. /// * A loop counter space over all integer values of its bit-width cannot be
  220. /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be
  221. /// stored into an 8-bit integer):
  222. ///
  223. /// DO I = 0, 255, 1
  224. ///
  225. /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
  226. /// effectively counting downwards:
  227. ///
  228. /// for (uint8_t i = 100u; i > 0; i += 127u)
  229. ///
  230. ///
  231. /// TODO: May need to add additional parameters to represent:
  232. ///
  233. /// * Allow representing downcounting with unsigned integers.
  234. ///
  235. /// * Sign of the step and the comparison operator might disagree:
  236. ///
  237. /// for (int i = 0; i < 42; i -= 1u)
  238. ///
  239. ///
  240. /// \param Loc The insert and source location description.
  241. /// \param BodyGenCB Callback that will generate the loop body code.
  242. /// \param Start Value of the loop counter for the first iteration.
  243. /// \param Stop Loop counter values past this will stop the loop.
  244. /// \param Step Loop counter increment after each iteration; negative
  245. /// means counting down.
  246. /// \param IsSigned Whether Start, Stop and Step are signed integers.
  247. /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
  248. /// counter.
  249. /// \param ComputeIP Insertion point for instructions computing the trip
  250. /// count. Can be used to ensure the trip count is available
  251. /// at the outermost loop of a loop nest. If not set,
  252. /// defaults to the preheader of the generated loop.
  253. /// \param Name Base name used to derive BB and instruction names.
  254. ///
  255. /// \returns An object representing the created control flow structure which
  256. /// can be used for loop-associated directives.
  257. CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  258. LoopBodyGenCallbackTy BodyGenCB,
  259. Value *Start, Value *Stop, Value *Step,
  260. bool IsSigned, bool InclusiveStop,
  261. InsertPointTy ComputeIP = {},
  262. const Twine &Name = "loop");
  263. /// Collapse a loop nest into a single loop.
  264. ///
  265. /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
  266. /// that has the same number of innermost loop iterations as the original loop
  267. /// nest. The induction variables of the input loops are derived from the
  268. /// collapsed loop's induction variable. This is intended to be used to
  269. /// implement OpenMP's collapse clause. Before applying a directive,
  270. /// collapseLoops normalizes a loop nest to contain only a single loop and the
  271. /// directive's implementation does not need to handle multiple loops itself.
  272. /// This does not remove the need to handle all loop nest handling by
  273. /// directives, such as the ordered(<n>) clause or the simd schedule-clause
  274. /// modifier of the worksharing-loop directive.
  275. ///
  276. /// Example:
  277. /// \code
  278. /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
  279. /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
  280. /// body(i, j);
  281. /// \endcode
  282. ///
  283. /// After collapsing with Loops={i,j}, the loop is changed to
  284. /// \code
  285. /// for (int ij = 0; ij < 63; ++ij) {
  286. /// int i = ij / 9;
  287. /// int j = ij % 9;
  288. /// body(i, j);
  289. /// }
  290. /// \endcode
  291. ///
  292. /// In the current implementation, the following limitations apply:
  293. ///
  294. /// * All input loops have an induction variable of the same type.
  295. ///
  296. /// * The collapsed loop will have the same trip count integer type as the
  297. /// input loops. Therefore it is possible that the collapsed loop cannot
  298. /// represent all iterations of the input loops. For instance, assuming a
  299. /// 32 bit integer type, and two input loops both iterating 2^16 times, the
  300. /// theoretical trip count of the collapsed loop would be 2^32 iterations,
  301. /// which cannot be represented in a 32-bit integer. Behavior is undefined
  302. /// in this case.
  303. ///
  304. /// * The trip counts of every input loop must be available at \p ComputeIP.
  305. /// Non-rectangular loops are not yet supported.
  306. ///
  307. /// * At each nest level, code between a surrounding loop and its nested loop
  308. /// is hoisted into the loop body, and such code will be executed more
  309. /// often than before collapsing (or not at all if any inner loop iteration
  310. /// has a trip count of 0). This is permitted by the OpenMP specification.
  311. ///
  312. /// \param DL Debug location for instructions added for collapsing,
  313. /// such as instructions to compute/derive the input loop's
  314. /// induction variables.
  315. /// \param Loops Loops in the loop nest to collapse. Loops are specified
  316. /// from outermost-to-innermost and every control flow of a
  317. /// loop's body must pass through its directly nested loop.
  318. /// \param ComputeIP Where additional instructions that compute the collapsed
  319. /// trip count are inserted. If not set, defaults to before the
  320. /// generated loop.
  321. ///
  322. /// \returns The CanonicalLoopInfo object representing the collapsed loop.
  323. CanonicalLoopInfo *collapseLoops(DebugLoc DL,
  324. ArrayRef<CanonicalLoopInfo *> Loops,
  325. InsertPointTy ComputeIP);
  326. /// Modifies the canonical loop to be a statically-scheduled workshare loop.
  327. ///
  328. /// This takes a \p CLI representing a canonical loop, such as the one
  329. /// created by \p createCanonicalLoop and emits additional instructions to
  330. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  331. /// runtime function in the preheader to obtain the loop bounds to be used in
  332. /// the current thread, updates the relevant instructions in the canonical
  333. /// loop and calls to an OpenMP runtime finalization function after the loop.
  334. ///
  335. /// TODO: Workshare loops with static scheduling may contain up to two loops
  336. /// that fulfill the requirements of an OpenMP canonical loop. One for
  337. /// iterating over all iterations of a chunk and another one for iterating
  338. /// over all chunks that are executed on the same thread. Returning
  339. /// CanonicalLoopInfo objects representing them may eventually be useful for
  340. /// the apply clause planned in OpenMP 6.0, but currently whether these are
  341. /// canonical loops is irrelevant.
  342. ///
  343. /// \param DL Debug location for instructions added for the
  344. /// workshare-loop construct itself.
  345. /// \param CLI A descriptor of the canonical loop to workshare.
  346. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  347. /// preheader of the loop.
  348. /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  349. /// the loop.
  350. /// \param Chunk The size of loop chunk considered as a unit when
  351. /// scheduling. If \p nullptr, defaults to 1.
  352. ///
  353. /// \returns Point where to insert code after the workshare construct.
  354. InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  355. InsertPointTy AllocaIP,
  356. bool NeedsBarrier,
  357. Value *Chunk = nullptr);
  358. /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
  359. ///
  360. /// This takes a \p CLI representing a canonical loop, such as the one
  361. /// created by \p createCanonicalLoop and emits additional instructions to
  362. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  363. /// runtime function in the preheader to obtain, and then in each iteration
  364. /// to update the loop counter.
  365. ///
  366. /// \param DL Debug location for instructions added for the
  367. /// workshare-loop construct itself.
  368. /// \param CLI A descriptor of the canonical loop to workshare.
  369. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  370. /// preheader of the loop.
  371. /// \param SchedType Type of scheduling to be passed to the init function.
  372. /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  373. /// the loop.
  374. /// \param Chunk The size of loop chunk considered as a unit when
  375. /// scheduling. If \p nullptr, defaults to 1.
  376. ///
  377. /// \returns Point where to insert code after the workshare construct.
  378. InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  379. InsertPointTy AllocaIP,
  380. omp::OMPScheduleType SchedType,
  381. bool NeedsBarrier,
  382. Value *Chunk = nullptr);
  383. /// Modifies the canonical loop to be a workshare loop.
  384. ///
  385. /// This takes a \p CLI representing a canonical loop, such as the one
  386. /// created by \p createCanonicalLoop and emits additional instructions to
  387. /// turn it into a workshare loop. In particular, it calls to an OpenMP
  388. /// runtime function in the preheader to obtain the loop bounds to be used in
  389. /// the current thread, updates the relevant instructions in the canonical
  390. /// loop and calls to an OpenMP runtime finalization function after the loop.
  391. ///
  392. /// \param DL Debug location for instructions added for the
  393. /// workshare-loop construct itself.
  394. /// \param CLI A descriptor of the canonical loop to workshare.
  395. /// \param AllocaIP An insertion point for Alloca instructions usable in the
  396. /// preheader of the loop.
  397. /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  398. /// the loop.
  399. ///
  400. /// \returns Point where to insert code after the workshare construct.
  401. InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  402. InsertPointTy AllocaIP, bool NeedsBarrier);
  403. /// Tile a loop nest.
  404. ///
  405. /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
  406. /// \p Loops must be perfectly nested, from outermost to innermost loop
  407. /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
  408. /// of every loop and every tile size must be usable in the outermost
  409. /// loop's preheader. This implies that the loop nest is rectangular.
  410. ///
  411. /// Example:
  412. /// \code
  413. /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
  414. /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
  415. /// body(i, j);
  416. /// \endcode
  417. ///
  418. /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
  419. /// \code
  420. /// for (int i1 = 0; i1 < 3; ++i1)
  421. /// for (int j1 = 0; j1 < 2; ++j1)
  422. /// for (int i2 = 0; i2 < 5; ++i2)
  423. /// for (int j2 = 0; j2 < 7; ++j2)
  424. /// body(i1*5+i2, j1*7+j2);
  425. /// \endcode
  426. ///
  427. /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1 are
  428. /// referred to as the floor loops, and the loops i2 and j2 as the tile loops. Tiling also
  429. /// handles non-constant trip counts, non-constant tile sizes and trip counts
  430. /// that are not multiples of the tile size. In the latter case the tile loop
  431. /// of the last floor-loop iteration will have fewer iterations than specified
  432. /// as its tile size.
  433. ///
  434. ///
  435. /// \param DL Debug location for instructions added by tiling, for
  436. /// instance the floor- and tile trip count computation.
  437. /// \param Loops Loops to tile. The CanonicalLoopInfo objects are
  438. /// invalidated by this method, i.e. should not be used after
  439. /// tiling.
  440. /// \param TileSizes For each loop in \p Loops, the tile size for that
  441. /// dimension.
  442. ///
  443. /// \returns A list of generated loops. Contains twice as many loops as the
  444. /// input loop nest; the first half are the floor loops and the
  445. /// second half are the tile loops.
  446. std::vector<CanonicalLoopInfo *>
  447. tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
  448. ArrayRef<Value *> TileSizes);
  449. /// Fully unroll a loop.
  450. ///
  451. /// Instead of unrolling the loop immediately (and duplicating its body
  452. /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
  453. /// metadata.
  454. ///
  455. /// \param DL Debug location for instructions added by unrolling.
  456. /// \param Loop The loop to unroll. The loop will be invalidated.
  457. void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
  458. /// Fully or partially unroll a loop. How the loop is unrolled is determined
  459. /// using LLVM's LoopUnrollPass.
  460. ///
  461. /// \param DL Debug location for instructions added by unrolling.
  462. /// \param Loop The loop to unroll. The loop will be invalidated.
  463. void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
  464. /// Partially unroll a loop.
  465. ///
  466. /// The CanonicalLoopInfo of the unrolled loop for use with chained
  467. /// loop-associated directive can be requested using \p UnrolledCLI. Not
  468. /// needing the CanonicalLoopInfo allows more efficient code generation by
  469. /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  470. /// A loop-associated directive applied to the unrolled loop needs to know the
  471. /// new trip count which means that if using a heuristically determined unroll
  472. /// factor (\p Factor == 0), that factor must be computed immediately. We are
  473. /// using the same logic as the LoopUnrollPass to derive the unroll factor,
  474. /// which assumes that some canonicalization has taken place (e.g.
  475. /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
  476. /// better when the unrolled loop's CanonicalLoopInfo is not needed.
  477. ///
  478. /// \param DL Debug location for instructions added by unrolling.
  479. /// \param Loop The loop to unroll. The loop will be invalidated.
  480. /// \param Factor The factor to unroll the loop by. A factor of 0
  481. /// indicates that a heuristic should be used to determine
  482. /// the unroll-factor.
  483. /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  484. /// partially unrolled loop. Otherwise, uses loop metadata
  485. /// to defer unrolling to the LoopUnrollPass.
  486. void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
  487. CanonicalLoopInfo **UnrolledCLI);
  488. /// Add metadata to simd-ize a loop.
  489. ///
  490. /// \param DL Debug location for instructions added for simd-izing the loop.
  491. /// \param Loop The loop to simd-ize.
  492. void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop);
  493. /// Generator for '#omp flush'
  494. ///
  495. /// \param Loc The location where the flush directive was encountered
  496. void createFlush(const LocationDescription &Loc);
  497. /// Generator for '#omp taskwait'
  498. ///
  499. /// \param Loc The location where the taskwait directive was encountered.
  500. void createTaskwait(const LocationDescription &Loc);
  501. /// Generator for '#omp taskyield'
  502. ///
  503. /// \param Loc The location where the taskyield directive was encountered.
  504. void createTaskyield(const LocationDescription &Loc);
  505. /// Functions used to generate reductions. Such functions take two Values
  506. /// representing LHS and RHS of the reduction, respectively, and a reference
  507. /// to the value that is updated to refer to the reduction result.
  508. using ReductionGenTy =
  509. function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
  510. /// Functions used to generate atomic reductions. Such functions take two
  511. /// Values representing pointers to LHS and RHS of the reduction, as well as
  512. /// the element type of these pointers. They are expected to atomically
  513. /// update the LHS to the reduced value.
  514. using AtomicReductionGenTy =
  515. function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
  516. /// Information about an OpenMP reduction.
  517. struct ReductionInfo {
  518. ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
  519. ReductionGenTy ReductionGen,
  520. AtomicReductionGenTy AtomicReductionGen)
  521. : ElementType(ElementType), Variable(Variable),
  522. PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
  523. AtomicReductionGen(AtomicReductionGen) {
  524. assert(cast<PointerType>(Variable->getType())
  525. ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
  526. }
  527. /// Reduction element type, must match pointee type of variable.
  528. Type *ElementType;
  529. /// Reduction variable of pointer type.
  530. Value *Variable;
  531. /// Thread-private partial reduction variable.
  532. Value *PrivateVariable;
  533. /// Callback for generating the reduction body. The IR produced by this will
  534. /// be used to combine two values in a thread-safe context, e.g., under
  535. /// lock or within the same thread, and therefore need not be atomic.
  536. ReductionGenTy ReductionGen;
  537. /// Callback for generating the atomic reduction body, may be null. The IR
  538. /// produced by this will be used to atomically combine two values during
  539. /// reduction. If null, the implementation will use the non-atomic version
  540. /// along with the appropriate synchronization mechanisms.
  541. AtomicReductionGenTy AtomicReductionGen;
  542. };
  543. // TODO: provide atomic and non-atomic reduction generators for reduction
  544. // operators defined by the OpenMP specification.
  545. /// Generator for '#omp reduction'.
  546. ///
  547. /// Emits the IR instructing the runtime to perform the specific kind of
  548. /// reductions. Expects reduction variables to have been privatized and
  549. /// initialized to reduction-neutral values separately. Emits the calls to
  550. /// runtime functions as well as the reduction function and the basic blocks
  551. /// performing the reduction atomically and non-atomically.
  552. ///
  553. /// The code emitted for the following:
  554. ///
  555. /// \code
  556. /// type var_1;
  557. /// type var_2;
  558. /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
  559. /// /* body */;
  560. /// \endcode
  561. ///
  562. /// corresponds to the following sketch.
  563. ///
  564. /// \code
  565. /// void _outlined_par() {
  566. /// // N is the number of different reductions.
  567. /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
  568. /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
  569. /// _omp_reduction_func,
  570. /// _gomp_critical_user.reduction.var)) {
  571. /// case 1: {
  572. /// var_1 = var_1 <reduction-op> privatized_var_1;
  573. /// var_2 = var_2 <reduction-op> privatized_var_2;
  574. /// // ...
  575. /// __kmpc_end_reduce(...);
  576. /// break;
  577. /// }
  578. /// case 2: {
  579. /// _Atomic<ReductionOp>(var_1, privatized_var_1);
  580. /// _Atomic<ReductionOp>(var_2, privatized_var_2);
  581. /// // ...
  582. /// break;
  583. /// }
  584. /// default: break;
  585. /// }
  586. /// }
  587. ///
  588. /// void _omp_reduction_func(void **lhs, void **rhs) {
  589. /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
  590. /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
  591. /// // ...
  592. /// }
  593. /// \endcode
  594. ///
  595. /// \param Loc The location where the reduction was
  596. /// encountered. Must be within the associated
  597. /// directive and after the last local access to the
  598. /// reduction variables.
  599. /// \param AllocaIP An insertion point suitable for allocas usable
  600. /// in reductions.
  601. /// \param ReductionInfos A list of info on each reduction variable.
  602. /// \param IsNoWait A flag set if the reduction is marked as nowait.
  603. InsertPointTy createReductions(const LocationDescription &Loc,
  604. InsertPointTy AllocaIP,
  605. ArrayRef<ReductionInfo> ReductionInfos,
  606. bool IsNoWait = false);
  607. ///}
  608. /// Return the insertion point used by the underlying IRBuilder.
  609. InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
  610. /// Update the internal location to \p Loc; returns false if its IP has no block.
  611. bool updateToLocation(const LocationDescription &Loc) {
  612. Builder.restoreIP(Loc.IP);
  613. Builder.SetCurrentDebugLocation(Loc.DL);
  614. return Loc.IP.getBlock() != nullptr; // false when Loc carries no insertion block
  615. }
  616. /// Return the function declaration for the runtime function with \p FnID.
  617. FunctionCallee getOrCreateRuntimeFunction(Module &M,
  618. omp::RuntimeFunction FnID);
  619. Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
  620. /// Return the (LLVM-IR) string describing the source location \p LocStr.
  621. Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
  622. /// Return the (LLVM-IR) string describing the default source location.
  623. Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
  624. /// Return the (LLVM-IR) string describing the source location identified by
  625. /// the arguments.
  626. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
  627. unsigned Line, unsigned Column,
  628. uint32_t &SrcLocStrSize);
  629. /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
  630. /// fallback if \p DL does not specify the function name.
  631. Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
  632. Function *F = nullptr);
  633. /// Return the (LLVM-IR) string describing the source location \p Loc.
  634. Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
  635. uint32_t &SrcLocStrSize);
  636. /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  637. /// TODO: Create an enum class for the Reserve2Flags
  638. Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
  639. omp::IdentFlag Flags = omp::IdentFlag(0),
  640. unsigned Reserve2Flags = 0);
  641. /// Create a hidden global flag \p Name in the module with initial value \p
  642. /// Value.
  643. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
  644. /// Generate control flow and cleanup for cancellation.
  645. ///
  646. /// \param CancelFlag Flag indicating if the cancellation is performed.
  647. /// \param CanceledDirective The kind of directive that is canceled.
  648. /// \param ExitCB Extra code to be generated in the exit block.
  649. void emitCancelationCheckImpl(Value *CancelFlag,
  650. omp::Directive CanceledDirective,
  651. FinalizeCallbackTy ExitCB = {});
  652. /// Generate a barrier runtime call.
  653. ///
  654. /// \param Loc The location at which the request originated and is fulfilled.
  655. /// \param DK The directive which caused the barrier
  656. /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  657. /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  658. /// should be checked and acted upon.
  659. ///
  660. /// \returns The insertion point after the barrier.
  661. InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
  662. omp::Directive DK, bool ForceSimpleCall,
  663. bool CheckCancelFlag);
  664. /// Generate a flush runtime call.
  665. ///
  666. /// \param Loc The location at which the request originated and is fulfilled.
  667. void emitFlush(const LocationDescription &Loc);
  668. /// The finalization stack made up of finalize callbacks currently in-flight,
  669. /// wrapped into FinalizationInfo objects that reference also the finalization
  670. /// target block and the kind of cancellable directive.
  671. SmallVector<FinalizationInfo, 8> FinalizationStack;
  672. /// Return true if the last entry in the finalization stack is of kind \p DK
  673. /// and cancellable; an empty stack yields false.
  674. bool isLastFinalizationInfoCancellable(omp::Directive DK) const {
  675. return !FinalizationStack.empty() &&
  676. FinalizationStack.back().IsCancellable &&
  677. FinalizationStack.back().DK == DK;
  678. }
  679. /// Generate a taskwait runtime call.
  680. ///
  681. /// \param Loc The location at which the request originated and is fulfilled.
  682. void emitTaskwaitImpl(const LocationDescription &Loc);
  683. /// Generate a taskyield runtime call.
  684. ///
  685. /// \param Loc The location at which the request originated and is fulfilled.
  686. void emitTaskyieldImpl(const LocationDescription &Loc);
  687. /// Return the current thread ID.
  688. ///
  689. /// \param Ident The ident (ident_t*) describing the query origin.
  690. Value *getOrCreateThreadID(Value *Ident);
  691. /// The underlying LLVM-IR module
  692. Module &M;
  693. /// The LLVM-IR Builder used to create IR.
  694. IRBuilder<> Builder;
  695. /// Map to remember source location strings
  696. StringMap<Constant *> SrcLocStrMap;
  697. /// Map to remember existing ident_t*.
  698. DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
  699. /// Helper that contains information about regions we need to outline
  700. /// during finalization.
  701. struct OutlineInfo {
  702. using PostOutlineCBTy = std::function<void(Function &)>;
  703. PostOutlineCBTy PostOutlineCB;
  704. BasicBlock *EntryBB, *ExitBB;
  705. SmallVector<Value *, 2> ExcludeArgsFromAggregate;
  706. /// Collect all blocks in between EntryBB and ExitBB in both the given
  707. /// vector and set.
  708. void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
  709. SmallVectorImpl<BasicBlock *> &BlockVector);
  710. /// Return the function that contains the region to be outlined.
  711. Function *getFunction() const { return EntryBB->getParent(); }
  712. };
  713. /// Collection of regions that need to be outlined during finalization.
  714. SmallVector<OutlineInfo, 16> OutlineInfos;
  715. /// Collection of owned canonical loop objects that eventually need to be
  716. /// free'd.
  717. std::forward_list<CanonicalLoopInfo> LoopInfos;
  718. /// Add a new region that will be outlined later; \p OI is moved from.
  719. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(std::move(OI)); }
  720. /// A map from the unique names of auto-generated variables to the variables.
  721. /// It stores variables with the following names: 1) ".gomp_critical_user_" +
  722. /// <critical_section_name> + ".var" for "omp critical" directives; 2)
  723. /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
  724. /// variables.
  725. StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
  726. /// Create the global variable holding the offload mappings information.
  727. GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
  728. std::string VarName);
  729. /// Create the global variable holding the offload names information.
  730. GlobalVariable *
  731. createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
  732. std::string VarName);
  733. struct MapperAllocas {
  734. AllocaInst *ArgsBase = nullptr;
  735. AllocaInst *Args = nullptr;
  736. AllocaInst *ArgSizes = nullptr;
  737. };
  738. /// Create the allocas instruction used in call to mapper functions.
  739. void createMapperAllocas(const LocationDescription &Loc,
  740. InsertPointTy AllocaIP, unsigned NumOperands,
  741. struct MapperAllocas &MapperAllocas);
  742. /// Create the call for the target mapper function.
  743. /// \param Loc The source location description.
  744. /// \param MapperFunc Function to be called.
  745. /// \param SrcLocInfo Source location information global.
  746. /// \param MaptypesArg The argument types.
  747. /// \param MapnamesArg The argument names.
  748. /// \param MapperAllocas The AllocaInst used for the call.
  749. /// \param DeviceID Device ID for the call.
  750. /// \param NumOperands Number of operands in the call.
  751. void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
  752. Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
  753. struct MapperAllocas &MapperAllocas, int64_t DeviceID,
  754. unsigned NumOperands);
  755. public:
  756. /// Generator for __kmpc_copyprivate
  757. ///
  758. /// \param Loc The source location description.
  759. /// \param BufSize Number of elements in the buffer.
  760. /// \param CpyBuf List of pointers to data to be copied.
  761. /// \param CpyFn function to call for copying data.
  762. /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
  763. ///
  764. /// \return The insertion position *after* the CopyPrivate call.
  765. InsertPointTy createCopyPrivate(const LocationDescription &Loc,
  766. llvm::Value *BufSize, llvm::Value *CpyBuf,
  767. llvm::Value *CpyFn, llvm::Value *DidIt);
  768. /// Generator for '#omp single'
  769. ///
  770. /// \param Loc The source location description.
  771. /// \param BodyGenCB Callback that will generate the region code.
  772. /// \param FiniCB Callback to finalize variable copies.
  773. /// \param DidIt Local variable used as a flag to indicate 'single' thread
  774. ///
  775. /// \returns The insertion position *after* the single call.
  776. InsertPointTy createSingle(const LocationDescription &Loc,
  777. BodyGenCallbackTy BodyGenCB,
  778. FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
  779. /// Generator for '#omp master'
  780. ///
  781. /// \param Loc The insert and source location description.
  782. /// \param BodyGenCB Callback that will generate the region code.
  783. /// \param FiniCB Callback to finalize variable copies.
  784. ///
  785. /// \returns The insertion position *after* the master.
  786. InsertPointTy createMaster(const LocationDescription &Loc,
  787. BodyGenCallbackTy BodyGenCB,
  788. FinalizeCallbackTy FiniCB);
  789. /// Generator for '#omp masked'
  790. ///
  791. /// \param Loc The insert and source location description.
  792. /// \param BodyGenCB Callback that will generate the region code.
  793. /// \param FiniCB Callback to finalize variable copies.
  794. /// \param Filter Presumably the value of the 'filter' clause -- TODO confirm.
  795. /// \returns The insertion position *after* the masked.
  796. InsertPointTy createMasked(const LocationDescription &Loc,
  797. BodyGenCallbackTy BodyGenCB,
  798. FinalizeCallbackTy FiniCB, Value *Filter);
  799. /// Generator for '#omp critical'
  800. ///
  801. /// \param Loc The insert and source location description.
  802. /// \param BodyGenCB Callback that will generate the region body code.
  803. /// \param FiniCB Callback to finalize variable copies.
  804. /// \param CriticalName name of the lock used by the critical directive
  805. /// \param HintInst Hint Instruction for hint clause associated with critical
  806. ///
  807. /// \returns The insertion position *after* the critical.
  808. InsertPointTy createCritical(const LocationDescription &Loc,
  809. BodyGenCallbackTy BodyGenCB,
  810. FinalizeCallbackTy FiniCB,
  811. StringRef CriticalName, Value *HintInst);
  812. /// Generator for '#omp ordered depend (source | sink)'
  813. ///
  814. /// \param Loc The insert and source location description.
  815. /// \param AllocaIP The insertion point to be used for alloca instructions.
  816. /// \param NumLoops The number of loops in depend clause.
  817. /// \param StoreValues The value will be stored in vector address.
  818. /// \param Name The name of alloca instruction.
  819. /// \param IsDependSource If true, depend source; otherwise, depend sink.
  820. ///
  821. /// \return The insertion position *after* the ordered.
  822. InsertPointTy createOrderedDepend(const LocationDescription &Loc,
  823. InsertPointTy AllocaIP, unsigned NumLoops,
  824. ArrayRef<llvm::Value *> StoreValues,
  825. const Twine &Name, bool IsDependSource);
  826. /// Generator for '#omp ordered [threads | simd]'
  827. ///
  828. /// \param Loc The insert and source location description.
  829. /// \param BodyGenCB Callback that will generate the region code.
  830. /// \param FiniCB Callback to finalize variable copies.
  831. /// \param IsThreads If true, with threads clause or without clause;
  832. /// otherwise, with simd clause;
  833. ///
  834. /// \returns The insertion position *after* the ordered.
  835. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
  836. BodyGenCallbackTy BodyGenCB,
  837. FinalizeCallbackTy FiniCB,
  838. bool IsThreads);
  839. /// Generator for '#omp sections'
  840. ///
  841. /// \param Loc The insert and source location description.
  842. /// \param AllocaIP The insertion points to be used for alloca instructions.
  843. /// \param SectionCBs Callbacks that will generate body of each section.
  844. /// \param PrivCB Callback to copy a given variable (think copy constructor).
  845. /// \param FiniCB Callback to finalize variable copies.
  846. /// \param IsCancellable Flag to indicate a cancellable parallel region.
  847. /// \param IsNowait If true, the barrier that ensures all sections are executed
  848. /// before moving forward will not be generated.
  849. /// \returns The insertion position *after* the sections.
  850. InsertPointTy createSections(const LocationDescription &Loc,
  851. InsertPointTy AllocaIP,
  852. ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
  853. PrivatizeCallbackTy PrivCB,
  854. FinalizeCallbackTy FiniCB, bool IsCancellable,
  855. bool IsNowait);
  856. /// Generator for '#omp section'
  857. ///
  858. /// \param Loc The insert and source location description.
  859. /// \param BodyGenCB Callback that will generate the region body code.
  860. /// \param FiniCB Callback to finalize variable copies.
  861. /// \returns The insertion position *after* the section.
  862. InsertPointTy createSection(const LocationDescription &Loc,
  863. BodyGenCallbackTy BodyGenCB,
  864. FinalizeCallbackTy FiniCB);
  865. /// Generate conditional branch and relevant BasicBlocks through which private
  866. /// threads copy the 'copyin' variables from Master copy to threadprivate
  867. /// copies.
  868. ///
  869. /// \param IP insertion block for copyin conditional
  870. /// \param MasterAddr a pointer to the master variable
  871. /// \param PrivateAddr a pointer to the threadprivate variable
  872. /// \param IntPtrTy Pointer size type
  873. /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  874. /// and copy.in.end block
  875. ///
  876. /// \returns The insertion point where the copying operation is to be emitted.
  877. InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
  878. Value *PrivateAddr,
  879. llvm::IntegerType *IntPtrTy,
  880. bool BranchtoEnd = true);
  881. /// Create a runtime call for kmpc_Alloc
  882. ///
  883. /// \param Loc The insert and source location description.
  884. /// \param Size Size of allocated memory space
  885. /// \param Allocator Allocator information instruction
  886. /// \param Name Name of call Instruction for OMP_alloc
  887. ///
  888. /// \returns CallInst to the OMP_Alloc call
  889. CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
  890. Value *Allocator, std::string Name = "");
  891. /// Create a runtime call for kmpc_free
  892. ///
  893. /// \param Loc The insert and source location description.
  894. /// \param Addr Address of memory space to be freed
  895. /// \param Allocator Allocator information instruction
  896. /// \param Name Name of call Instruction for OMP_Free
  897. ///
  898. /// \returns CallInst to the OMP_Free call
  899. CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
  900. Value *Allocator, std::string Name = "");
  901. /// Create a runtime call for kmpc_threadprivate_cached
  902. ///
  903. /// \param Loc The insert and source location description.
  904. /// \param Pointer pointer to data to be cached
  905. /// \param Size size of data to be cached
  906. /// \param Name Name of call Instruction for callinst
  907. ///
  908. /// \returns CallInst to the thread private cache call.
  909. CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
  910. llvm::Value *Pointer,
  911. llvm::ConstantInt *Size,
  912. const llvm::Twine &Name = Twine(""));
  913. /// Create a runtime call for __tgt_interop_init
  914. ///
  915. /// \param Loc The insert and source location description.
  916. /// \param InteropVar variable to be allocated
  917. /// \param InteropType type of interop operation
  918. /// \param Device device to which offloading will occur
  919. /// \param NumDependences number of dependence variables
  920. /// \param DependenceAddress pointer to dependence variables
  921. /// \param HaveNowaitClause does nowait clause exist
  922. ///
  923. /// \returns CallInst to the __tgt_interop_init call
  924. CallInst *createOMPInteropInit(const LocationDescription &Loc,
  925. Value *InteropVar,
  926. omp::OMPInteropType InteropType, Value *Device,
  927. Value *NumDependences,
  928. Value *DependenceAddress,
  929. bool HaveNowaitClause);
  930. /// Create a runtime call for __tgt_interop_destroy
  931. ///
  932. /// \param Loc The insert and source location description.
  933. /// \param InteropVar variable to be allocated
  934. /// \param Device device to which offloading will occur
  935. /// \param NumDependences number of dependence variables
  936. /// \param DependenceAddress pointer to dependence variables
  937. /// \param HaveNowaitClause does nowait clause exist
  938. ///
  939. /// \returns CallInst to the __tgt_interop_destroy call
  940. CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
  941. Value *InteropVar, Value *Device,
  942. Value *NumDependences,
  943. Value *DependenceAddress,
  944. bool HaveNowaitClause);
  945. /// Create a runtime call for __tgt_interop_use
  946. ///
  947. /// \param Loc The insert and source location description.
  948. /// \param InteropVar variable to be allocated
  949. /// \param Device device to which offloading will occur
  950. /// \param NumDependences number of dependence variables
  951. /// \param DependenceAddress pointer to dependence variables
  952. /// \param HaveNowaitClause does nowait clause exist
  953. ///
  954. /// \returns CallInst to the __tgt_interop_use call
  955. CallInst *createOMPInteropUse(const LocationDescription &Loc,
  956. Value *InteropVar, Value *Device,
  957. Value *NumDependences, Value *DependenceAddress,
  958. bool HaveNowaitClause);
  959. /// The `omp target` interface
  960. ///
  961. /// For more information about the usage of this interface,
  962. /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
  963. ///
  964. ///{
  965. /// Create a runtime call for kmpc_target_init
  966. ///
  967. /// \param Loc The insert and source location description.
  968. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  969. /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
  970. InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
  971. bool RequiresFullRuntime);
  972. /// Create a runtime call for kmpc_target_deinit
  973. ///
  974. /// \param Loc The insert and source location description.
  975. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  976. /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
  977. void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
  978. bool RequiresFullRuntime);
  979. ///}
  980. /// Declarations for LLVM-IR types (simple, array, function and structure) are
  981. /// generated below. Their names are defined and used in OpenMPKinds.def. Here
  982. /// we provide the declarations, the initializeTypes function will provide the
  983. /// values.
  984. ///
  985. ///{
  986. #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
  987. #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
  988. ArrayType *VarName##Ty = nullptr; \
  989. PointerType *VarName##PtrTy = nullptr;
  990. #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
  991. FunctionType *VarName = nullptr; \
  992. PointerType *VarName##Ptr = nullptr;
  993. #define OMP_STRUCT_TYPE(VarName, StrName, ...) \
  994. StructType *VarName = nullptr; \
  995. PointerType *VarName##Ptr = nullptr;
  996. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  997. ///}
  998. private:
  999. /// Create all simple and struct types exposed by the runtime and remember
  1000. /// the llvm::PointerTypes of them for easy access later.
  1001. void initializeTypes(Module &M);
  1002. /// Common interface for generating entry calls for OMP Directives.
  1003. /// If the directive has a region/body, it will set the insertion
  1004. /// point to the body.
  1005. ///
  1006. /// \param OMPD Directive to generate entry blocks for
  1007. /// \param EntryCall Call to the entry OMP Runtime Function
  1008. /// \param ExitBB block where the region ends.
  1009. /// \param Conditional indicate if the entry call result will be used
  1010. /// to evaluate a conditional of whether a thread will execute
  1011. /// body code or not.
  1012. ///
  1013. /// \return The insertion position in exit block
  1014. InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
  1015. BasicBlock *ExitBB,
  1016. bool Conditional = false);
  1017. /// Common interface to finalize the region
  1018. ///
  1019. /// \param OMPD Directive to generate exiting code for
  1020. /// \param FinIP Insertion point for emitting Finalization code and exit call
  1021. /// \param ExitCall Call to the ending OMP Runtime Function
  1022. /// \param HasFinalize indicate if the directive will require finalization
  1023. /// and has a finalization callback in the stack that
  1024. /// should be called.
  1025. ///
  1026. /// \return The insertion position in exit block
  1027. InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
  1028. InsertPointTy FinIP,
  1029. Instruction *ExitCall,
  1030. bool HasFinalize = true);
  1031. /// Common Interface to generate OMP inlined regions
  1032. ///
  1033. /// \param OMPD Directive to generate inlined region for
  1034. /// \param EntryCall Call to the entry OMP Runtime Function
  1035. /// \param ExitCall Call to the ending OMP Runtime Function
  1036. /// \param BodyGenCB Body code generation callback.
  1037. /// \param FiniCB Finalization Callback. Will be called when finalizing region
  1038. /// \param Conditional indicate if the entry call result will be used
  1039. /// to evaluate a conditional of whether a thread will execute
  1040. /// body code or not.
  1041. /// \param HasFinalize indicate if the directive will require finalization
  1042. /// and has a finalization callback in the stack that
  1043. /// should be called.
  1044. /// \param IsCancellable if HasFinalize is set to true, indicate if the
  1045. /// the directive should be cancellable.
  1046. /// \return The insertion point after the region
  1047. InsertPointTy
  1048. EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
  1049. Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
  1050. FinalizeCallbackTy FiniCB, bool Conditional = false,
  1051. bool HasFinalize = true, bool IsCancellable = false);
  1052. /// Get the platform-specific name separator.
  1053. /// \param Parts different parts of the final name that needs separation
  1054. /// \param FirstSeparator First separator used between the initial two
  1055. /// parts of the name.
  1056. /// \param Separator separator used between all of the rest consecutive
  1057. /// parts of the name
  1058. static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
  1059. StringRef FirstSeparator,
  1060. StringRef Separator);
  1061. /// Gets (if a variable with the given name already exists) or creates an
  1062. /// internal global variable with the specified Name. The created variable has
  1063. /// linkage CommonLinkage by default and is initialized by null value.
  1064. /// \param Ty Type of the global variable. If it exists already the type
  1065. /// must be the same.
  1066. /// \param Name Name of the variable.
  1067. Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
  1068. unsigned AddressSpace = 0);
  1069. /// Returns corresponding lock object for the specified critical region
  1070. /// name. If the lock object does not exist it is created, otherwise the
  1071. /// reference to the existing copy is returned.
  1072. /// \param CriticalName Name of the critical region.
  1073. ///
  1074. Value *getOMPCriticalRegionLock(StringRef CriticalName);
  1075. /// Callback type for Atomic Expression update
  1076. /// ex:
  1077. /// \code{.cpp}
  1078. /// unsigned x = 0;
  1079. /// #pragma omp atomic update
  1080. /// x = Expr(x_old); //Expr() is any legal operation
  1081. /// \endcode
  1082. ///
  1083. /// \param XOld the value of the atomic memory address to use for update
  1084. /// \param IRB reference to the IRBuilder to use
  1085. ///
  1086. /// \returns Value to update X to.
  1087. using AtomicUpdateCallbackTy =
  1088. const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
  1089. private:
  1090. enum AtomicKind { Read, Write, Update, Capture };
  1091. /// Determine whether to emit flush or not
  1092. ///
  1093. /// \param Loc The insert and source location description.
  1094. /// \param AO The required atomic ordering
  1095. /// \param AK The OpenMP atomic operation kind used.
  1096. ///
  1097. /// \returns whether a flush was emitted or not
  1098. bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
  1099. AtomicOrdering AO, AtomicKind AK);
  1100. /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
  1101. /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  1102. /// Only Scalar data types.
  1103. ///
  1104. /// \param AllocIP Instruction to create AllocaInst before.
  1105. /// \param X The target atomic pointer to be updated
  1106. /// \param XElemTy The element type of the atomic pointer.
  1107. /// \param Expr The value to update X with.
  1108. /// \param AO Atomic ordering of the generated atomic
  1109. /// instructions.
  1110. /// \param RMWOp The binary operation used for update. If the
  1111. /// operation is not supported by atomicRMW,
  1112. /// or belongs to {FADD, FSUB, BAD_BINOP},
  1113. /// then a `cmpExch` based atomic will be generated.
  1114. /// \param UpdateOp Code generator for complex expressions that cannot be
  1115. /// expressed through atomicrmw instruction.
  1116. /// \param VolatileX true if \a X is volatile.
  1117. /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  1118. /// update expression, false otherwise.
  1119. /// (e.g. true for X = X BinOp Expr)
  1120. ///
  1121. /// \returns A pair of the old value of X before the update, and the value
  1122. /// used for the update.
  1123. std::pair<Value *, Value *>
  1124. emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
  1125. AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1126. AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
  1127. bool IsXBinopExpr);
  1128. /// Emit the binary operation described by \p RMWOp, using \p Src1 and \p Src2.
  1129. ///
  1130. /// \return The instruction
  1131. Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
  1132. AtomicRMWInst::BinOp RMWOp);
  1133. public:
  1134. /// A struct to pack relevant information while generating atomic ops.
  1135. struct AtomicOpValue {
  1136. Value *Var = nullptr; // The pointer operand (e.g. the X or V of the createAtomic* methods).
  1137. Type *ElemTy = nullptr; // NOTE(review): presumably the element type Var points to - confirm.
  1138. bool IsSigned = false; // NOTE(review): presumably whether the value is a signed integer - confirm.
  1139. bool IsVolatile = false; // NOTE(review): presumably whether accesses to Var are volatile - confirm.
  1140. };
  1141. /// Emit atomic read for: V = X --- Only scalar data types.
  1142. ///
  1143. /// \param Loc The insert and source location description.
  1144. /// \param X The target pointer to be atomically read.
  1145. /// \param V Memory address where to store the atomically read
  1146. /// value.
  1147. /// \param AO Atomic ordering of the generated atomic
  1148. /// instructions.
  1149. ///
  1150. /// \return Insertion point after the generated atomic read IR.
  1151. InsertPointTy createAtomicRead(const LocationDescription &Loc,
  1152. AtomicOpValue &X, AtomicOpValue &V,
  1153. AtomicOrdering AO);
  1154. /// Emit atomic write for: X = Expr --- Only scalar data types.
  1155. ///
  1156. /// \param Loc The insert and source location description.
  1157. /// \param X The target pointer to be atomically written to.
  1158. /// \param Expr The value to store.
  1159. /// \param AO Atomic ordering of the generated atomic
  1160. /// instructions.
  1161. ///
  1162. /// \return Insertion point after the generated atomic write IR.
  1163. InsertPointTy createAtomicWrite(const LocationDescription &Loc,
  1164. AtomicOpValue &X, Value *Expr,
  1165. AtomicOrdering AO);
  1166. /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
  1167. /// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X).
  1168. /// Only scalar data types.
  1169. ///
  1170. /// \param Loc The insert and source location description.
  1171. /// \param AllocIP Instruction to create AllocaInst before.
  1172. /// \param X The target atomic pointer to be updated.
  1173. /// \param Expr The value to update X with.
  1174. /// \param AO Atomic ordering of the generated atomic instructions.
  1175. /// \param RMWOp The binary operation used for update. If the operation
  1176. /// is not supported by atomicRMW, or belongs to
  1177. /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  1178. /// atomic will be generated.
  1179. /// \param UpdateOp Code generator for complex expressions that cannot be
  1180. /// expressed through an atomicrmw instruction.
  1181. /// \param IsXBinopExpr true if \a X is the left-hand operand of the
  1182. /// right-hand side of the update expression, false otherwise
  1183. /// (e.g. true for X = X BinOp Expr).
  1184. ///
  1185. /// \return Insertion point after the generated atomic update IR.
  1186. InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
  1187. Instruction *AllocIP, AtomicOpValue &X,
  1188. Value *Expr, AtomicOrdering AO,
  1189. AtomicRMWInst::BinOp RMWOp,
  1190. AtomicUpdateCallbackTy &UpdateOp,
  1191. bool IsXBinopExpr);
  1192. /// Emit atomic capture for constructs: --- Only scalar data types
  1193. /// V = X; X = X BinOp Expr ,
  1194. /// X = X BinOp Expr; V = X,
  1195. /// V = X; X = Expr BinOp X,
  1196. /// X = Expr BinOp X; V = X,
  1197. /// V = X; X = UpdateOp(X),
  1198. /// X = UpdateOp(X); V = X,
  1199. ///
  1200. /// \param Loc The insert and source location description.
  1201. /// \param AllocIP Instruction to create AllocaInst before.
  1202. /// \param X The target atomic pointer to be updated.
  1203. /// \param V Memory address where to store the captured value.
  1204. /// \param Expr The value to update X with.
  1205. /// \param AO Atomic ordering of the generated atomic instructions.
  1206. /// \param RMWOp The binary operation used for update. If the
  1207. /// operation is not supported by atomicRMW, or belongs to
  1208. /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based
  1209. /// atomic will be generated.
  1210. /// \param UpdateOp Code generator for complex expressions that cannot be
  1211. /// expressed through an atomicrmw instruction.
  1212. /// \param UpdateExpr true if X is an in place update of the form
  1213. /// X = X BinOp Expr or X = Expr BinOp X
  1214. /// \param IsPostfixUpdate true if original value of 'x' must be stored in
  1215. /// 'v', not an updated one.
  1216. /// \param IsXBinopExpr true if X is the left-hand operand of the
  1217. /// right-hand side of the update expression, false otherwise
  1218. /// (e.g. true for X = X BinOp Expr).
  1219. ///
  1220. /// \return Insertion point after the generated atomic capture IR.
  1221. InsertPointTy
  1222. createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
  1223. AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
  1224. AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1225. AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
  1226. bool IsPostfixUpdate, bool IsXBinopExpr);
  1227. /// Create the control flow structure of a canonical OpenMP loop.
  1228. ///
  1229. /// The emitted loop will be disconnected, i.e. no edge to the loop's
  1230. /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
  1231. /// IRBuilder location is not preserved.
  1232. ///
  1233. /// \param DL DebugLoc used for the instructions in the skeleton.
  1234. /// \param TripCount Value to be used for the trip count.
  1235. /// \param F Function in which to insert the BasicBlocks.
  1236. /// \param PreInsertBefore Where to insert BBs that execute before the body,
  1237. /// typically the body itself.
  1238. /// \param PostInsertBefore Where to insert BBs that execute after the body.
  1239. /// \param Name Base name used to derive BB
  1240. /// and instruction names; defaults to an empty Twine.
  1241. ///
  1242. /// \returns The CanonicalLoopInfo that represents the emitted loop.
  1243. CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
  1244. Function *F,
  1245. BasicBlock *PreInsertBefore,
  1246. BasicBlock *PostInsertBefore,
  1247. const Twine &Name = {});
  1248. };
  1249. /// Class to represent the control flow structure of an OpenMP canonical loop.
  1250. ///
  1251. /// The control-flow structure is standardized for easy consumption by
  1252. /// directives associated with loops. For instance, the worksharing-loop
  1253. /// construct may change this control flow such that each loop iteration is
  1254. /// executed on only one thread. The constraints of a canonical loop in brief
  1255. /// are:
  1256. ///
  1257. /// * The number of loop iterations must have been computed before entering the
  1258. /// loop.
  1259. ///
  1260. /// * Has an (unsigned) logical induction variable that starts at zero and
  1261. /// increments by one.
  1262. ///
  1263. /// * The loop's CFG itself has no side-effects. The OpenMP specification
  1264. /// itself allows side-effects, but the order in which they happen, including
  1265. /// how often or whether at all, is unspecified. We expect that the frontend
  1266. /// will emit those side-effect instructions somewhere (e.g. before the loop)
  1267. /// such that the CanonicalLoopInfo itself can be side-effect free.
  1268. ///
  1269. /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
  1270. /// execution of a loop body that satisfies these constraints. It does NOT
  1271. /// represent arbitrary SESE regions that happen to contain a loop. Do not use
  1272. /// CanonicalLoopInfo for such purposes.
  1273. ///
  1274. /// The control flow can be described as follows:
  1275. ///
  1276. /// Preheader
  1277. /// |
  1278. /// /-> Header
  1279. /// | |
  1280. /// | Cond---\
  1281. /// | | |
  1282. /// | Body |
  1283. /// | | | |
  1284. /// | <...> |
  1285. /// | | | |
  1286. /// \--Latch |
  1287. /// |
  1288. /// Exit
  1289. /// |
  1290. /// After
  1291. ///
  1292. /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
  1293. /// including) and end at AfterIP (at the After's first instruction, excluding).
  1294. /// That is, instructions in the Preheader and After blocks (except the
  1295. /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
  1296. /// side-effects. Typically, the Preheader is used to compute the loop's trip
  1297. /// count. The instructions from BodyIP (at the Body block's first instruction,
  1298. /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
  1299. /// control and thus can have side-effects. The body block is the single entry
  1300. /// point into the loop body, which may contain arbitrary control flow as long
  1301. /// as all control paths eventually branch to the Latch block.
  1302. ///
  1303. /// TODO: Consider adding another standardized BasicBlock between Body CFG and
  1304. /// Latch to guarantee that there is only a single edge to the latch. It would
  1305. /// make loop transformations easier by not needing to consider multiple
  1306. /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
  1307. /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
  1308. /// executes after each body iteration.
  1309. ///
  1310. /// There must be no loop-carried dependencies through llvm::Values. This is
  1311. /// equivalent to requiring that the Latch has no PHINode and the Header's only
  1312. /// PHINode is for the induction variable.
  1313. ///
  1314. /// All code in Header, Cond, Latch and Exit (plus the terminator of the
  1315. /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
  1316. /// by assertOK(). They are expected to not be modified unless explicitly
  1317. /// modifying the CanonicalLoopInfo through a method that applies an OpenMP
  1318. /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
  1319. /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
  1320. /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
  1321. /// anymore as its underlying control flow may not exist anymore.
  1322. /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
  1323. /// may also return a new CanonicalLoopInfo that can be passed to other
  1324. /// loop-associated construct implementing methods. These loop-transforming
  1325. /// methods may either create a new CanonicalLoopInfo usually using
  1326. /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
  1327. /// modify one of the input CanonicalLoopInfo and return it as representing the
  1328. /// modified loop. What is done is an implementation detail of the
  1329. /// transformation-implementing method and callers should always assume that the
  1330. /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
  1331. /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
  1332. /// created by createCanonicalLoop, such that transforming methods do not have
  1333. /// to special case where the CanonicalLoopInfo originated from.
  1334. ///
  1335. /// Generally, methods consuming CanonicalLoopInfo do not need an
  1336. /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
  1337. /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
  1338. /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
  1339. /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
  1340. /// any InsertPoint in the Preheader, After or Block can still be used after
  1341. /// calling such a method.
  1342. ///
  1343. /// TODO: Provide mechanisms for exception handling and cancellation points.
  1344. ///
  1345. /// Defined outside OpenMPIRBuilder because nested classes cannot be
  1346. /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
  1347. class CanonicalLoopInfo {
  1348. friend class OpenMPIRBuilder;
  1349. private:
  1350. BasicBlock *Header = nullptr;
  1351. BasicBlock *Cond = nullptr;
  1352. BasicBlock *Latch = nullptr;
  1353. BasicBlock *Exit = nullptr;
  1354. /// Add the control blocks of this loop to \p BBs.
  1355. ///
  1356. /// This does not include any block from the body, including the one returned
  1357. /// by getBody().
  1358. ///
  1359. /// FIXME: This currently includes the Preheader and After blocks even though
  1360. /// their content is (mostly) not under CanonicalLoopInfo's control.
  1361. /// Re-evaluated whether this makes sense.
  1362. void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
  1363. public:
  1364. /// Returns whether this object currently represents the IR of a loop. If
  1365. /// returning false, it may have been consumed by a loop transformation or not
  1366. /// been intialized. Do not use in this case;
  1367. bool isValid() const { return Header; }
  1368. /// The preheader ensures that there is only a single edge entering the loop.
  1369. /// Code that must be execute before any loop iteration can be emitted here,
  1370. /// such as computing the loop trip count and begin lifetime markers. Code in
  1371. /// the preheader is not considered part of the canonical loop.
  1372. BasicBlock *getPreheader() const;
  1373. /// The header is the entry for each iteration. In the canonical control flow,
  1374. /// it only contains the PHINode for the induction variable.
  1375. BasicBlock *getHeader() const {
  1376. assert(isValid() && "Requires a valid canonical loop");
  1377. return Header;
  1378. }
  1379. /// The condition block computes whether there is another loop iteration. If
  1380. /// yes, branches to the body; otherwise to the exit block.
  1381. BasicBlock *getCond() const {
  1382. assert(isValid() && "Requires a valid canonical loop");
  1383. return Cond;
  1384. }
  1385. /// The body block is the single entry for a loop iteration and not controlled
  1386. /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
  1387. /// eventually branch to the \p Latch block.
  1388. BasicBlock *getBody() const {
  1389. assert(isValid() && "Requires a valid canonical loop");
  1390. return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
  1391. }
  1392. /// Reaching the latch indicates the end of the loop body code. In the
  1393. /// canonical control flow, it only contains the increment of the induction
  1394. /// variable.
  1395. BasicBlock *getLatch() const {
  1396. assert(isValid() && "Requires a valid canonical loop");
  1397. return Latch;
  1398. }
  1399. /// Reaching the exit indicates no more iterations are being executed.
  1400. BasicBlock *getExit() const {
  1401. assert(isValid() && "Requires a valid canonical loop");
  1402. return Exit;
  1403. }
  1404. /// The after block is intended for clean-up code such as lifetime end
  1405. /// markers. It is separate from the exit block to ensure, analogous to the
  1406. /// preheader, it having just a single entry edge and being free from PHI
  1407. /// nodes should there be multiple loop exits (such as from break
  1408. /// statements/cancellations).
  1409. BasicBlock *getAfter() const {
  1410. assert(isValid() && "Requires a valid canonical loop");
  1411. return Exit->getSingleSuccessor();
  1412. }
  1413. /// Returns the llvm::Value containing the number of loop iterations. It must
  1414. /// be valid in the preheader and always interpreted as an unsigned integer of
  1415. /// any bit-width.
  1416. Value *getTripCount() const {
  1417. assert(isValid() && "Requires a valid canonical loop");
  1418. Instruction *CmpI = &Cond->front();
  1419. assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  1420. return CmpI->getOperand(1);
  1421. }
  1422. /// Returns the instruction representing the current logical induction
  1423. /// variable. Always unsigned, always starting at 0 with an increment of one.
  1424. Instruction *getIndVar() const {
  1425. assert(isValid() && "Requires a valid canonical loop");
  1426. Instruction *IndVarPHI = &Header->front();
  1427. assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
  1428. return IndVarPHI;
  1429. }
  1430. /// Return the type of the induction variable (and the trip count).
  1431. Type *getIndVarType() const {
  1432. assert(isValid() && "Requires a valid canonical loop");
  1433. return getIndVar()->getType();
  1434. }
  1435. /// Return the insertion point for user code before the loop.
  1436. OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
  1437. assert(isValid() && "Requires a valid canonical loop");
  1438. BasicBlock *Preheader = getPreheader();
  1439. return {Preheader, std::prev(Preheader->end())};
  1440. };
  1441. /// Return the insertion point for user code in the body.
  1442. OpenMPIRBuilder::InsertPointTy getBodyIP() const {
  1443. assert(isValid() && "Requires a valid canonical loop");
  1444. BasicBlock *Body = getBody();
  1445. return {Body, Body->begin()};
  1446. };
  1447. /// Return the insertion point for user code after the loop.
  1448. OpenMPIRBuilder::InsertPointTy getAfterIP() const {
  1449. assert(isValid() && "Requires a valid canonical loop");
  1450. BasicBlock *After = getAfter();
  1451. return {After, After->begin()};
  1452. };
  1453. Function *getFunction() const {
  1454. assert(isValid() && "Requires a valid canonical loop");
  1455. return Header->getParent();
  1456. }
  1457. /// Consistency self-check.
  1458. void assertOK() const;
  1459. /// Invalidate this loop. That is, the underlying IR does not fulfill the
  1460. /// requirements of an OpenMP canonical loop anymore.
  1461. void invalidate();
  1462. };
  1463. } // end namespace llvm
  1464. #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  1465. #ifdef __GNUC__
  1466. #pragma GCC diagnostic pop
  1467. #endif