OMPIRBuilder.cpp 142 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645
  1. //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. ///
  10. /// This file implements the OpenMPIRBuilder class, which is used as a
  11. /// convenient way to create LLVM instructions for OpenMP directives.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
  15. #include "llvm/ADT/SmallSet.h"
  16. #include "llvm/ADT/StringRef.h"
  17. #include "llvm/ADT/Triple.h"
  18. #include "llvm/Analysis/AssumptionCache.h"
  19. #include "llvm/Analysis/CodeMetrics.h"
  20. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  21. #include "llvm/Analysis/ScalarEvolution.h"
  22. #include "llvm/Analysis/TargetLibraryInfo.h"
  23. #include "llvm/IR/CFG.h"
  24. #include "llvm/IR/Constants.h"
  25. #include "llvm/IR/DebugInfo.h"
  26. #include "llvm/IR/GlobalVariable.h"
  27. #include "llvm/IR/IRBuilder.h"
  28. #include "llvm/IR/MDBuilder.h"
  29. #include "llvm/IR/PassManager.h"
  30. #include "llvm/IR/Value.h"
  31. #include "llvm/MC/TargetRegistry.h"
  32. #include "llvm/Support/CommandLine.h"
  33. #include "llvm/Support/Error.h"
  34. #include "llvm/Target/TargetMachine.h"
  35. #include "llvm/Target/TargetOptions.h"
  36. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  37. #include "llvm/Transforms/Utils/CodeExtractor.h"
  38. #include "llvm/Transforms/Utils/LoopPeel.h"
  39. #include "llvm/Transforms/Utils/ModuleUtils.h"
  40. #include "llvm/Transforms/Utils/UnrollLoop.h"
  41. #include <cstdint>
  42. #include <sstream>
  43. #define DEBUG_TYPE "openmp-ir-builder"
  44. using namespace llvm;
  45. using namespace omp;
  46. static cl::opt<bool>
  47. OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
  48. cl::desc("Use optimistic attributes describing "
  49. "'as-if' properties of runtime calls."),
  50. cl::init(false));
  51. static cl::opt<double> UnrollThresholdFactor(
  52. "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
  53. cl::desc("Factor for the unroll threshold to account for code "
  54. "simplifications still taking place"),
  55. cl::init(1.5));
  56. #ifndef NDEBUG
  57. /// Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions
  58. /// at position IP1 may change the meaning of IP2 or vice-versa. This is because
  59. /// an InsertPoint stores the instruction before something is inserted. For
  60. /// instance, if both point to the same instruction, two IRBuilders alternating
  61. /// creating instruction will cause the instructions to be interleaved.
  62. static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
  63. IRBuilder<>::InsertPoint IP2) {
  64. if (!IP1.isSet() || !IP2.isSet())
  65. return false;
  66. return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
  67. }
  68. #endif
  69. void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  70. LLVMContext &Ctx = Fn.getContext();
  71. // Get the function's current attributes.
  72. auto Attrs = Fn.getAttributes();
  73. auto FnAttrs = Attrs.getFnAttrs();
  74. auto RetAttrs = Attrs.getRetAttrs();
  75. SmallVector<AttributeSet, 4> ArgAttrs;
  76. for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
  77. ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
  78. #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
  79. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  80. // Add attributes to the function declaration.
  81. switch (FnID) {
  82. #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
  83. case Enum: \
  84. FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
  85. RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
  86. for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
  87. ArgAttrs[ArgNo] = \
  88. ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
  89. Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
  90. break;
  91. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  92. default:
  93. // Attributes are optional.
  94. break;
  95. }
  96. }
  97. FunctionCallee
  98. OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  99. FunctionType *FnTy = nullptr;
  100. Function *Fn = nullptr;
  101. // Try to find the declation in the module first.
  102. switch (FnID) {
  103. #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
  104. case Enum: \
  105. FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
  106. IsVarArg); \
  107. Fn = M.getFunction(Str); \
  108. break;
  109. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  110. }
  111. if (!Fn) {
  112. // Create a new declaration if we need one.
  113. switch (FnID) {
  114. #define OMP_RTL(Enum, Str, ...) \
  115. case Enum: \
  116. Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
  117. break;
  118. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  119. }
  120. // Add information if the runtime function takes a callback function
  121. if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
  122. if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
  123. LLVMContext &Ctx = Fn->getContext();
  124. MDBuilder MDB(Ctx);
  125. // Annotate the callback behavior of the runtime function:
  126. // - The callback callee is argument number 2 (microtask).
  127. // - The first two arguments of the callback callee are unknown (-1).
  128. // - All variadic arguments to the runtime function are passed to the
  129. // callback callee.
  130. Fn->addMetadata(
  131. LLVMContext::MD_callback,
  132. *MDNode::get(Ctx, {MDB.createCallbackEncoding(
  133. 2, {-1, -1}, /* VarArgsArePassed */ true)}));
  134. }
  135. }
  136. LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
  137. << " with type " << *Fn->getFunctionType() << "\n");
  138. addAttributes(FnID, *Fn);
  139. } else {
  140. LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
  141. << " with type " << *Fn->getFunctionType() << "\n");
  142. }
  143. assert(Fn && "Failed to create OpenMP runtime function");
  144. // Cast the function to the expected type if necessary
  145. Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  146. return {FnTy, C};
  147. }
  148. Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  149. FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  150. auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  151. assert(Fn && "Failed to create OpenMP runtime function pointer");
  152. return Fn;
  153. }
  154. void OpenMPIRBuilder::initialize() { initializeTypes(M); }
  155. void OpenMPIRBuilder::finalize(Function *Fn) {
  156. SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  157. SmallVector<BasicBlock *, 32> Blocks;
  158. SmallVector<OutlineInfo, 16> DeferredOutlines;
  159. for (OutlineInfo &OI : OutlineInfos) {
  160. // Skip functions that have not finalized yet; may happen with nested
  161. // function generation.
  162. if (Fn && OI.getFunction() != Fn) {
  163. DeferredOutlines.push_back(OI);
  164. continue;
  165. }
  166. ParallelRegionBlockSet.clear();
  167. Blocks.clear();
  168. OI.collectBlocks(ParallelRegionBlockSet, Blocks);
  169. Function *OuterFn = OI.getFunction();
  170. CodeExtractorAnalysisCache CEAC(*OuterFn);
  171. CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
  172. /* AggregateArgs */ true,
  173. /* BlockFrequencyInfo */ nullptr,
  174. /* BranchProbabilityInfo */ nullptr,
  175. /* AssumptionCache */ nullptr,
  176. /* AllowVarArgs */ true,
  177. /* AllowAlloca */ true,
  178. /* Suffix */ ".omp_par");
  179. LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
  180. LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
  181. << " Exit: " << OI.ExitBB->getName() << "\n");
  182. assert(Extractor.isEligible() &&
  183. "Expected OpenMP outlining to be possible!");
  184. for (auto *V : OI.ExcludeArgsFromAggregate)
  185. Extractor.excludeArgFromAggregate(V);
  186. Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
  187. LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
  188. LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
  189. assert(OutlinedFn->getReturnType()->isVoidTy() &&
  190. "OpenMP outlined functions should not return a value!");
  191. // For compability with the clang CG we move the outlined function after the
  192. // one with the parallel region.
  193. OutlinedFn->removeFromParent();
  194. M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
  195. // Remove the artificial entry introduced by the extractor right away, we
  196. // made our own entry block after all.
  197. {
  198. BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
  199. assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
  200. assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
  201. // Move instructions from the to-be-deleted ArtificialEntry to the entry
  202. // basic block of the parallel region. CodeExtractor generates
  203. // instructions to unwrap the aggregate argument and may sink
  204. // allocas/bitcasts for values that are solely used in the outlined region
  205. // and do not escape.
  206. assert(!ArtificialEntry.empty() &&
  207. "Expected instructions to add in the outlined region entry");
  208. for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(),
  209. End = ArtificialEntry.rend();
  210. It != End;) {
  211. Instruction &I = *It;
  212. It++;
  213. if (I.isTerminator())
  214. continue;
  215. I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
  216. }
  217. OI.EntryBB->moveBefore(&ArtificialEntry);
  218. ArtificialEntry.eraseFromParent();
  219. }
  220. assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
  221. assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
  222. // Run a user callback, e.g. to add attributes.
  223. if (OI.PostOutlineCB)
  224. OI.PostOutlineCB(*OutlinedFn);
  225. }
  226. // Remove work items that have been completed.
  227. OutlineInfos = std::move(DeferredOutlines);
  228. }
  229. OpenMPIRBuilder::~OpenMPIRBuilder() {
  230. assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
  231. }
  232. GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
  233. IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
  234. auto *GV =
  235. new GlobalVariable(M, I32Ty,
  236. /* isConstant = */ true, GlobalValue::WeakODRLinkage,
  237. ConstantInt::get(I32Ty, Value), Name);
  238. GV->setVisibility(GlobalValue::HiddenVisibility);
  239. return GV;
  240. }
  241. Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
  242. uint32_t SrcLocStrSize,
  243. IdentFlag LocFlags,
  244. unsigned Reserve2Flags) {
  245. // Enable "C-mode".
  246. LocFlags |= OMP_IDENT_FLAG_KMPC;
  247. Constant *&Ident =
  248. IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  249. if (!Ident) {
  250. Constant *I32Null = ConstantInt::getNullValue(Int32);
  251. Constant *IdentData[] = {I32Null,
  252. ConstantInt::get(Int32, uint32_t(LocFlags)),
  253. ConstantInt::get(Int32, Reserve2Flags),
  254. ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
  255. Constant *Initializer =
  256. ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
  257. // Look for existing encoding of the location + flags, not needed but
  258. // minimizes the difference to the existing solution while we transition.
  259. for (GlobalVariable &GV : M.getGlobalList())
  260. if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
  261. if (GV.getInitializer() == Initializer)
  262. Ident = &GV;
  263. if (!Ident) {
  264. auto *GV = new GlobalVariable(
  265. M, OpenMPIRBuilder::Ident,
  266. /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
  267. nullptr, GlobalValue::NotThreadLocal,
  268. M.getDataLayout().getDefaultGlobalsAddressSpace());
  269. GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  270. GV->setAlignment(Align(8));
  271. Ident = GV;
  272. }
  273. }
  274. return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
  275. }
  276. Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
  277. uint32_t &SrcLocStrSize) {
  278. SrcLocStrSize = LocStr.size();
  279. Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  280. if (!SrcLocStr) {
  281. Constant *Initializer =
  282. ConstantDataArray::getString(M.getContext(), LocStr);
  283. // Look for existing encoding of the location, not needed but minimizes the
  284. // difference to the existing solution while we transition.
  285. for (GlobalVariable &GV : M.getGlobalList())
  286. if (GV.isConstant() && GV.hasInitializer() &&
  287. GV.getInitializer() == Initializer)
  288. return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
  289. SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
  290. /* AddressSpace */ 0, &M);
  291. }
  292. return SrcLocStr;
  293. }
  294. Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
  295. StringRef FileName,
  296. unsigned Line, unsigned Column,
  297. uint32_t &SrcLocStrSize) {
  298. SmallString<128> Buffer;
  299. Buffer.push_back(';');
  300. Buffer.append(FileName);
  301. Buffer.push_back(';');
  302. Buffer.append(FunctionName);
  303. Buffer.push_back(';');
  304. Buffer.append(std::to_string(Line));
  305. Buffer.push_back(';');
  306. Buffer.append(std::to_string(Column));
  307. Buffer.push_back(';');
  308. Buffer.push_back(';');
  309. return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
  310. }
  311. Constant *
  312. OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
  313. StringRef UnknownLoc = ";unknown;unknown;0;0;;";
  314. return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
  315. }
  316. Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
  317. uint32_t &SrcLocStrSize,
  318. Function *F) {
  319. DILocation *DIL = DL.get();
  320. if (!DIL)
  321. return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  322. StringRef FileName = M.getName();
  323. if (DIFile *DIF = DIL->getFile())
  324. if (Optional<StringRef> Source = DIF->getSource())
  325. FileName = *Source;
  326. StringRef Function = DIL->getScope()->getSubprogram()->getName();
  327. if (Function.empty() && F)
  328. Function = F->getName();
  329. return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
  330. DIL->getColumn(), SrcLocStrSize);
  331. }
  332. Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
  333. uint32_t &SrcLocStrSize) {
  334. return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
  335. Loc.IP.getBlock()->getParent());
  336. }
  337. Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  338. return Builder.CreateCall(
  339. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
  340. "omp_global_thread_num");
  341. }
  342. OpenMPIRBuilder::InsertPointTy
  343. OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
  344. bool ForceSimpleCall, bool CheckCancelFlag) {
  345. if (!updateToLocation(Loc))
  346. return Loc.IP;
  347. return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
  348. }
  349. OpenMPIRBuilder::InsertPointTy
  350. OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
  351. bool ForceSimpleCall, bool CheckCancelFlag) {
  352. // Build call __kmpc_cancel_barrier(loc, thread_id) or
  353. // __kmpc_barrier(loc, thread_id);
  354. IdentFlag BarrierLocFlags;
  355. switch (Kind) {
  356. case OMPD_for:
  357. BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
  358. break;
  359. case OMPD_sections:
  360. BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
  361. break;
  362. case OMPD_single:
  363. BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
  364. break;
  365. case OMPD_barrier:
  366. BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
  367. break;
  368. default:
  369. BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
  370. break;
  371. }
  372. uint32_t SrcLocStrSize;
  373. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  374. Value *Args[] = {
  375. getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
  376. getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
  377. // If we are in a cancellable parallel region, barriers are cancellation
  378. // points.
  379. // TODO: Check why we would force simple calls or to ignore the cancel flag.
  380. bool UseCancelBarrier =
  381. !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
  382. Value *Result =
  383. Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
  384. UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
  385. : OMPRTL___kmpc_barrier),
  386. Args);
  387. if (UseCancelBarrier && CheckCancelFlag)
  388. emitCancelationCheckImpl(Result, OMPD_parallel);
  389. return Builder.saveIP();
  390. }
  391. OpenMPIRBuilder::InsertPointTy
  392. OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
  393. Value *IfCondition,
  394. omp::Directive CanceledDirective) {
  395. if (!updateToLocation(Loc))
  396. return Loc.IP;
  397. // LLVM utilities like blocks with terminators.
  398. auto *UI = Builder.CreateUnreachable();
  399. Instruction *ThenTI = UI, *ElseTI = nullptr;
  400. if (IfCondition)
  401. SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  402. Builder.SetInsertPoint(ThenTI);
  403. Value *CancelKind = nullptr;
  404. switch (CanceledDirective) {
  405. #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
  406. case DirectiveEnum: \
  407. CancelKind = Builder.getInt32(Value); \
  408. break;
  409. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  410. default:
  411. llvm_unreachable("Unknown cancel kind!");
  412. }
  413. uint32_t SrcLocStrSize;
  414. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  415. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  416. Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  417. Value *Result = Builder.CreateCall(
  418. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
  419. auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
  420. if (CanceledDirective == OMPD_parallel) {
  421. IRBuilder<>::InsertPointGuard IPG(Builder);
  422. Builder.restoreIP(IP);
  423. createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
  424. omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
  425. /* CheckCancelFlag */ false);
  426. }
  427. };
  428. // The actual cancel logic is shared with others, e.g., cancel_barriers.
  429. emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
  430. // Update the insertion point and remove the terminator we introduced.
  431. Builder.SetInsertPoint(UI->getParent());
  432. UI->eraseFromParent();
  433. return Builder.saveIP();
  434. }
  435. void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
  436. omp::Directive CanceledDirective,
  437. FinalizeCallbackTy ExitCB) {
  438. assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
  439. "Unexpected cancellation!");
  440. // For a cancel barrier we create two new blocks.
  441. BasicBlock *BB = Builder.GetInsertBlock();
  442. BasicBlock *NonCancellationBlock;
  443. if (Builder.GetInsertPoint() == BB->end()) {
  444. // TODO: This branch will not be needed once we moved to the
  445. // OpenMPIRBuilder codegen completely.
  446. NonCancellationBlock = BasicBlock::Create(
  447. BB->getContext(), BB->getName() + ".cont", BB->getParent());
  448. } else {
  449. NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
  450. BB->getTerminator()->eraseFromParent();
  451. Builder.SetInsertPoint(BB);
  452. }
  453. BasicBlock *CancellationBlock = BasicBlock::Create(
  454. BB->getContext(), BB->getName() + ".cncl", BB->getParent());
  455. // Jump to them based on the return value.
  456. Value *Cmp = Builder.CreateIsNull(CancelFlag);
  457. Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
  458. /* TODO weight */ nullptr, nullptr);
  459. // From the cancellation block we finalize all variables and go to the
  460. // post finalization block that is known to the FiniCB callback.
  461. Builder.SetInsertPoint(CancellationBlock);
  462. if (ExitCB)
  463. ExitCB(Builder.saveIP());
  464. auto &FI = FinalizationStack.back();
  465. FI.FiniCB(Builder.saveIP());
  466. // The continuation block is where code generation continues.
  467. Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
  468. }
  469. IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
  470. const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
  471. BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
  472. FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
  473. omp::ProcBindKind ProcBind, bool IsCancellable) {
  474. assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
  475. if (!updateToLocation(Loc))
  476. return Loc.IP;
  477. uint32_t SrcLocStrSize;
  478. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  479. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  480. Value *ThreadID = getOrCreateThreadID(Ident);
  481. if (NumThreads) {
  482. // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
  483. Value *Args[] = {
  484. Ident, ThreadID,
  485. Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
  486. Builder.CreateCall(
  487. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  488. }
  489. if (ProcBind != OMP_PROC_BIND_default) {
  490. // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
  491. Value *Args[] = {
  492. Ident, ThreadID,
  493. ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
  494. Builder.CreateCall(
  495. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  496. }
  497. BasicBlock *InsertBB = Builder.GetInsertBlock();
  498. Function *OuterFn = InsertBB->getParent();
  499. // Save the outer alloca block because the insertion iterator may get
  500. // invalidated and we still need this later.
  501. BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
  502. // Vector to remember instructions we used only during the modeling but which
  503. // we want to delete at the end.
  504. SmallVector<Instruction *, 4> ToBeDeleted;
  505. // Change the location to the outer alloca insertion point to create and
  506. // initialize the allocas we pass into the parallel region.
  507. Builder.restoreIP(OuterAllocaIP);
  508. AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  509. AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
  510. // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  511. // program, otherwise we only need them for modeling purposes to get the
  512. // associated arguments in the outlined function. In the former case,
  513. // initialize the allocas properly, in the latter case, delete them later.
  514. if (IfCondition) {
  515. Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
  516. Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  517. } else {
  518. ToBeDeleted.push_back(TIDAddr);
  519. ToBeDeleted.push_back(ZeroAddr);
  520. }
  521. // Create an artificial insertion point that will also ensure the blocks we
  522. // are about to split are not degenerated.
  523. auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
  524. Instruction *ThenTI = UI, *ElseTI = nullptr;
  525. if (IfCondition)
  526. SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  527. BasicBlock *ThenBB = ThenTI->getParent();
  528. BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  529. BasicBlock *PRegBodyBB =
  530. PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  531. BasicBlock *PRegPreFiniBB =
  532. PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  533. BasicBlock *PRegExitBB =
  534. PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
  535. auto FiniCBWrapper = [&](InsertPointTy IP) {
  536. // Hide "open-ended" blocks from the given FiniCB by setting the right jump
  537. // target to the region exit block.
  538. if (IP.getBlock()->end() == IP.getPoint()) {
  539. IRBuilder<>::InsertPointGuard IPG(Builder);
  540. Builder.restoreIP(IP);
  541. Instruction *I = Builder.CreateBr(PRegExitBB);
  542. IP = InsertPointTy(I->getParent(), I->getIterator());
  543. }
  544. assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
  545. IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
  546. "Unexpected insertion point for finalization call!");
  547. return FiniCB(IP);
  548. };
  549. FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
  550. // Generate the privatization allocas in the block that will become the entry
  551. // of the outlined function.
  552. Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  553. InsertPointTy InnerAllocaIP = Builder.saveIP();
  554. AllocaInst *PrivTIDAddr =
  555. Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  556. Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
  557. // Add some fake uses for OpenMP provided arguments.
  558. ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
  559. Instruction *ZeroAddrUse =
  560. Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
  561. ToBeDeleted.push_back(ZeroAddrUse);
  562. // ThenBB
  563. // |
  564. // V
  565. // PRegionEntryBB <- Privatization allocas are placed here.
  566. // |
  567. // V
  568. // PRegionBodyBB <- BodeGen is invoked here.
  569. // |
  570. // V
  571. // PRegPreFiniBB <- The block we will start finalization from.
  572. // |
  573. // V
  574. // PRegionExitBB <- A common exit to simplify block collection.
  575. //
  576. LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
  577. // Let the caller create the body.
  578. assert(BodyGenCB && "Expected body generation callback!");
  579. InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  580. BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
  581. LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
  582. FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  583. if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
  584. if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
  585. llvm::LLVMContext &Ctx = F->getContext();
  586. MDBuilder MDB(Ctx);
  587. // Annotate the callback behavior of the __kmpc_fork_call:
  588. // - The callback callee is argument number 2 (microtask).
  589. // - The first two arguments of the callback callee are unknown (-1).
  590. // - All variadic arguments to the __kmpc_fork_call are passed to the
  591. // callback callee.
  592. F->addMetadata(
  593. llvm::LLVMContext::MD_callback,
  594. *llvm::MDNode::get(
  595. Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
  596. /* VarArgsArePassed */ true)}));
  597. }
  598. }
  599. OutlineInfo OI;
  600. OI.PostOutlineCB = [=](Function &OutlinedFn) {
  601. // Add some known attributes.
  602. OutlinedFn.addParamAttr(0, Attribute::NoAlias);
  603. OutlinedFn.addParamAttr(1, Attribute::NoAlias);
  604. OutlinedFn.addFnAttr(Attribute::NoUnwind);
  605. OutlinedFn.addFnAttr(Attribute::NoRecurse);
  606. assert(OutlinedFn.arg_size() >= 2 &&
  607. "Expected at least tid and bounded tid as arguments");
  608. unsigned NumCapturedVars =
  609. OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
  610. CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
  611. CI->getParent()->setName("omp_parallel");
  612. Builder.SetInsertPoint(CI);
  613. // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
  614. Value *ForkCallArgs[] = {
  615. Ident, Builder.getInt32(NumCapturedVars),
  616. Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
  617. SmallVector<Value *, 16> RealArgs;
  618. RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
  619. RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
  620. Builder.CreateCall(RTLFn, RealArgs);
  621. LLVM_DEBUG(dbgs() << "With fork_call placed: "
  622. << *Builder.GetInsertBlock()->getParent() << "\n");
  623. InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
  624. // Initialize the local TID stack location with the argument value.
  625. Builder.SetInsertPoint(PrivTID);
  626. Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
  627. Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
  628. // If no "if" clause was present we do not need the call created during
  629. // outlining, otherwise we reuse it in the serialized parallel region.
  630. if (!ElseTI) {
  631. CI->eraseFromParent();
  632. } else {
  633. // If an "if" clause was present we are now generating the serialized
  634. // version into the "else" branch.
  635. Builder.SetInsertPoint(ElseTI);
  636. // Build calls __kmpc_serialized_parallel(&Ident, GTid);
  637. Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
  638. Builder.CreateCall(
  639. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
  640. SerializedParallelCallArgs);
  641. // OutlinedFn(&GTid, &zero, CapturedStruct);
  642. CI->removeFromParent();
  643. Builder.Insert(CI);
  644. // __kmpc_end_serialized_parallel(&Ident, GTid);
  645. Value *EndArgs[] = {Ident, ThreadID};
  646. Builder.CreateCall(
  647. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
  648. EndArgs);
  649. LLVM_DEBUG(dbgs() << "With serialized parallel region: "
  650. << *Builder.GetInsertBlock()->getParent() << "\n");
  651. }
  652. for (Instruction *I : ToBeDeleted)
  653. I->eraseFromParent();
  654. };
  655. // Adjust the finalization stack, verify the adjustment, and call the
  656. // finalize function a last time to finalize values between the pre-fini
  657. // block and the exit block if we left the parallel "the normal way".
  658. auto FiniInfo = FinalizationStack.pop_back_val();
  659. (void)FiniInfo;
  660. assert(FiniInfo.DK == OMPD_parallel &&
  661. "Unexpected finalization stack state!");
  662. Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
  663. InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  664. FiniCB(PreFiniIP);
  665. OI.EntryBB = PRegEntryBB;
  666. OI.ExitBB = PRegExitBB;
  667. SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  668. SmallVector<BasicBlock *, 32> Blocks;
  669. OI.collectBlocks(ParallelRegionBlockSet, Blocks);
  670. // Ensure a single exit node for the outlined region by creating one.
  671. // We might have multiple incoming edges to the exit now due to finalizations,
  672. // e.g., cancel calls that cause the control flow to leave the region.
  673. BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  674. PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  675. PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  676. Blocks.push_back(PRegOutlinedExitBB);
  677. CodeExtractorAnalysisCache CEAC(*OuterFn);
  678. CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
  679. /* AggregateArgs */ false,
  680. /* BlockFrequencyInfo */ nullptr,
  681. /* BranchProbabilityInfo */ nullptr,
  682. /* AssumptionCache */ nullptr,
  683. /* AllowVarArgs */ true,
  684. /* AllowAlloca */ true,
  685. /* Suffix */ ".omp_par");
  686. // Find inputs to, outputs from the code region.
  687. BasicBlock *CommonExit = nullptr;
  688. SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  689. Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  690. Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
  691. LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
  692. FunctionCallee TIDRTLFn =
  693. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
  694. auto PrivHelper = [&](Value &V) {
  695. if (&V == TIDAddr || &V == ZeroAddr) {
  696. OI.ExcludeArgsFromAggregate.push_back(&V);
  697. return;
  698. }
  699. SetVector<Use *> Uses;
  700. for (Use &U : V.uses())
  701. if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
  702. if (ParallelRegionBlockSet.count(UserI->getParent()))
  703. Uses.insert(&U);
  704. // __kmpc_fork_call expects extra arguments as pointers. If the input
  705. // already has a pointer type, everything is fine. Otherwise, store the
  706. // value onto stack and load it back inside the to-be-outlined region. This
  707. // will ensure only the pointer will be passed to the function.
  708. // FIXME: if there are more than 15 trailing arguments, they must be
  709. // additionally packed in a struct.
  710. Value *Inner = &V;
  711. if (!V.getType()->isPointerTy()) {
  712. IRBuilder<>::InsertPointGuard Guard(Builder);
  713. LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
  714. Builder.restoreIP(OuterAllocaIP);
  715. Value *Ptr =
  716. Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
  717. // Store to stack at end of the block that currently branches to the entry
  718. // block of the to-be-outlined region.
  719. Builder.SetInsertPoint(InsertBB,
  720. InsertBB->getTerminator()->getIterator());
  721. Builder.CreateStore(&V, Ptr);
  722. // Load back next to allocations in the to-be-outlined region.
  723. Builder.restoreIP(InnerAllocaIP);
  724. Inner = Builder.CreateLoad(V.getType(), Ptr);
  725. }
  726. Value *ReplacementValue = nullptr;
  727. CallInst *CI = dyn_cast<CallInst>(&V);
  728. if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
  729. ReplacementValue = PrivTID;
  730. } else {
  731. Builder.restoreIP(
  732. PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
  733. assert(ReplacementValue &&
  734. "Expected copy/create callback to set replacement value!");
  735. if (ReplacementValue == &V)
  736. return;
  737. }
  738. for (Use *UPtr : Uses)
  739. UPtr->set(ReplacementValue);
  740. };
  741. // Reset the inner alloca insertion as it will be used for loading the values
  742. // wrapped into pointers before passing them into the to-be-outlined region.
  743. // Configure it to insert immediately after the fake use of zero address so
  744. // that they are available in the generated body and so that the
  745. // OpenMP-related values (thread ID and zero address pointers) remain leading
  746. // in the argument list.
  747. InnerAllocaIP = IRBuilder<>::InsertPoint(
  748. ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
  749. // Reset the outer alloca insertion point to the entry of the relevant block
  750. // in case it was invalidated.
  751. OuterAllocaIP = IRBuilder<>::InsertPoint(
  752. OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
  753. for (Value *Input : Inputs) {
  754. LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
  755. PrivHelper(*Input);
  756. }
  757. LLVM_DEBUG({
  758. for (Value *Output : Outputs)
  759. LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  760. });
  761. assert(Outputs.empty() &&
  762. "OpenMP outlining should not produce live-out values!");
  763. LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
  764. LLVM_DEBUG({
  765. for (auto *BB : Blocks)
  766. dbgs() << " PBR: " << BB->getName() << "\n";
  767. });
  768. // Register the outlined info.
  769. addOutlineInfo(std::move(OI));
  770. InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  771. UI->eraseFromParent();
  772. return AfterIP;
  773. }
  774. void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
  775. // Build call void __kmpc_flush(ident_t *loc)
  776. uint32_t SrcLocStrSize;
  777. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  778. Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
  779. Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
  780. }
  781. void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
  782. if (!updateToLocation(Loc))
  783. return;
  784. emitFlush(Loc);
  785. }
  786. void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  787. // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  788. // global_tid);
  789. uint32_t SrcLocStrSize;
  790. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  791. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  792. Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
  793. // Ignore return result until untied tasks are supported.
  794. Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
  795. Args);
  796. }
  797. void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  798. if (!updateToLocation(Loc))
  799. return;
  800. emitTaskwaitImpl(Loc);
  801. }
  802. void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  803. // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  804. uint32_t SrcLocStrSize;
  805. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  806. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  807. Constant *I32Null = ConstantInt::getNullValue(Int32);
  808. Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
  809. Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
  810. Args);
  811. }
  812. void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  813. if (!updateToLocation(Loc))
  814. return;
  815. emitTaskyieldImpl(Loc);
  816. }
  817. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
  818. const LocationDescription &Loc, InsertPointTy AllocaIP,
  819. ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
  820. FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
  821. if (!updateToLocation(Loc))
  822. return Loc.IP;
  823. auto FiniCBWrapper = [&](InsertPointTy IP) {
  824. if (IP.getBlock()->end() != IP.getPoint())
  825. return FiniCB(IP);
  826. // This must be done otherwise any nested constructs using FinalizeOMPRegion
  827. // will fail because that function requires the Finalization Basic Block to
  828. // have a terminator, which is already removed by EmitOMPRegionBody.
  829. // IP is currently at cancelation block.
  830. // We need to backtrack to the condition block to fetch
  831. // the exit block and create a branch from cancelation
  832. // to exit block.
  833. IRBuilder<>::InsertPointGuard IPG(Builder);
  834. Builder.restoreIP(IP);
  835. auto *CaseBB = IP.getBlock()->getSinglePredecessor();
  836. auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
  837. auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
  838. Instruction *I = Builder.CreateBr(ExitBB);
  839. IP = InsertPointTy(I->getParent(), I->getIterator());
  840. return FiniCB(IP);
  841. };
  842. FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
  843. // Each section is emitted as a switch case
  844. // Each finalization callback is handled from clang.EmitOMPSectionDirective()
  845. // -> OMP.createSection() which generates the IR for each section
  846. // Iterate through all sections and emit a switch construct:
  847. // switch (IV) {
  848. // case 0:
  849. // <SectionStmt[0]>;
  850. // break;
  851. // ...
  852. // case <NumSection> - 1:
  853. // <SectionStmt[<NumSection> - 1]>;
  854. // break;
  855. // }
  856. // ...
  857. // section_loop.after:
  858. // <FiniCB>;
  859. auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
  860. auto *CurFn = CodeGenIP.getBlock()->getParent();
  861. auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
  862. auto *ForExitBB = CodeGenIP.getBlock()
  863. ->getSinglePredecessor()
  864. ->getTerminator()
  865. ->getSuccessor(1);
  866. SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
  867. Builder.restoreIP(CodeGenIP);
  868. unsigned CaseNumber = 0;
  869. for (auto SectionCB : SectionCBs) {
  870. auto *CaseBB = BasicBlock::Create(M.getContext(),
  871. "omp_section_loop.body.case", CurFn);
  872. SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
  873. Builder.SetInsertPoint(CaseBB);
  874. SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
  875. CaseNumber++;
  876. }
  877. // remove the existing terminator from body BB since there can be no
  878. // terminators after switch/case
  879. CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
  880. };
  881. // Loop body ends here
  882. // LowerBound, UpperBound, and STride for createCanonicalLoop
  883. Type *I32Ty = Type::getInt32Ty(M.getContext());
  884. Value *LB = ConstantInt::get(I32Ty, 0);
  885. Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  886. Value *ST = ConstantInt::get(I32Ty, 1);
  887. llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
  888. Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
  889. Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
  890. AllocaIP = Builder.saveIP();
  891. InsertPointTy AfterIP =
  892. applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
  893. BasicBlock *LoopAfterBB = AfterIP.getBlock();
  894. Instruction *SplitPos = LoopAfterBB->getTerminator();
  895. if (!isa_and_nonnull<BranchInst>(SplitPos))
  896. SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
  897. // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
  898. // which requires a BB with branch
  899. BasicBlock *ExitBB =
  900. LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
  901. SplitPos->eraseFromParent();
  902. // Apply the finalization callback in LoopAfterBB
  903. auto FiniInfo = FinalizationStack.pop_back_val();
  904. assert(FiniInfo.DK == OMPD_sections &&
  905. "Unexpected finalization stack state!");
  906. Builder.SetInsertPoint(LoopAfterBB->getTerminator());
  907. FiniInfo.FiniCB(Builder.saveIP());
  908. Builder.SetInsertPoint(ExitBB);
  909. return Builder.saveIP();
  910. }
  911. OpenMPIRBuilder::InsertPointTy
  912. OpenMPIRBuilder::createSection(const LocationDescription &Loc,
  913. BodyGenCallbackTy BodyGenCB,
  914. FinalizeCallbackTy FiniCB) {
  915. if (!updateToLocation(Loc))
  916. return Loc.IP;
  917. auto FiniCBWrapper = [&](InsertPointTy IP) {
  918. if (IP.getBlock()->end() != IP.getPoint())
  919. return FiniCB(IP);
  920. // This must be done otherwise any nested constructs using FinalizeOMPRegion
  921. // will fail because that function requires the Finalization Basic Block to
  922. // have a terminator, which is already removed by EmitOMPRegionBody.
  923. // IP is currently at cancelation block.
  924. // We need to backtrack to the condition block to fetch
  925. // the exit block and create a branch from cancelation
  926. // to exit block.
  927. IRBuilder<>::InsertPointGuard IPG(Builder);
  928. Builder.restoreIP(IP);
  929. auto *CaseBB = Loc.IP.getBlock();
  930. auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
  931. auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
  932. Instruction *I = Builder.CreateBr(ExitBB);
  933. IP = InsertPointTy(I->getParent(), I->getIterator());
  934. return FiniCB(IP);
  935. };
  936. Directive OMPD = Directive::OMPD_sections;
  937. // Since we are using Finalization Callback here, HasFinalize
  938. // and IsCancellable have to be true
  939. return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
  940. /*Conditional*/ false, /*hasFinalize*/ true,
  941. /*IsCancellable*/ true);
  942. }
  943. /// Create a function with a unique name and a "void (i8*, i8*)" signature in
  944. /// the given module and return it.
  945. Function *getFreshReductionFunc(Module &M) {
  946. Type *VoidTy = Type::getVoidTy(M.getContext());
  947. Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
  948. auto *FuncTy =
  949. FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
  950. return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
  951. M.getDataLayout().getDefaultGlobalsAddressSpace(),
  952. ".omp.reduction.func", &M);
  953. }
  954. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
  955. const LocationDescription &Loc, InsertPointTy AllocaIP,
  956. ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
  957. for (const ReductionInfo &RI : ReductionInfos) {
  958. (void)RI;
  959. assert(RI.Variable && "expected non-null variable");
  960. assert(RI.PrivateVariable && "expected non-null private variable");
  961. assert(RI.ReductionGen && "expected non-null reduction generator callback");
  962. assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
  963. "expected variables and their private equivalents to have the same "
  964. "type");
  965. assert(RI.Variable->getType()->isPointerTy() &&
  966. "expected variables to be pointers");
  967. }
  968. if (!updateToLocation(Loc))
  969. return InsertPointTy();
  970. BasicBlock *InsertBlock = Loc.IP.getBlock();
  971. BasicBlock *ContinuationBlock =
  972. InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
  973. InsertBlock->getTerminator()->eraseFromParent();
  974. // Create and populate array of type-erased pointers to private reduction
  975. // values.
  976. unsigned NumReductions = ReductionInfos.size();
  977. Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
  978. Builder.restoreIP(AllocaIP);
  979. Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
  980. Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  981. for (auto En : enumerate(ReductionInfos)) {
  982. unsigned Index = En.index();
  983. const ReductionInfo &RI = En.value();
  984. Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
  985. RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
  986. Value *Casted =
  987. Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
  988. "private.red.var." + Twine(Index) + ".casted");
  989. Builder.CreateStore(Casted, RedArrayElemPtr);
  990. }
  991. // Emit a call to the runtime function that orchestrates the reduction.
  992. // Declare the reduction function in the process.
  993. Function *Func = Builder.GetInsertBlock()->getParent();
  994. Module *Module = Func->getParent();
  995. Value *RedArrayPtr =
  996. Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
  997. uint32_t SrcLocStrSize;
  998. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  999. bool CanGenerateAtomic =
  1000. llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
  1001. return RI.AtomicReductionGen;
  1002. });
  1003. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
  1004. CanGenerateAtomic
  1005. ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
  1006. : IdentFlag(0));
  1007. Value *ThreadId = getOrCreateThreadID(Ident);
  1008. Constant *NumVariables = Builder.getInt32(NumReductions);
  1009. const DataLayout &DL = Module->getDataLayout();
  1010. unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  1011. Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
  1012. Function *ReductionFunc = getFreshReductionFunc(*Module);
  1013. Value *Lock = getOMPCriticalRegionLock(".reduction");
  1014. Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
  1015. IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
  1016. : RuntimeFunction::OMPRTL___kmpc_reduce);
  1017. CallInst *ReduceCall =
  1018. Builder.CreateCall(ReduceFunc,
  1019. {Ident, ThreadId, NumVariables, RedArraySize,
  1020. RedArrayPtr, ReductionFunc, Lock},
  1021. "reduce");
  1022. // Create final reduction entry blocks for the atomic and non-atomic case.
  1023. // Emit IR that dispatches control flow to one of the blocks based on the
  1024. // reduction supporting the atomic mode.
  1025. BasicBlock *NonAtomicRedBlock =
  1026. BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
  1027. BasicBlock *AtomicRedBlock =
  1028. BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
  1029. SwitchInst *Switch =
  1030. Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
  1031. Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
  1032. Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
  1033. // Populate the non-atomic reduction using the elementwise reduction function.
  1034. // This loads the elements from the global and private variables and reduces
  1035. // them before storing back the result to the global variable.
  1036. Builder.SetInsertPoint(NonAtomicRedBlock);
  1037. for (auto En : enumerate(ReductionInfos)) {
  1038. const ReductionInfo &RI = En.value();
  1039. Type *ValueType = RI.ElementType;
  1040. Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
  1041. "red.value." + Twine(En.index()));
  1042. Value *PrivateRedValue =
  1043. Builder.CreateLoad(ValueType, RI.PrivateVariable,
  1044. "red.private.value." + Twine(En.index()));
  1045. Value *Reduced;
  1046. Builder.restoreIP(
  1047. RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
  1048. if (!Builder.GetInsertBlock())
  1049. return InsertPointTy();
  1050. Builder.CreateStore(Reduced, RI.Variable);
  1051. }
  1052. Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
  1053. IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
  1054. : RuntimeFunction::OMPRTL___kmpc_end_reduce);
  1055. Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
  1056. Builder.CreateBr(ContinuationBlock);
  1057. // Populate the atomic reduction using the atomic elementwise reduction
  1058. // function. There are no loads/stores here because they will be happening
  1059. // inside the atomic elementwise reduction.
  1060. Builder.SetInsertPoint(AtomicRedBlock);
  1061. if (CanGenerateAtomic) {
  1062. for (const ReductionInfo &RI : ReductionInfos) {
  1063. Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
  1064. RI.Variable, RI.PrivateVariable));
  1065. if (!Builder.GetInsertBlock())
  1066. return InsertPointTy();
  1067. }
  1068. Builder.CreateBr(ContinuationBlock);
  1069. } else {
  1070. Builder.CreateUnreachable();
  1071. }
  1072. // Populate the outlined reduction function using the elementwise reduction
  1073. // function. Partial values are extracted from the type-erased array of
  1074. // pointers to private variables.
  1075. BasicBlock *ReductionFuncBlock =
  1076. BasicBlock::Create(Module->getContext(), "", ReductionFunc);
  1077. Builder.SetInsertPoint(ReductionFuncBlock);
  1078. Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
  1079. RedArrayTy->getPointerTo());
  1080. Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
  1081. RedArrayTy->getPointerTo());
  1082. for (auto En : enumerate(ReductionInfos)) {
  1083. const ReductionInfo &RI = En.value();
  1084. Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
  1085. RedArrayTy, LHSArrayPtr, 0, En.index());
  1086. Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
  1087. Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
  1088. Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
  1089. Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
  1090. RedArrayTy, RHSArrayPtr, 0, En.index());
  1091. Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
  1092. Value *RHSPtr =
  1093. Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
  1094. Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
  1095. Value *Reduced;
  1096. Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
  1097. if (!Builder.GetInsertBlock())
  1098. return InsertPointTy();
  1099. Builder.CreateStore(Reduced, LHSPtr);
  1100. }
  1101. Builder.CreateRetVoid();
  1102. Builder.SetInsertPoint(ContinuationBlock);
  1103. return Builder.saveIP();
  1104. }
  1105. OpenMPIRBuilder::InsertPointTy
  1106. OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
  1107. BodyGenCallbackTy BodyGenCB,
  1108. FinalizeCallbackTy FiniCB) {
  1109. if (!updateToLocation(Loc))
  1110. return Loc.IP;
  1111. Directive OMPD = Directive::OMPD_master;
  1112. uint32_t SrcLocStrSize;
  1113. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  1114. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  1115. Value *ThreadId = getOrCreateThreadID(Ident);
  1116. Value *Args[] = {Ident, ThreadId};
  1117. Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  1118. Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
  1119. Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  1120. Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
  1121. return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  1122. /*Conditional*/ true, /*hasFinalize*/ true);
  1123. }
  1124. OpenMPIRBuilder::InsertPointTy
  1125. OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
  1126. BodyGenCallbackTy BodyGenCB,
  1127. FinalizeCallbackTy FiniCB, Value *Filter) {
  1128. if (!updateToLocation(Loc))
  1129. return Loc.IP;
  1130. Directive OMPD = Directive::OMPD_masked;
  1131. uint32_t SrcLocStrSize;
  1132. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  1133. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  1134. Value *ThreadId = getOrCreateThreadID(Ident);
  1135. Value *Args[] = {Ident, ThreadId, Filter};
  1136. Value *ArgsEnd[] = {Ident, ThreadId};
  1137. Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  1138. Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
  1139. Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  1140. Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
  1141. return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  1142. /*Conditional*/ true, /*hasFinalize*/ true);
  1143. }
  1144. CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
  1145. DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
  1146. BasicBlock *PostInsertBefore, const Twine &Name) {
  1147. Module *M = F->getParent();
  1148. LLVMContext &Ctx = M->getContext();
  1149. Type *IndVarTy = TripCount->getType();
  1150. // Create the basic block structure.
  1151. BasicBlock *Preheader =
  1152. BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  1153. BasicBlock *Header =
  1154. BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  1155. BasicBlock *Cond =
  1156. BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  1157. BasicBlock *Body =
  1158. BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  1159. BasicBlock *Latch =
  1160. BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  1161. BasicBlock *Exit =
  1162. BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  1163. BasicBlock *After =
  1164. BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
  1165. // Use specified DebugLoc for new instructions.
  1166. Builder.SetCurrentDebugLocation(DL);
  1167. Builder.SetInsertPoint(Preheader);
  1168. Builder.CreateBr(Header);
  1169. Builder.SetInsertPoint(Header);
  1170. PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  1171. IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  1172. Builder.CreateBr(Cond);
  1173. Builder.SetInsertPoint(Cond);
  1174. Value *Cmp =
  1175. Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  1176. Builder.CreateCondBr(Cmp, Body, Exit);
  1177. Builder.SetInsertPoint(Body);
  1178. Builder.CreateBr(Latch);
  1179. Builder.SetInsertPoint(Latch);
  1180. Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
  1181. "omp_" + Name + ".next", /*HasNUW=*/true);
  1182. Builder.CreateBr(Header);
  1183. IndVarPHI->addIncoming(Next, Latch);
  1184. Builder.SetInsertPoint(Exit);
  1185. Builder.CreateBr(After);
  1186. // Remember and return the canonical control flow.
  1187. LoopInfos.emplace_front();
  1188. CanonicalLoopInfo *CL = &LoopInfos.front();
  1189. CL->Header = Header;
  1190. CL->Cond = Cond;
  1191. CL->Latch = Latch;
  1192. CL->Exit = Exit;
  1193. #ifndef NDEBUG
  1194. CL->assertOK();
  1195. #endif
  1196. return CL;
  1197. }
  1198. CanonicalLoopInfo *
  1199. OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
  1200. LoopBodyGenCallbackTy BodyGenCB,
  1201. Value *TripCount, const Twine &Name) {
  1202. BasicBlock *BB = Loc.IP.getBlock();
  1203. BasicBlock *NextBB = BB->getNextNode();
  1204. CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
  1205. NextBB, NextBB, Name);
  1206. BasicBlock *After = CL->getAfter();
  1207. // If location is not set, don't connect the loop.
  1208. if (updateToLocation(Loc)) {
  1209. // Split the loop at the insertion point: Branch to the preheader and move
  1210. // every following instruction to after the loop (the After BB). Also, the
  1211. // new successor is the loop's after block.
  1212. Builder.CreateBr(CL->getPreheader());
  1213. After->getInstList().splice(After->begin(), BB->getInstList(),
  1214. Builder.GetInsertPoint(), BB->end());
  1215. After->replaceSuccessorsPhiUsesWith(BB, After);
  1216. }
  1217. // Emit the body content. We do it after connecting the loop to the CFG to
  1218. // avoid that the callback encounters degenerate BBs.
  1219. BodyGenCB(CL->getBodyIP(), CL->getIndVar());
  1220. #ifndef NDEBUG
  1221. CL->assertOK();
  1222. #endif
  1223. return CL;
  1224. }
  1225. CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
  1226. const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
  1227. Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
  1228. InsertPointTy ComputeIP, const Twine &Name) {
  1229. // Consider the following difficulties (assuming 8-bit signed integers):
  1230. // * Adding \p Step to the loop counter which passes \p Stop may overflow:
  1231. // DO I = 1, 100, 50
  1232. /// * A \p Step of INT_MIN cannot not be normalized to a positive direction:
  1233. // DO I = 100, 0, -128
  1234. // Start, Stop and Step must be of the same integer type.
  1235. auto *IndVarTy = cast<IntegerType>(Start->getType());
  1236. assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  1237. assert(IndVarTy == Step->getType() && "Step type mismatch");
  1238. LocationDescription ComputeLoc =
  1239. ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
  1240. updateToLocation(ComputeLoc);
  1241. ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  1242. ConstantInt *One = ConstantInt::get(IndVarTy, 1);
  1243. // Like Step, but always positive.
  1244. Value *Incr = Step;
  1245. // Distance between Start and Stop; always positive.
  1246. Value *Span;
  1247. // Condition whether there are no iterations are executed at all, e.g. because
  1248. // UB < LB.
  1249. Value *ZeroCmp;
  1250. if (IsSigned) {
  1251. // Ensure that increment is positive. If not, negate and invert LB and UB.
  1252. Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
  1253. Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
  1254. Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
  1255. Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
  1256. Span = Builder.CreateSub(UB, LB, "", false, true);
  1257. ZeroCmp = Builder.CreateICmp(
  1258. InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  1259. } else {
  1260. Span = Builder.CreateSub(Stop, Start, "", true);
  1261. ZeroCmp = Builder.CreateICmp(
  1262. InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  1263. }
  1264. Value *CountIfLooping;
  1265. if (InclusiveStop) {
  1266. CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  1267. } else {
  1268. // Avoid incrementing past stop since it could overflow.
  1269. Value *CountIfTwo = Builder.CreateAdd(
  1270. Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
  1271. Value *OneCmp = Builder.CreateICmp(
  1272. InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
  1273. CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  1274. }
  1275. Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
  1276. "omp_" + Name + ".tripcount");
  1277. auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
  1278. Builder.restoreIP(CodeGenIP);
  1279. Value *Span = Builder.CreateMul(IV, Step);
  1280. Value *IndVar = Builder.CreateAdd(Span, Start);
  1281. BodyGenCB(Builder.saveIP(), IndVar);
  1282. };
  1283. LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
  1284. return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
  1285. }
  1286. // Returns an LLVM function to call for initializing loop bounds using OpenMP
  1287. // static scheduling depending on `type`. Only i32 and i64 are supported by the
  1288. // runtime. Always interpret integers as unsigned similarly to
  1289. // CanonicalLoopInfo.
  1290. static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
  1291. OpenMPIRBuilder &OMPBuilder) {
  1292. unsigned Bitwidth = Ty->getIntegerBitWidth();
  1293. if (Bitwidth == 32)
  1294. return OMPBuilder.getOrCreateRuntimeFunction(
  1295. M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  1296. if (Bitwidth == 64)
  1297. return OMPBuilder.getOrCreateRuntimeFunction(
  1298. M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  1299. llvm_unreachable("unknown OpenMP loop iterator bitwidth");
  1300. }
  1301. // Sets the number of loop iterations to the given value. This value must be
  1302. // valid in the condition block (i.e., defined in the preheader) and is
  1303. // interpreted as an unsigned integer.
  1304. void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
  1305. Instruction *CmpI = &CLI->getCond()->front();
  1306. assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  1307. CmpI->setOperand(1, TripCount);
  1308. CLI->assertOK();
  1309. }
  1310. OpenMPIRBuilder::InsertPointTy
  1311. OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  1312. InsertPointTy AllocaIP,
  1313. bool NeedsBarrier, Value *Chunk) {
  1314. assert(CLI->isValid() && "Requires a valid canonical loop");
  1315. assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
  1316. "Require dedicated allocate IP");
  1317. // Set up the source location value for OpenMP runtime.
  1318. Builder.restoreIP(CLI->getPreheaderIP());
  1319. Builder.SetCurrentDebugLocation(DL);
  1320. uint32_t SrcLocStrSize;
  1321. Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  1322. Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  1323. // Declare useful OpenMP runtime functions.
  1324. Value *IV = CLI->getIndVar();
  1325. Type *IVTy = IV->getType();
  1326. FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  1327. FunctionCallee StaticFini =
  1328. getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
  1329. // Allocate space for computed loop bounds as expected by the "init" function.
  1330. Builder.restoreIP(AllocaIP);
  1331. Type *I32Type = Type::getInt32Ty(M.getContext());
  1332. Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  1333. Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  1334. Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  1335. Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  1336. // At the end of the preheader, prepare for calling the "init" function by
  1337. // storing the current loop bounds into the allocated space. A canonical loop
  1338. // always iterates from 0 to trip-count with step 1. Note that "init" expects
  1339. // and produces an inclusive upper bound.
  1340. Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  1341. Constant *Zero = ConstantInt::get(IVTy, 0);
  1342. Constant *One = ConstantInt::get(IVTy, 1);
  1343. Builder.CreateStore(Zero, PLowerBound);
  1344. Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  1345. Builder.CreateStore(UpperBound, PUpperBound);
  1346. Builder.CreateStore(One, PStride);
  1347. // FIXME: schedule(static) is NOT the same as schedule(static,1)
  1348. if (!Chunk)
  1349. Chunk = One;
  1350. Value *ThreadNum = getOrCreateThreadID(SrcLoc);
  1351. Constant *SchedulingType =
  1352. ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
  1353. // Call the "init" function and update the trip count of the loop with the
  1354. // value it produced.
  1355. Builder.CreateCall(StaticInit,
  1356. {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
  1357. PUpperBound, PStride, One, Chunk});
  1358. Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  1359. Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  1360. Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  1361. Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  1362. setCanonicalLoopTripCount(CLI, TripCount);
  1363. // Update all uses of the induction variable except the one in the condition
  1364. // block that compares it with the actual upper bound, and the increment in
  1365. // the latch block.
  1366. // TODO: this can eventually move to CanonicalLoopInfo or to a new
  1367. // CanonicalLoopInfoUpdater interface.
  1368. Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
  1369. Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
  1370. IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
  1371. auto *Instr = dyn_cast<Instruction>(U.getUser());
  1372. return !Instr ||
  1373. (Instr->getParent() != CLI->getCond() &&
  1374. Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
  1375. });
  1376. // In the "exit" block, call the "fini" function.
  1377. Builder.SetInsertPoint(CLI->getExit(),
  1378. CLI->getExit()->getTerminator()->getIterator());
  1379. Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
  1380. // Add the barrier if requested.
  1381. if (NeedsBarrier)
  1382. createBarrier(LocationDescription(Builder.saveIP(), DL),
  1383. omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
  1384. /* CheckCancelFlag */ false);
  1385. InsertPointTy AfterIP = CLI->getAfterIP();
  1386. CLI->invalidate();
  1387. return AfterIP;
  1388. }
  1389. OpenMPIRBuilder::InsertPointTy
  1390. OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  1391. InsertPointTy AllocaIP, bool NeedsBarrier) {
  1392. // Currently only supports static schedules.
  1393. return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
  1394. }
  1395. /// Returns an LLVM function to call for initializing loop bounds using OpenMP
  1396. /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
  1397. /// the runtime. Always interpret integers as unsigned similarly to
  1398. /// CanonicalLoopInfo.
  1399. static FunctionCallee
  1400. getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  1401. unsigned Bitwidth = Ty->getIntegerBitWidth();
  1402. if (Bitwidth == 32)
  1403. return OMPBuilder.getOrCreateRuntimeFunction(
  1404. M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  1405. if (Bitwidth == 64)
  1406. return OMPBuilder.getOrCreateRuntimeFunction(
  1407. M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  1408. llvm_unreachable("unknown OpenMP loop iterator bitwidth");
  1409. }
  1410. /// Returns an LLVM function to call for updating the next loop using OpenMP
  1411. /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
  1412. /// the runtime. Always interpret integers as unsigned similarly to
  1413. /// CanonicalLoopInfo.
  1414. static FunctionCallee
  1415. getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  1416. unsigned Bitwidth = Ty->getIntegerBitWidth();
  1417. if (Bitwidth == 32)
  1418. return OMPBuilder.getOrCreateRuntimeFunction(
  1419. M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  1420. if (Bitwidth == 64)
  1421. return OMPBuilder.getOrCreateRuntimeFunction(
  1422. M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  1423. llvm_unreachable("unknown OpenMP loop iterator bitwidth");
  1424. }
  1425. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
  1426. DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
  1427. OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
  1428. assert(CLI->isValid() && "Requires a valid canonical loop");
  1429. assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
  1430. "Require dedicated allocate IP");
  1431. // Set up the source location value for OpenMP runtime.
  1432. Builder.SetCurrentDebugLocation(DL);
  1433. uint32_t SrcLocStrSize;
  1434. Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  1435. Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  1436. // Declare useful OpenMP runtime functions.
  1437. Value *IV = CLI->getIndVar();
  1438. Type *IVTy = IV->getType();
  1439. FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
  1440. FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
  1441. // Allocate space for computed loop bounds as expected by the "init" function.
  1442. Builder.restoreIP(AllocaIP);
  1443. Type *I32Type = Type::getInt32Ty(M.getContext());
  1444. Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  1445. Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  1446. Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  1447. Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  1448. // At the end of the preheader, prepare for calling the "init" function by
  1449. // storing the current loop bounds into the allocated space. A canonical loop
  1450. // always iterates from 0 to trip-count with step 1. Note that "init" expects
  1451. // and produces an inclusive upper bound.
  1452. BasicBlock *PreHeader = CLI->getPreheader();
  1453. Builder.SetInsertPoint(PreHeader->getTerminator());
  1454. Constant *One = ConstantInt::get(IVTy, 1);
  1455. Builder.CreateStore(One, PLowerBound);
  1456. Value *UpperBound = CLI->getTripCount();
  1457. Builder.CreateStore(UpperBound, PUpperBound);
  1458. Builder.CreateStore(One, PStride);
  1459. BasicBlock *Header = CLI->getHeader();
  1460. BasicBlock *Exit = CLI->getExit();
  1461. BasicBlock *Cond = CLI->getCond();
  1462. InsertPointTy AfterIP = CLI->getAfterIP();
  1463. // The CLI will be "broken" in the code below, as the loop is no longer
  1464. // a valid canonical loop.
  1465. if (!Chunk)
  1466. Chunk = One;
  1467. Value *ThreadNum = getOrCreateThreadID(SrcLoc);
  1468. Constant *SchedulingType =
  1469. ConstantInt::get(I32Type, static_cast<int>(SchedType));
  1470. // Call the "init" function.
  1471. Builder.CreateCall(DynamicInit,
  1472. {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
  1473. UpperBound, /* step */ One, Chunk});
  1474. // An outer loop around the existing one.
  1475. BasicBlock *OuterCond = BasicBlock::Create(
  1476. PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
  1477. PreHeader->getParent());
  1478. // This needs to be 32-bit always, so can't use the IVTy Zero above.
  1479. Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  1480. Value *Res =
  1481. Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
  1482. PLowerBound, PUpperBound, PStride});
  1483. Constant *Zero32 = ConstantInt::get(I32Type, 0);
  1484. Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
  1485. Value *LowerBound =
  1486. Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  1487. Builder.CreateCondBr(MoreWork, Header, Exit);
  1488. // Change PHI-node in loop header to use outer cond rather than preheader,
  1489. // and set IV to the LowerBound.
  1490. Instruction *Phi = &Header->front();
  1491. auto *PI = cast<PHINode>(Phi);
  1492. PI->setIncomingBlock(0, OuterCond);
  1493. PI->setIncomingValue(0, LowerBound);
  1494. // Then set the pre-header to jump to the OuterCond
  1495. Instruction *Term = PreHeader->getTerminator();
  1496. auto *Br = cast<BranchInst>(Term);
  1497. Br->setSuccessor(0, OuterCond);
  1498. // Modify the inner condition:
  1499. // * Use the UpperBound returned from the DynamicNext call.
  1500. // * jump to the loop outer loop when done with one of the inner loops.
  1501. Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
  1502. UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  1503. Instruction *Comp = &*Builder.GetInsertPoint();
  1504. auto *CI = cast<CmpInst>(Comp);
  1505. CI->setOperand(1, UpperBound);
  1506. // Redirect the inner exit to branch to outer condition.
  1507. Instruction *Branch = &Cond->back();
  1508. auto *BI = cast<BranchInst>(Branch);
  1509. assert(BI->getSuccessor(1) == Exit);
  1510. BI->setSuccessor(1, OuterCond);
  1511. // Add the barrier if requested.
  1512. if (NeedsBarrier) {
  1513. Builder.SetInsertPoint(&Exit->back());
  1514. createBarrier(LocationDescription(Builder.saveIP(), DL),
  1515. omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
  1516. /* CheckCancelFlag */ false);
  1517. }
  1518. CLI->invalidate();
  1519. return AfterIP;
  1520. }
  1521. /// Make \p Source branch to \p Target.
  1522. ///
  1523. /// Handles two situations:
  1524. /// * \p Source already has an unconditional branch.
  1525. /// * \p Source is a degenerate block (no terminator because the BB is
  1526. /// the current head of the IR construction).
  1527. static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  1528. if (Instruction *Term = Source->getTerminator()) {
  1529. auto *Br = cast<BranchInst>(Term);
  1530. assert(!Br->isConditional() &&
  1531. "BB's terminator must be an unconditional branch (or degenerate)");
  1532. BasicBlock *Succ = Br->getSuccessor(0);
  1533. Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
  1534. Br->setSuccessor(0, Target);
  1535. return;
  1536. }
  1537. auto *NewBr = BranchInst::Create(Target, Source);
  1538. NewBr->setDebugLoc(DL);
  1539. }
  1540. /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
  1541. /// after this \p OldTarget will be orphaned.
  1542. static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
  1543. BasicBlock *NewTarget, DebugLoc DL) {
  1544. for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
  1545. redirectTo(Pred, NewTarget, DL);
  1546. }
  1547. /// Determine which blocks in \p BBs are reachable from outside and remove the
  1548. /// ones that are not reachable from the function.
  1549. static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  1550. SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  1551. auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
  1552. for (Use &U : BB->uses()) {
  1553. auto *UseInst = dyn_cast<Instruction>(U.getUser());
  1554. if (!UseInst)
  1555. continue;
  1556. if (BBsToErase.count(UseInst->getParent()))
  1557. continue;
  1558. return true;
  1559. }
  1560. return false;
  1561. };
  1562. while (true) {
  1563. bool Changed = false;
  1564. for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
  1565. if (HasRemainingUses(BB)) {
  1566. BBsToErase.erase(BB);
  1567. Changed = true;
  1568. }
  1569. }
  1570. if (!Changed)
  1571. break;
  1572. }
  1573. SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  1574. DeleteDeadBlocks(BBVec);
  1575. }
  1576. CanonicalLoopInfo *
  1577. OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
  1578. InsertPointTy ComputeIP) {
  1579. assert(Loops.size() >= 1 && "At least one loop required");
  1580. size_t NumLoops = Loops.size();
  1581. // Nothing to do if there is already just one loop.
  1582. if (NumLoops == 1)
  1583. return Loops.front();
  1584. CanonicalLoopInfo *Outermost = Loops.front();
  1585. CanonicalLoopInfo *Innermost = Loops.back();
  1586. BasicBlock *OrigPreheader = Outermost->getPreheader();
  1587. BasicBlock *OrigAfter = Outermost->getAfter();
  1588. Function *F = OrigPreheader->getParent();
  1589. // Loop control blocks that may become orphaned later.
  1590. SmallVector<BasicBlock *, 12> OldControlBBs;
  1591. OldControlBBs.reserve(6 * Loops.size());
  1592. for (CanonicalLoopInfo *Loop : Loops)
  1593. Loop->collectControlBlocks(OldControlBBs);
  1594. // Setup the IRBuilder for inserting the trip count computation.
  1595. Builder.SetCurrentDebugLocation(DL);
  1596. if (ComputeIP.isSet())
  1597. Builder.restoreIP(ComputeIP);
  1598. else
  1599. Builder.restoreIP(Outermost->getPreheaderIP());
  1600. // Derive the collapsed' loop trip count.
  1601. // TODO: Find common/largest indvar type.
  1602. Value *CollapsedTripCount = nullptr;
  1603. for (CanonicalLoopInfo *L : Loops) {
  1604. assert(L->isValid() &&
  1605. "All loops to collapse must be valid canonical loops");
  1606. Value *OrigTripCount = L->getTripCount();
  1607. if (!CollapsedTripCount) {
  1608. CollapsedTripCount = OrigTripCount;
  1609. continue;
  1610. }
  1611. // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
  1612. CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
  1613. {}, /*HasNUW=*/true);
  1614. }
  1615. // Create the collapsed loop control flow.
  1616. CanonicalLoopInfo *Result =
  1617. createLoopSkeleton(DL, CollapsedTripCount, F,
  1618. OrigPreheader->getNextNode(), OrigAfter, "collapsed");
  1619. // Build the collapsed loop body code.
  1620. // Start with deriving the input loop induction variables from the collapsed
  1621. // one, using a divmod scheme. To preserve the original loops' order, the
  1622. // innermost loop use the least significant bits.
  1623. Builder.restoreIP(Result->getBodyIP());
  1624. Value *Leftover = Result->getIndVar();
  1625. SmallVector<Value *> NewIndVars;
  1626. NewIndVars.resize(NumLoops);
  1627. for (int i = NumLoops - 1; i >= 1; --i) {
  1628. Value *OrigTripCount = Loops[i]->getTripCount();
  1629. Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
  1630. NewIndVars[i] = NewIndVar;
  1631. Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  1632. }
  1633. // Outermost loop gets all the remaining bits.
  1634. NewIndVars[0] = Leftover;
  1635. // Construct the loop body control flow.
  1636. // We progressively construct the branch structure following in direction of
  1637. // the control flow, from the leading in-between code, the loop nest body, the
  1638. // trailing in-between code, and rejoining the collapsed loop's latch.
  1639. // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
  1640. // the ContinueBlock is set, continue with that block. If ContinuePred, use
  1641. // its predecessors as sources.
  1642. BasicBlock *ContinueBlock = Result->getBody();
  1643. BasicBlock *ContinuePred = nullptr;
  1644. auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
  1645. BasicBlock *NextSrc) {
  1646. if (ContinueBlock)
  1647. redirectTo(ContinueBlock, Dest, DL);
  1648. else
  1649. redirectAllPredecessorsTo(ContinuePred, Dest, DL);
  1650. ContinueBlock = nullptr;
  1651. ContinuePred = NextSrc;
  1652. };
  1653. // The code before the nested loop of each level.
  1654. // Because we are sinking it into the nest, it will be executed more often
  1655. // that the original loop. More sophisticated schemes could keep track of what
  1656. // the in-between code is and instantiate it only once per thread.
  1657. for (size_t i = 0; i < NumLoops - 1; ++i)
  1658. ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
  1659. // Connect the loop nest body.
  1660. ContinueWith(Innermost->getBody(), Innermost->getLatch());
  1661. // The code after the nested loop at each level.
  1662. for (size_t i = NumLoops - 1; i > 0; --i)
  1663. ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
  1664. // Connect the finished loop to the collapsed loop latch.
  1665. ContinueWith(Result->getLatch(), nullptr);
  1666. // Replace the input loops with the new collapsed loop.
  1667. redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  1668. redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
  1669. // Replace the input loop indvars with the derived ones.
  1670. for (size_t i = 0; i < NumLoops; ++i)
  1671. Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
  1672. // Remove unused parts of the input loops.
  1673. removeUnusedBlocksFromParent(OldControlBBs);
  1674. for (CanonicalLoopInfo *L : Loops)
  1675. L->invalidate();
  1676. #ifndef NDEBUG
  1677. Result->assertOK();
  1678. #endif
  1679. return Result;
  1680. }
  1681. std::vector<CanonicalLoopInfo *>
  1682. OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
  1683. ArrayRef<Value *> TileSizes) {
  1684. assert(TileSizes.size() == Loops.size() &&
  1685. "Must pass as many tile sizes as there are loops");
  1686. int NumLoops = Loops.size();
  1687. assert(NumLoops >= 1 && "At least one loop to tile required");
  1688. CanonicalLoopInfo *OutermostLoop = Loops.front();
  1689. CanonicalLoopInfo *InnermostLoop = Loops.back();
  1690. Function *F = OutermostLoop->getBody()->getParent();
  1691. BasicBlock *InnerEnter = InnermostLoop->getBody();
  1692. BasicBlock *InnerLatch = InnermostLoop->getLatch();
  1693. // Loop control blocks that may become orphaned later.
  1694. SmallVector<BasicBlock *, 12> OldControlBBs;
  1695. OldControlBBs.reserve(6 * Loops.size());
  1696. for (CanonicalLoopInfo *Loop : Loops)
  1697. Loop->collectControlBlocks(OldControlBBs);
  1698. // Collect original trip counts and induction variable to be accessible by
  1699. // index. Also, the structure of the original loops is not preserved during
  1700. // the construction of the tiled loops, so do it before we scavenge the BBs of
  1701. // any original CanonicalLoopInfo.
  1702. SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  1703. for (CanonicalLoopInfo *L : Loops) {
  1704. assert(L->isValid() && "All input loops must be valid canonical loops");
  1705. OrigTripCounts.push_back(L->getTripCount());
  1706. OrigIndVars.push_back(L->getIndVar());
  1707. }
  1708. // Collect the code between loop headers. These may contain SSA definitions
  1709. // that are used in the loop nest body. To be usable with in the innermost
  1710. // body, these BasicBlocks will be sunk into the loop nest body. That is,
  1711. // these instructions may be executed more often than before the tiling.
  1712. // TODO: It would be sufficient to only sink them into body of the
  1713. // corresponding tile loop.
  1714. SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  1715. for (int i = 0; i < NumLoops - 1; ++i) {
  1716. CanonicalLoopInfo *Surrounding = Loops[i];
  1717. CanonicalLoopInfo *Nested = Loops[i + 1];
  1718. BasicBlock *EnterBB = Surrounding->getBody();
  1719. BasicBlock *ExitBB = Nested->getHeader();
  1720. InbetweenCode.emplace_back(EnterBB, ExitBB);
  1721. }
  1722. // Compute the trip counts of the floor loops.
  1723. Builder.SetCurrentDebugLocation(DL);
  1724. Builder.restoreIP(OutermostLoop->getPreheaderIP());
  1725. SmallVector<Value *, 4> FloorCount, FloorRems;
  1726. for (int i = 0; i < NumLoops; ++i) {
  1727. Value *TileSize = TileSizes[i];
  1728. Value *OrigTripCount = OrigTripCounts[i];
  1729. Type *IVType = OrigTripCount->getType();
  1730. Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
  1731. Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
  1732. // 0 if tripcount divides the tilesize, 1 otherwise.
  1733. // 1 means we need an additional iteration for a partial tile.
  1734. //
  1735. // Unfortunately we cannot just use the roundup-formula
  1736. // (tripcount + tilesize - 1)/tilesize
  1737. // because the summation might overflow. We do not want introduce undefined
  1738. // behavior when the untiled loop nest did not.
  1739. Value *FloorTripOverflow =
  1740. Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
  1741. FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
  1742. FloorTripCount =
  1743. Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
  1744. "omp_floor" + Twine(i) + ".tripcount", true);
  1745. // Remember some values for later use.
  1746. FloorCount.push_back(FloorTripCount);
  1747. FloorRems.push_back(FloorTripRem);
  1748. }
  1749. // Generate the new loop nest, from the outermost to the innermost.
  1750. std::vector<CanonicalLoopInfo *> Result;
  1751. Result.reserve(NumLoops * 2);
  1752. // The basic block of the surrounding loop that enters the nest generated
  1753. // loop.
  1754. BasicBlock *Enter = OutermostLoop->getPreheader();
  1755. // The basic block of the surrounding loop where the inner code should
  1756. // continue.
  1757. BasicBlock *Continue = OutermostLoop->getAfter();
  1758. // Where the next loop basic block should be inserted.
  1759. BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
  1760. auto EmbeddNewLoop =
  1761. [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
  1762. Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
  1763. CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
  1764. DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
  1765. redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
  1766. redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
  1767. // Setup the position where the next embedded loop connects to this loop.
  1768. Enter = EmbeddedLoop->getBody();
  1769. Continue = EmbeddedLoop->getLatch();
  1770. OutroInsertBefore = EmbeddedLoop->getLatch();
  1771. return EmbeddedLoop;
  1772. };
  1773. auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
  1774. const Twine &NameBase) {
  1775. for (auto P : enumerate(TripCounts)) {
  1776. CanonicalLoopInfo *EmbeddedLoop =
  1777. EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
  1778. Result.push_back(EmbeddedLoop);
  1779. }
  1780. };
  1781. EmbeddNewLoops(FloorCount, "floor");
  1782. // Within the innermost floor loop, emit the code that computes the tile
  1783. // sizes.
  1784. Builder.SetInsertPoint(Enter->getTerminator());
  1785. SmallVector<Value *, 4> TileCounts;
  1786. for (int i = 0; i < NumLoops; ++i) {
  1787. CanonicalLoopInfo *FloorLoop = Result[i];
  1788. Value *TileSize = TileSizes[i];
  1789. Value *FloorIsEpilogue =
  1790. Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
  1791. Value *TileTripCount =
  1792. Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
  1793. TileCounts.push_back(TileTripCount);
  1794. }
  1795. // Create the tile loops.
  1796. EmbeddNewLoops(TileCounts, "tile");
  1797. // Insert the inbetween code into the body.
  1798. BasicBlock *BodyEnter = Enter;
  1799. BasicBlock *BodyEntered = nullptr;
  1800. for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
  1801. BasicBlock *EnterBB = P.first;
  1802. BasicBlock *ExitBB = P.second;
  1803. if (BodyEnter)
  1804. redirectTo(BodyEnter, EnterBB, DL);
  1805. else
  1806. redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
  1807. BodyEnter = nullptr;
  1808. BodyEntered = ExitBB;
  1809. }
  1810. // Append the original loop nest body into the generated loop nest body.
  1811. if (BodyEnter)
  1812. redirectTo(BodyEnter, InnerEnter, DL);
  1813. else
  1814. redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  1815. redirectAllPredecessorsTo(InnerLatch, Continue, DL);
  1816. // Replace the original induction variable with an induction variable computed
  1817. // from the tile and floor induction variables.
  1818. Builder.restoreIP(Result.back()->getBodyIP());
  1819. for (int i = 0; i < NumLoops; ++i) {
  1820. CanonicalLoopInfo *FloorLoop = Result[i];
  1821. CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
  1822. Value *OrigIndVar = OrigIndVars[i];
  1823. Value *Size = TileSizes[i];
  1824. Value *Scale =
  1825. Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
  1826. Value *Shift =
  1827. Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
  1828. OrigIndVar->replaceAllUsesWith(Shift);
  1829. }
  1830. // Remove unused parts of the original loops.
  1831. removeUnusedBlocksFromParent(OldControlBBs);
  1832. for (CanonicalLoopInfo *L : Loops)
  1833. L->invalidate();
  1834. #ifndef NDEBUG
  1835. for (CanonicalLoopInfo *GenL : Result)
  1836. GenL->assertOK();
  1837. #endif
  1838. return Result;
  1839. }
  1840. /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
  1841. /// loop already has metadata, the loop properties are appended.
  1842. static void addLoopMetadata(CanonicalLoopInfo *Loop,
  1843. ArrayRef<Metadata *> Properties) {
  1844. assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
  1845. // Nothing to do if no property to attach.
  1846. if (Properties.empty())
  1847. return;
  1848. LLVMContext &Ctx = Loop->getFunction()->getContext();
  1849. SmallVector<Metadata *> NewLoopProperties;
  1850. NewLoopProperties.push_back(nullptr);
  1851. // If the loop already has metadata, prepend it to the new metadata.
  1852. BasicBlock *Latch = Loop->getLatch();
  1853. assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
  1854. MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
  1855. if (Existing)
  1856. append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
  1857. append_range(NewLoopProperties, Properties);
  1858. MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
  1859. LoopID->replaceOperandWith(0, LoopID);
  1860. Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
  1861. }
  1862. /// Attach llvm.access.group metadata to the memref instructions of \p Block
  1863. static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
  1864. LoopInfo &LI) {
  1865. for (Instruction &I : *Block) {
  1866. if (I.mayReadOrWriteMemory()) {
  1867. // TODO: This instruction may already have access group from
  1868. // other pragmas e.g. #pragma clang loop vectorize. Append
  1869. // so that the existing metadata is not overwritten.
  1870. I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
  1871. }
  1872. }
  1873. }
  1874. void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
  1875. LLVMContext &Ctx = Builder.getContext();
  1876. addLoopMetadata(
  1877. Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
  1878. MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
  1879. }
  1880. void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
  1881. LLVMContext &Ctx = Builder.getContext();
  1882. addLoopMetadata(
  1883. Loop, {
  1884. MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
  1885. });
  1886. }
  1887. void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) {
  1888. LLVMContext &Ctx = Builder.getContext();
  1889. Function *F = CanonicalLoop->getFunction();
  1890. FunctionAnalysisManager FAM;
  1891. FAM.registerPass([]() { return DominatorTreeAnalysis(); });
  1892. FAM.registerPass([]() { return LoopAnalysis(); });
  1893. FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
  1894. LoopAnalysis LIA;
  1895. LoopInfo &&LI = LIA.run(*F, FAM);
  1896. Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
  1897. SmallSet<BasicBlock *, 8> Reachable;
  1898. // Get the basic blocks from the loop in which memref instructions
  1899. // can be found.
  1900. // TODO: Generalize getting all blocks inside a CanonicalizeLoopInfo,
  1901. // preferably without running any passes.
  1902. for (BasicBlock *Block : L->getBlocks()) {
  1903. if (Block == CanonicalLoop->getCond() ||
  1904. Block == CanonicalLoop->getHeader())
  1905. continue;
  1906. Reachable.insert(Block);
  1907. }
  1908. // Add access group metadata to memory-access instructions.
  1909. MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
  1910. for (BasicBlock *BB : Reachable)
  1911. addSimdMetadata(BB, AccessGroup, LI);
  1912. // Use the above access group metadata to create loop level
  1913. // metadata, which should be distinct for each loop.
  1914. ConstantAsMetadata *BoolConst =
  1915. ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx)));
  1916. // TODO: If the loop has existing parallel access metadata, have
  1917. // to combine two lists.
  1918. addLoopMetadata(
  1919. CanonicalLoop,
  1920. {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"),
  1921. AccessGroup}),
  1922. MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
  1923. BoolConst})});
  1924. }
  1925. /// Create the TargetMachine object to query the backend for optimization
  1926. /// preferences.
  1927. ///
  1928. /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
  1929. /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
  1930. /// needed for the LLVM pass pipline. We use some default options to avoid
  1931. /// having to pass too many settings from the frontend that probably do not
  1932. /// matter.
  1933. ///
  1934. /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
  1935. /// method. If we are going to use TargetMachine for more purposes, especially
  1936. /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
  1937. /// might become be worth requiring front-ends to pass on their TargetMachine,
  1938. /// or at least cache it between methods. Note that while fontends such as Clang
  1939. /// have just a single main TargetMachine per translation unit, "target-cpu" and
  1940. /// "target-features" that determine the TargetMachine are per-function and can
  1941. /// be overrided using __attribute__((target("OPTIONS"))).
  1942. static std::unique_ptr<TargetMachine>
  1943. createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
  1944. Module *M = F->getParent();
  1945. StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
  1946. StringRef Features = F->getFnAttribute("target-features").getValueAsString();
  1947. const std::string &Triple = M->getTargetTriple();
  1948. std::string Error;
  1949. const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
  1950. if (!TheTarget)
  1951. return {};
  1952. llvm::TargetOptions Options;
  1953. return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
  1954. Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
  1955. OptLevel));
  1956. }
  1957. /// Heuristically determine the best-performant unroll factor for \p CLI. This
  1958. /// depends on the target processor. We are re-using the same heuristics as the
  1959. /// LoopUnrollPass.
  1960. static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
  1961. Function *F = CLI->getFunction();
  1962. // Assume the user requests the most aggressive unrolling, even if the rest of
  1963. // the code is optimized using a lower setting.
  1964. CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
  1965. std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
  1966. FunctionAnalysisManager FAM;
  1967. FAM.registerPass([]() { return TargetLibraryAnalysis(); });
  1968. FAM.registerPass([]() { return AssumptionAnalysis(); });
  1969. FAM.registerPass([]() { return DominatorTreeAnalysis(); });
  1970. FAM.registerPass([]() { return LoopAnalysis(); });
  1971. FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
  1972. FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
  1973. TargetIRAnalysis TIRA;
  1974. if (TM)
  1975. TIRA = TargetIRAnalysis(
  1976. [&](const Function &F) { return TM->getTargetTransformInfo(F); });
  1977. FAM.registerPass([&]() { return TIRA; });
  1978. TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
  1979. ScalarEvolutionAnalysis SEA;
  1980. ScalarEvolution &&SE = SEA.run(*F, FAM);
  1981. DominatorTreeAnalysis DTA;
  1982. DominatorTree &&DT = DTA.run(*F, FAM);
  1983. LoopAnalysis LIA;
  1984. LoopInfo &&LI = LIA.run(*F, FAM);
  1985. AssumptionAnalysis ACT;
  1986. AssumptionCache &&AC = ACT.run(*F, FAM);
  1987. OptimizationRemarkEmitter ORE{F};
  1988. Loop *L = LI.getLoopFor(CLI->getHeader());
  1989. assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
  1990. TargetTransformInfo::UnrollingPreferences UP =
  1991. gatherUnrollingPreferences(L, SE, TTI,
  1992. /*BlockFrequencyInfo=*/nullptr,
  1993. /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
  1994. /*UserThreshold=*/None,
  1995. /*UserCount=*/None,
  1996. /*UserAllowPartial=*/true,
  1997. /*UserAllowRuntime=*/true,
  1998. /*UserUpperBound=*/None,
  1999. /*UserFullUnrollMaxCount=*/None);
  2000. UP.Force = true;
  2001. // Account for additional optimizations taking place before the LoopUnrollPass
  2002. // would unroll the loop.
  2003. UP.Threshold *= UnrollThresholdFactor;
  2004. UP.PartialThreshold *= UnrollThresholdFactor;
  2005. // Use normal unroll factors even if the rest of the code is optimized for
  2006. // size.
  2007. UP.OptSizeThreshold = UP.Threshold;
  2008. UP.PartialOptSizeThreshold = UP.PartialThreshold;
  2009. LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
  2010. << " Threshold=" << UP.Threshold << "\n"
  2011. << " PartialThreshold=" << UP.PartialThreshold << "\n"
  2012. << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
  2013. << " PartialOptSizeThreshold="
  2014. << UP.PartialOptSizeThreshold << "\n");
  2015. // Disable peeling.
  2016. TargetTransformInfo::PeelingPreferences PP =
  2017. gatherPeelingPreferences(L, SE, TTI,
  2018. /*UserAllowPeeling=*/false,
  2019. /*UserAllowProfileBasedPeeling=*/false,
  2020. /*UnrollingSpecficValues=*/false);
  2021. SmallPtrSet<const Value *, 32> EphValues;
  2022. CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  2023. // Assume that reads and writes to stack variables can be eliminated by
  2024. // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
  2025. // size.
  2026. for (BasicBlock *BB : L->blocks()) {
  2027. for (Instruction &I : *BB) {
  2028. Value *Ptr;
  2029. if (auto *Load = dyn_cast<LoadInst>(&I)) {
  2030. Ptr = Load->getPointerOperand();
  2031. } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
  2032. Ptr = Store->getPointerOperand();
  2033. } else
  2034. continue;
  2035. Ptr = Ptr->stripPointerCasts();
  2036. if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
  2037. if (Alloca->getParent() == &F->getEntryBlock())
  2038. EphValues.insert(&I);
  2039. }
  2040. }
  2041. }
  2042. unsigned NumInlineCandidates;
  2043. bool NotDuplicatable;
  2044. bool Convergent;
  2045. unsigned LoopSize =
  2046. ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
  2047. TTI, EphValues, UP.BEInsns);
  2048. LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
  2049. // Loop is not unrollable if the loop contains certain instructions.
  2050. if (NotDuplicatable || Convergent) {
  2051. LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
  2052. return 1;
  2053. }
  2054. // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
  2055. // be able to use it.
  2056. int TripCount = 0;
  2057. int MaxTripCount = 0;
  2058. bool MaxOrZero = false;
  2059. unsigned TripMultiple = 0;
  2060. bool UseUpperBound = false;
  2061. computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
  2062. MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
  2063. UseUpperBound);
  2064. unsigned Factor = UP.Count;
  2065. LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
  2066. // This function returns 1 to signal to not unroll a loop.
  2067. if (Factor == 0)
  2068. return 1;
  2069. return Factor;
  2070. }
  2071. void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
  2072. int32_t Factor,
  2073. CanonicalLoopInfo **UnrolledCLI) {
  2074. assert(Factor >= 0 && "Unroll factor must not be negative");
  2075. Function *F = Loop->getFunction();
  2076. LLVMContext &Ctx = F->getContext();
  2077. // If the unrolled loop is not used for another loop-associated directive, it
  2078. // is sufficient to add metadata for the LoopUnrollPass.
  2079. if (!UnrolledCLI) {
  2080. SmallVector<Metadata *, 2> LoopMetadata;
  2081. LoopMetadata.push_back(
  2082. MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
  2083. if (Factor >= 1) {
  2084. ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
  2085. ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
  2086. LoopMetadata.push_back(MDNode::get(
  2087. Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
  2088. }
  2089. addLoopMetadata(Loop, LoopMetadata);
  2090. return;
  2091. }
  2092. // Heuristically determine the unroll factor.
  2093. if (Factor == 0)
  2094. Factor = computeHeuristicUnrollFactor(Loop);
  2095. // No change required with unroll factor 1.
  2096. if (Factor == 1) {
  2097. *UnrolledCLI = Loop;
  2098. return;
  2099. }
  2100. assert(Factor >= 2 &&
  2101. "unrolling only makes sense with a factor of 2 or larger");
  2102. Type *IndVarTy = Loop->getIndVarType();
  2103. // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
  2104. // unroll the inner loop.
  2105. Value *FactorVal =
  2106. ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
  2107. /*isSigned=*/false));
  2108. std::vector<CanonicalLoopInfo *> LoopNest =
  2109. tileLoops(DL, {Loop}, {FactorVal});
  2110. assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
  2111. *UnrolledCLI = LoopNest[0];
  2112. CanonicalLoopInfo *InnerLoop = LoopNest[1];
  2113. // LoopUnrollPass can only fully unroll loops with constant trip count.
  2114. // Unroll by the unroll factor with a fallback epilog for the remainder
  2115. // iterations if necessary.
  2116. ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
  2117. ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
  2118. addLoopMetadata(
  2119. InnerLoop,
  2120. {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
  2121. MDNode::get(
  2122. Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
  2123. #ifndef NDEBUG
  2124. (*UnrolledCLI)->assertOK();
  2125. #endif
  2126. }
  2127. OpenMPIRBuilder::InsertPointTy
  2128. OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
  2129. llvm::Value *BufSize, llvm::Value *CpyBuf,
  2130. llvm::Value *CpyFn, llvm::Value *DidIt) {
  2131. if (!updateToLocation(Loc))
  2132. return Loc.IP;
  2133. uint32_t SrcLocStrSize;
  2134. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2135. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2136. Value *ThreadId = getOrCreateThreadID(Ident);
  2137. llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
  2138. Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
  2139. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
  2140. Builder.CreateCall(Fn, Args);
  2141. return Builder.saveIP();
  2142. }
  2143. OpenMPIRBuilder::InsertPointTy
  2144. OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
  2145. BodyGenCallbackTy BodyGenCB,
  2146. FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
  2147. if (!updateToLocation(Loc))
  2148. return Loc.IP;
  2149. // If needed (i.e. not null), initialize `DidIt` with 0
  2150. if (DidIt) {
  2151. Builder.CreateStore(Builder.getInt32(0), DidIt);
  2152. }
  2153. Directive OMPD = Directive::OMPD_single;
  2154. uint32_t SrcLocStrSize;
  2155. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2156. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2157. Value *ThreadId = getOrCreateThreadID(Ident);
  2158. Value *Args[] = {Ident, ThreadId};
  2159. Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  2160. Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
  2161. Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  2162. Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
  2163. // generates the following:
  2164. // if (__kmpc_single()) {
  2165. // .... single region ...
  2166. // __kmpc_end_single
  2167. // }
  2168. return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  2169. /*Conditional*/ true, /*hasFinalize*/ true);
  2170. }
  2171. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
  2172. const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
  2173. FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
  2174. if (!updateToLocation(Loc))
  2175. return Loc.IP;
  2176. Directive OMPD = Directive::OMPD_critical;
  2177. uint32_t SrcLocStrSize;
  2178. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2179. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2180. Value *ThreadId = getOrCreateThreadID(Ident);
  2181. Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  2182. Value *Args[] = {Ident, ThreadId, LockVar};
  2183. SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  2184. Function *RTFn = nullptr;
  2185. if (HintInst) {
  2186. // Add Hint to entry Args and create call
  2187. EnterArgs.push_back(HintInst);
  2188. RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  2189. } else {
  2190. RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  2191. }
  2192. Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
  2193. Function *ExitRTLFn =
  2194. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  2195. Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
  2196. return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  2197. /*Conditional*/ false, /*hasFinalize*/ true);
  2198. }
  2199. OpenMPIRBuilder::InsertPointTy
  2200. OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
  2201. InsertPointTy AllocaIP, unsigned NumLoops,
  2202. ArrayRef<llvm::Value *> StoreValues,
  2203. const Twine &Name, bool IsDependSource) {
  2204. for (size_t I = 0; I < StoreValues.size(); I++)
  2205. assert(StoreValues[I]->getType()->isIntegerTy(64) &&
  2206. "OpenMP runtime requires depend vec with i64 type");
  2207. if (!updateToLocation(Loc))
  2208. return Loc.IP;
  2209. // Allocate space for vector and generate alloc instruction.
  2210. auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
  2211. Builder.restoreIP(AllocaIP);
  2212. AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
  2213. ArgsBase->setAlignment(Align(8));
  2214. Builder.restoreIP(Loc.IP);
  2215. // Store the index value with offset in depend vector.
  2216. for (unsigned I = 0; I < NumLoops; ++I) {
  2217. Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
  2218. ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
  2219. StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
  2220. STInst->setAlignment(Align(8));
  2221. }
  2222. Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
  2223. ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
  2224. uint32_t SrcLocStrSize;
  2225. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2226. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2227. Value *ThreadId = getOrCreateThreadID(Ident);
  2228. Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
  2229. Function *RTLFn = nullptr;
  2230. if (IsDependSource)
  2231. RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
  2232. else
  2233. RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
  2234. Builder.CreateCall(RTLFn, Args);
  2235. return Builder.saveIP();
  2236. }
  2237. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
  2238. const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
  2239. FinalizeCallbackTy FiniCB, bool IsThreads) {
  2240. if (!updateToLocation(Loc))
  2241. return Loc.IP;
  2242. Directive OMPD = Directive::OMPD_ordered;
  2243. Instruction *EntryCall = nullptr;
  2244. Instruction *ExitCall = nullptr;
  2245. if (IsThreads) {
  2246. uint32_t SrcLocStrSize;
  2247. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2248. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2249. Value *ThreadId = getOrCreateThreadID(Ident);
  2250. Value *Args[] = {Ident, ThreadId};
  2251. Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
  2252. EntryCall = Builder.CreateCall(EntryRTLFn, Args);
  2253. Function *ExitRTLFn =
  2254. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
  2255. ExitCall = Builder.CreateCall(ExitRTLFn, Args);
  2256. }
  2257. return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  2258. /*Conditional*/ false, /*hasFinalize*/ true);
  2259. }
  2260. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
  2261. Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
  2262. BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
  2263. bool HasFinalize, bool IsCancellable) {
  2264. if (HasFinalize)
  2265. FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
  2266. // Create inlined region's entry and body blocks, in preparation
  2267. // for conditional creation
  2268. BasicBlock *EntryBB = Builder.GetInsertBlock();
  2269. Instruction *SplitPos = EntryBB->getTerminator();
  2270. if (!isa_and_nonnull<BranchInst>(SplitPos))
  2271. SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  2272. BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  2273. BasicBlock *FiniBB =
  2274. EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
  2275. Builder.SetInsertPoint(EntryBB->getTerminator());
  2276. emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
  2277. // generate body
  2278. BodyGenCB(/* AllocaIP */ InsertPointTy(),
  2279. /* CodeGenIP */ Builder.saveIP(), *FiniBB);
  2280. // If we didn't emit a branch to FiniBB during body generation, it means
  2281. // FiniBB is unreachable (e.g. while(1);). stop generating all the
  2282. // unreachable blocks, and remove anything we are not going to use.
  2283. auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  2284. if (SkipEmittingRegion) {
  2285. FiniBB->eraseFromParent();
  2286. ExitCall->eraseFromParent();
  2287. // Discard finalization if we have it.
  2288. if (HasFinalize) {
  2289. assert(!FinalizationStack.empty() &&
  2290. "Unexpected finalization stack state!");
  2291. FinalizationStack.pop_back();
  2292. }
  2293. } else {
  2294. // emit exit call and do any needed finalization.
  2295. auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
  2296. assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
  2297. FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
  2298. "Unexpected control flow graph state!!");
  2299. emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
  2300. assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
  2301. "Unexpected Control Flow State!");
  2302. MergeBlockIntoPredecessor(FiniBB);
  2303. }
  2304. // If we are skipping the region of a non conditional, remove the exit
  2305. // block, and clear the builder's insertion point.
  2306. assert(SplitPos->getParent() == ExitBB &&
  2307. "Unexpected Insertion point location!");
  2308. if (!Conditional && SkipEmittingRegion) {
  2309. ExitBB->eraseFromParent();
  2310. Builder.ClearInsertionPoint();
  2311. } else {
  2312. auto merged = MergeBlockIntoPredecessor(ExitBB);
  2313. BasicBlock *ExitPredBB = SplitPos->getParent();
  2314. auto InsertBB = merged ? ExitPredBB : ExitBB;
  2315. if (!isa_and_nonnull<BranchInst>(SplitPos))
  2316. SplitPos->eraseFromParent();
  2317. Builder.SetInsertPoint(InsertBB);
  2318. }
  2319. return Builder.saveIP();
  2320. }
  2321. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
  2322. Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  2323. // if nothing to do, Return current insertion point.
  2324. if (!Conditional || !EntryCall)
  2325. return Builder.saveIP();
  2326. BasicBlock *EntryBB = Builder.GetInsertBlock();
  2327. Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  2328. auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  2329. auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
  2330. // Emit thenBB and set the Builder's insertion point there for
  2331. // body generation next. Place the block after the current block.
  2332. Function *CurFn = EntryBB->getParent();
  2333. CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
  2334. // Move Entry branch to end of ThenBB, and replace with conditional
  2335. // branch (If-stmt)
  2336. Instruction *EntryBBTI = EntryBB->getTerminator();
  2337. Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  2338. EntryBBTI->removeFromParent();
  2339. Builder.SetInsertPoint(UI);
  2340. Builder.Insert(EntryBBTI);
  2341. UI->eraseFromParent();
  2342. Builder.SetInsertPoint(ThenBB->getTerminator());
  2343. // return an insertion point to ExitBB.
  2344. return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
  2345. }
  2346. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
  2347. omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
  2348. bool HasFinalize) {
  2349. Builder.restoreIP(FinIP);
  2350. // If there is finalization to do, emit it before the exit call
  2351. if (HasFinalize) {
  2352. assert(!FinalizationStack.empty() &&
  2353. "Unexpected finalization stack state!");
  2354. FinalizationInfo Fi = FinalizationStack.pop_back_val();
  2355. assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
  2356. Fi.FiniCB(FinIP);
  2357. BasicBlock *FiniBB = FinIP.getBlock();
  2358. Instruction *FiniBBTI = FiniBB->getTerminator();
  2359. // set Builder IP for call creation
  2360. Builder.SetInsertPoint(FiniBBTI);
  2361. }
  2362. if (!ExitCall)
  2363. return Builder.saveIP();
  2364. // place the Exitcall as last instruction before Finalization block terminator
  2365. ExitCall->removeFromParent();
  2366. Builder.Insert(ExitCall);
  2367. return IRBuilder<>::InsertPoint(ExitCall->getParent(),
  2368. ExitCall->getIterator());
  2369. }
  2370. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
  2371. InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
  2372. llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  2373. if (!IP.isSet())
  2374. return IP;
  2375. IRBuilder<>::InsertPointGuard IPG(Builder);
  2376. // creates the following CFG structure
  2377. // OMP_Entry : (MasterAddr != PrivateAddr)?
  2378. // F T
  2379. // | \
  2380. // | copin.not.master
  2381. // | /
  2382. // v /
  2383. // copyin.not.master.end
  2384. // |
  2385. // v
  2386. // OMP.Entry.Next
  2387. BasicBlock *OMP_Entry = IP.getBlock();
  2388. Function *CurFn = OMP_Entry->getParent();
  2389. BasicBlock *CopyBegin =
  2390. BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  2391. BasicBlock *CopyEnd = nullptr;
  2392. // If entry block is terminated, split to preserve the branch to following
  2393. // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
  2394. if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
  2395. CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
  2396. "copyin.not.master.end");
  2397. OMP_Entry->getTerminator()->eraseFromParent();
  2398. } else {
  2399. CopyEnd =
  2400. BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  2401. }
  2402. Builder.SetInsertPoint(OMP_Entry);
  2403. Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  2404. Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  2405. Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  2406. Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
  2407. Builder.SetInsertPoint(CopyBegin);
  2408. if (BranchtoEnd)
  2409. Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
  2410. return Builder.saveIP();
  2411. }
  2412. CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
  2413. Value *Size, Value *Allocator,
  2414. std::string Name) {
  2415. IRBuilder<>::InsertPointGuard IPG(Builder);
  2416. Builder.restoreIP(Loc.IP);
  2417. uint32_t SrcLocStrSize;
  2418. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2419. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2420. Value *ThreadId = getOrCreateThreadID(Ident);
  2421. Value *Args[] = {ThreadId, Size, Allocator};
  2422. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
  2423. return Builder.CreateCall(Fn, Args, Name);
  2424. }
  2425. CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
  2426. Value *Addr, Value *Allocator,
  2427. std::string Name) {
  2428. IRBuilder<>::InsertPointGuard IPG(Builder);
  2429. Builder.restoreIP(Loc.IP);
  2430. uint32_t SrcLocStrSize;
  2431. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2432. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2433. Value *ThreadId = getOrCreateThreadID(Ident);
  2434. Value *Args[] = {ThreadId, Addr, Allocator};
  2435. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
  2436. return Builder.CreateCall(Fn, Args, Name);
  2437. }
  2438. CallInst *OpenMPIRBuilder::createOMPInteropInit(
  2439. const LocationDescription &Loc, Value *InteropVar,
  2440. omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
  2441. Value *DependenceAddress, bool HaveNowaitClause) {
  2442. IRBuilder<>::InsertPointGuard IPG(Builder);
  2443. Builder.restoreIP(Loc.IP);
  2444. uint32_t SrcLocStrSize;
  2445. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2446. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2447. Value *ThreadId = getOrCreateThreadID(Ident);
  2448. if (Device == nullptr)
  2449. Device = ConstantInt::get(Int32, -1);
  2450. Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType);
  2451. if (NumDependences == nullptr) {
  2452. NumDependences = ConstantInt::get(Int32, 0);
  2453. PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
  2454. DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  2455. }
  2456. Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  2457. Value *Args[] = {
  2458. Ident, ThreadId, InteropVar, InteropTypeVal,
  2459. Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
  2460. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
  2461. return Builder.CreateCall(Fn, Args);
  2462. }
  2463. CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
  2464. const LocationDescription &Loc, Value *InteropVar, Value *Device,
  2465. Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  2466. IRBuilder<>::InsertPointGuard IPG(Builder);
  2467. Builder.restoreIP(Loc.IP);
  2468. uint32_t SrcLocStrSize;
  2469. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2470. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2471. Value *ThreadId = getOrCreateThreadID(Ident);
  2472. if (Device == nullptr)
  2473. Device = ConstantInt::get(Int32, -1);
  2474. if (NumDependences == nullptr) {
  2475. NumDependences = ConstantInt::get(Int32, 0);
  2476. PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
  2477. DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  2478. }
  2479. Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  2480. Value *Args[] = {
  2481. Ident, ThreadId, InteropVar, Device,
  2482. NumDependences, DependenceAddress, HaveNowaitClauseVal};
  2483. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
  2484. return Builder.CreateCall(Fn, Args);
  2485. }
  2486. CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
  2487. Value *InteropVar, Value *Device,
  2488. Value *NumDependences,
  2489. Value *DependenceAddress,
  2490. bool HaveNowaitClause) {
  2491. IRBuilder<>::InsertPointGuard IPG(Builder);
  2492. Builder.restoreIP(Loc.IP);
  2493. uint32_t SrcLocStrSize;
  2494. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2495. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2496. Value *ThreadId = getOrCreateThreadID(Ident);
  2497. if (Device == nullptr)
  2498. Device = ConstantInt::get(Int32, -1);
  2499. if (NumDependences == nullptr) {
  2500. NumDependences = ConstantInt::get(Int32, 0);
  2501. PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
  2502. DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  2503. }
  2504. Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  2505. Value *Args[] = {
  2506. Ident, ThreadId, InteropVar, Device,
  2507. NumDependences, DependenceAddress, HaveNowaitClauseVal};
  2508. Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
  2509. return Builder.CreateCall(Fn, Args);
  2510. }
  2511. CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
  2512. const LocationDescription &Loc, llvm::Value *Pointer,
  2513. llvm::ConstantInt *Size, const llvm::Twine &Name) {
  2514. IRBuilder<>::InsertPointGuard IPG(Builder);
  2515. Builder.restoreIP(Loc.IP);
  2516. uint32_t SrcLocStrSize;
  2517. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2518. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2519. Value *ThreadId = getOrCreateThreadID(Ident);
  2520. Constant *ThreadPrivateCache =
  2521. getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
  2522. llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
  2523. Function *Fn =
  2524. getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
  2525. return Builder.CreateCall(Fn, Args);
  2526. }
  2527. OpenMPIRBuilder::InsertPointTy
  2528. OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
  2529. bool RequiresFullRuntime) {
  2530. if (!updateToLocation(Loc))
  2531. return Loc.IP;
  2532. uint32_t SrcLocStrSize;
  2533. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2534. Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2535. ConstantInt *IsSPMDVal = ConstantInt::getSigned(
  2536. IntegerType::getInt8Ty(Int8->getContext()),
  2537. IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
  2538. ConstantInt *UseGenericStateMachine =
  2539. ConstantInt::getBool(Int32->getContext(), !IsSPMD);
  2540. ConstantInt *RequiresFullRuntimeVal =
  2541. ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
  2542. Function *Fn = getOrCreateRuntimeFunctionPtr(
  2543. omp::RuntimeFunction::OMPRTL___kmpc_target_init);
  2544. CallInst *ThreadKind = Builder.CreateCall(
  2545. Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
  2546. Value *ExecUserCode = Builder.CreateICmpEQ(
  2547. ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
  2548. "exec_user_code");
  2549. // ThreadKind = __kmpc_target_init(...)
  2550. // if (ThreadKind == -1)
  2551. // user_code
  2552. // else
  2553. // return;
  2554. auto *UI = Builder.CreateUnreachable();
  2555. BasicBlock *CheckBB = UI->getParent();
  2556. BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
  2557. BasicBlock *WorkerExitBB = BasicBlock::Create(
  2558. CheckBB->getContext(), "worker.exit", CheckBB->getParent());
  2559. Builder.SetInsertPoint(WorkerExitBB);
  2560. Builder.CreateRetVoid();
  2561. auto *CheckBBTI = CheckBB->getTerminator();
  2562. Builder.SetInsertPoint(CheckBBTI);
  2563. Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
  2564. CheckBBTI->eraseFromParent();
  2565. UI->eraseFromParent();
  2566. // Continue in the "user_code" block, see diagram above and in
  2567. // openmp/libomptarget/deviceRTLs/common/include/target.h .
  2568. return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
  2569. }
  2570. void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
  2571. bool IsSPMD,
  2572. bool RequiresFullRuntime) {
  2573. if (!updateToLocation(Loc))
  2574. return;
  2575. uint32_t SrcLocStrSize;
  2576. Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  2577. Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  2578. ConstantInt *IsSPMDVal = ConstantInt::getSigned(
  2579. IntegerType::getInt8Ty(Int8->getContext()),
  2580. IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
  2581. ConstantInt *RequiresFullRuntimeVal =
  2582. ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
  2583. Function *Fn = getOrCreateRuntimeFunctionPtr(
  2584. omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
  2585. Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
  2586. }
  2587. std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
  2588. StringRef FirstSeparator,
  2589. StringRef Separator) {
  2590. SmallString<128> Buffer;
  2591. llvm::raw_svector_ostream OS(Buffer);
  2592. StringRef Sep = FirstSeparator;
  2593. for (StringRef Part : Parts) {
  2594. OS << Sep << Part;
  2595. Sep = Separator;
  2596. }
  2597. return OS.str().str();
  2598. }
  2599. Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
  2600. llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  2601. // TODO: Replace the twine arg with stringref to get rid of the conversion
  2602. // logic. However This is taken from current implementation in clang as is.
  2603. // Since this method is used in many places exclusively for OMP internal use
  2604. // we will keep it as is for temporarily until we move all users to the
  2605. // builder and then, if possible, fix it everywhere in one go.
  2606. SmallString<256> Buffer;
  2607. llvm::raw_svector_ostream Out(Buffer);
  2608. Out << Name;
  2609. StringRef RuntimeName = Out.str();
  2610. auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  2611. if (Elem.second) {
  2612. assert(cast<PointerType>(Elem.second->getType())
  2613. ->isOpaqueOrPointeeTypeMatches(Ty) &&
  2614. "OMP internal variable has different type than requested");
  2615. } else {
  2616. // TODO: investigate the appropriate linkage type used for the global
  2617. // variable for possibly changing that to internal or private, or maybe
  2618. // create different versions of the function for different OMP internal
  2619. // variables.
  2620. Elem.second = new llvm::GlobalVariable(
  2621. M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
  2622. llvm::Constant::getNullValue(Ty), Elem.first(),
  2623. /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
  2624. AddressSpace);
  2625. }
  2626. return Elem.second;
  2627. }
  2628. Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  2629. std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  2630. std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  2631. return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
  2632. }
  2633. GlobalVariable *
  2634. OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
  2635. std::string VarName) {
  2636. llvm::Constant *MaptypesArrayInit =
  2637. llvm::ConstantDataArray::get(M.getContext(), Mappings);
  2638. auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
  2639. M, MaptypesArrayInit->getType(),
  2640. /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
  2641. VarName);
  2642. MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  2643. return MaptypesArrayGlobal;
  2644. }
  2645. void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
  2646. InsertPointTy AllocaIP,
  2647. unsigned NumOperands,
  2648. struct MapperAllocas &MapperAllocas) {
  2649. if (!updateToLocation(Loc))
  2650. return;
  2651. auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  2652. auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  2653. Builder.restoreIP(AllocaIP);
  2654. AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
  2655. AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
  2656. AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
  2657. Builder.restoreIP(Loc.IP);
  2658. MapperAllocas.ArgsBase = ArgsBase;
  2659. MapperAllocas.Args = Args;
  2660. MapperAllocas.ArgSizes = ArgSizes;
  2661. }
  2662. void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
  2663. Function *MapperFunc, Value *SrcLocInfo,
  2664. Value *MaptypesArg, Value *MapnamesArg,
  2665. struct MapperAllocas &MapperAllocas,
  2666. int64_t DeviceID, unsigned NumOperands) {
  2667. if (!updateToLocation(Loc))
  2668. return;
  2669. auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  2670. auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  2671. Value *ArgsBaseGEP =
  2672. Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
  2673. {Builder.getInt32(0), Builder.getInt32(0)});
  2674. Value *ArgsGEP =
  2675. Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
  2676. {Builder.getInt32(0), Builder.getInt32(0)});
  2677. Value *ArgSizesGEP =
  2678. Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
  2679. {Builder.getInt32(0), Builder.getInt32(0)});
  2680. Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
  2681. Builder.CreateCall(MapperFunc,
  2682. {SrcLocInfo, Builder.getInt64(DeviceID),
  2683. Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
  2684. ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
  2685. }
  2686. bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
  2687. const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
  2688. assert(!(AO == AtomicOrdering::NotAtomic ||
  2689. AO == llvm::AtomicOrdering::Unordered) &&
  2690. "Unexpected Atomic Ordering.");
  2691. bool Flush = false;
  2692. llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;
  2693. switch (AK) {
  2694. case Read:
  2695. if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
  2696. AO == AtomicOrdering::SequentiallyConsistent) {
  2697. FlushAO = AtomicOrdering::Acquire;
  2698. Flush = true;
  2699. }
  2700. break;
  2701. case Write:
  2702. case Update:
  2703. if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
  2704. AO == AtomicOrdering::SequentiallyConsistent) {
  2705. FlushAO = AtomicOrdering::Release;
  2706. Flush = true;
  2707. }
  2708. break;
  2709. case Capture:
  2710. switch (AO) {
  2711. case AtomicOrdering::Acquire:
  2712. FlushAO = AtomicOrdering::Acquire;
  2713. Flush = true;
  2714. break;
  2715. case AtomicOrdering::Release:
  2716. FlushAO = AtomicOrdering::Release;
  2717. Flush = true;
  2718. break;
  2719. case AtomicOrdering::AcquireRelease:
  2720. case AtomicOrdering::SequentiallyConsistent:
  2721. FlushAO = AtomicOrdering::AcquireRelease;
  2722. Flush = true;
  2723. break;
  2724. default:
  2725. // do nothing - leave silently.
  2726. break;
  2727. }
  2728. }
  2729. if (Flush) {
  2730. // Currently Flush RT call still doesn't take memory_ordering, so for when
  2731. // that happens, this tries to do the resolution of which atomic ordering
  2732. // to use with but issue the flush call
  2733. // TODO: pass `FlushAO` after memory ordering support is added
  2734. (void)FlushAO;
  2735. emitFlush(Loc);
  2736. }
  2737. // for AO == AtomicOrdering::Monotonic and all other case combinations
  2738. // do nothing
  2739. return Flush;
  2740. }
  2741. OpenMPIRBuilder::InsertPointTy
  2742. OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
  2743. AtomicOpValue &X, AtomicOpValue &V,
  2744. AtomicOrdering AO) {
  2745. if (!updateToLocation(Loc))
  2746. return Loc.IP;
  2747. Type *XTy = X.Var->getType();
  2748. assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  2749. Type *XElemTy = X.ElemTy;
  2750. assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
  2751. XElemTy->isPointerTy()) &&
  2752. "OMP atomic read expected a scalar type");
  2753. Value *XRead = nullptr;
  2754. if (XElemTy->isIntegerTy()) {
  2755. LoadInst *XLD =
  2756. Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
  2757. XLD->setAtomic(AO);
  2758. XRead = cast<Value>(XLD);
  2759. } else {
  2760. // We need to bitcast and perform atomic op as integer
  2761. unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
  2762. IntegerType *IntCastTy =
  2763. IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
  2764. Value *XBCast = Builder.CreateBitCast(
  2765. X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
  2766. LoadInst *XLoad =
  2767. Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
  2768. XLoad->setAtomic(AO);
  2769. if (XElemTy->isFloatingPointTy()) {
  2770. XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
  2771. } else {
  2772. XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
  2773. }
  2774. }
  2775. checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
  2776. Builder.CreateStore(XRead, V.Var, V.IsVolatile);
  2777. return Builder.saveIP();
  2778. }
  2779. OpenMPIRBuilder::InsertPointTy
  2780. OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
  2781. AtomicOpValue &X, Value *Expr,
  2782. AtomicOrdering AO) {
  2783. if (!updateToLocation(Loc))
  2784. return Loc.IP;
  2785. Type *XTy = X.Var->getType();
  2786. assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  2787. Type *XElemTy = X.ElemTy;
  2788. assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
  2789. XElemTy->isPointerTy()) &&
  2790. "OMP atomic write expected a scalar type");
  2791. if (XElemTy->isIntegerTy()) {
  2792. StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
  2793. XSt->setAtomic(AO);
  2794. } else {
  2795. // We need to bitcast and perform atomic op as integers
  2796. unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
  2797. IntegerType *IntCastTy =
  2798. IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
  2799. Value *XBCast = Builder.CreateBitCast(
  2800. X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
  2801. Value *ExprCast =
  2802. Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
  2803. StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
  2804. XSt->setAtomic(AO);
  2805. }
  2806. checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
  2807. return Builder.saveIP();
  2808. }
  2809. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
  2810. const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
  2811. Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  2812. AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
  2813. if (!updateToLocation(Loc))
  2814. return Loc.IP;
  2815. LLVM_DEBUG({
  2816. Type *XTy = X.Var->getType();
  2817. assert(XTy->isPointerTy() &&
  2818. "OMP Atomic expects a pointer to target memory");
  2819. Type *XElemTy = X.ElemTy;
  2820. assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
  2821. XElemTy->isPointerTy()) &&
  2822. "OMP atomic update expected a scalar type");
  2823. assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
  2824. (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
  2825. "OpenMP atomic does not support LT or GT operations");
  2826. });
  2827. emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
  2828. X.IsVolatile, IsXBinopExpr);
  2829. checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
  2830. return Builder.saveIP();
  2831. }
  2832. Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
  2833. AtomicRMWInst::BinOp RMWOp) {
  2834. switch (RMWOp) {
  2835. case AtomicRMWInst::Add:
  2836. return Builder.CreateAdd(Src1, Src2);
  2837. case AtomicRMWInst::Sub:
  2838. return Builder.CreateSub(Src1, Src2);
  2839. case AtomicRMWInst::And:
  2840. return Builder.CreateAnd(Src1, Src2);
  2841. case AtomicRMWInst::Nand:
  2842. return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
  2843. case AtomicRMWInst::Or:
  2844. return Builder.CreateOr(Src1, Src2);
  2845. case AtomicRMWInst::Xor:
  2846. return Builder.CreateXor(Src1, Src2);
  2847. case AtomicRMWInst::Xchg:
  2848. case AtomicRMWInst::FAdd:
  2849. case AtomicRMWInst::FSub:
  2850. case AtomicRMWInst::BAD_BINOP:
  2851. case AtomicRMWInst::Max:
  2852. case AtomicRMWInst::Min:
  2853. case AtomicRMWInst::UMax:
  2854. case AtomicRMWInst::UMin:
  2855. llvm_unreachable("Unsupported atomic update operation");
  2856. }
  2857. llvm_unreachable("Unsupported atomic update operation");
  2858. }
  2859. std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
  2860. Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
  2861. AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  2862. AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
  2863. bool DoCmpExch =
  2864. ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
  2865. (RMWOp == AtomicRMWInst::FSub) ||
  2866. (RMWOp == AtomicRMWInst::Sub && !IsXBinopExpr);
  2867. std::pair<Value *, Value *> Res;
  2868. if (XElemTy->isIntegerTy() && !DoCmpExch) {
  2869. Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
  2870. // not needed except in case of postfix captures. Generate anyway for
  2871. // consistency with the else part. Will be removed with any DCE pass.
  2872. Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
  2873. } else {
  2874. unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
  2875. IntegerType *IntCastTy =
  2876. IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
  2877. Value *XBCast =
  2878. Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
  2879. LoadInst *OldVal =
  2880. Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
  2881. OldVal->setAtomic(AO);
  2882. // CurBB
  2883. // | /---\
  2884. // ContBB |
  2885. // | \---/
  2886. // ExitBB
  2887. BasicBlock *CurBB = Builder.GetInsertBlock();
  2888. Instruction *CurBBTI = CurBB->getTerminator();
  2889. CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
  2890. BasicBlock *ExitBB =
  2891. CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
  2892. BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
  2893. X->getName() + ".atomic.cont");
  2894. ContBB->getTerminator()->eraseFromParent();
  2895. Builder.SetInsertPoint(ContBB);
  2896. llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
  2897. PHI->addIncoming(OldVal, CurBB);
  2898. AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
  2899. NewAtomicAddr->setName(X->getName() + "x.new.val");
  2900. NewAtomicAddr->moveBefore(AllocIP);
  2901. IntegerType *NewAtomicCastTy =
  2902. IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
  2903. bool IsIntTy = XElemTy->isIntegerTy();
  2904. Value *NewAtomicIntAddr =
  2905. (IsIntTy)
  2906. ? NewAtomicAddr
  2907. : Builder.CreateBitCast(NewAtomicAddr,
  2908. NewAtomicCastTy->getPointerTo(Addrspace));
  2909. Value *OldExprVal = PHI;
  2910. if (!IsIntTy) {
  2911. if (XElemTy->isFloatingPointTy()) {
  2912. OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
  2913. X->getName() + ".atomic.fltCast");
  2914. } else {
  2915. OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
  2916. X->getName() + ".atomic.ptrCast");
  2917. }
  2918. }
  2919. Value *Upd = UpdateOp(OldExprVal, Builder);
  2920. Builder.CreateStore(Upd, NewAtomicAddr);
  2921. LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
  2922. Value *XAddr =
  2923. (IsIntTy)
  2924. ? X
  2925. : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
  2926. AtomicOrdering Failure =
  2927. llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
  2928. AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
  2929. XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
  2930. Result->setVolatile(VolatileX);
  2931. Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
  2932. Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
  2933. PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
  2934. Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
  2935. Res.first = OldExprVal;
  2936. Res.second = Upd;
  2937. // set Insertion point in exit block
  2938. if (UnreachableInst *ExitTI =
  2939. dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
  2940. CurBBTI->eraseFromParent();
  2941. Builder.SetInsertPoint(ExitBB);
  2942. } else {
  2943. Builder.SetInsertPoint(ExitTI);
  2944. }
  2945. }
  2946. return Res;
  2947. }
  2948. OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
  2949. const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
  2950. AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
  2951. AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
  2952. bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
  2953. if (!updateToLocation(Loc))
  2954. return Loc.IP;
  2955. LLVM_DEBUG({
  2956. Type *XTy = X.Var->getType();
  2957. assert(XTy->isPointerTy() &&
  2958. "OMP Atomic expects a pointer to target memory");
  2959. Type *XElemTy = XTy->getPointerElementType();
  2960. assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
  2961. XElemTy->isPointerTy()) &&
  2962. "OMP atomic capture expected a scalar type");
  2963. assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
  2964. "OpenMP atomic does not support LT or GT operations");
  2965. });
  2966. // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
  2967. // 'x' is simply atomically rewritten with 'expr'.
  2968. AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
  2969. std::pair<Value *, Value *> Result =
  2970. emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
  2971. X.IsVolatile, IsXBinopExpr);
  2972. Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
  2973. Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
  2974. checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
  2975. return Builder.saveIP();
  2976. }
  2977. GlobalVariable *
  2978. OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
  2979. std::string VarName) {
  2980. llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
  2981. llvm::ArrayType::get(
  2982. llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
  2983. Names);
  2984. auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
  2985. M, MapNamesArrayInit->getType(),
  2986. /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
  2987. VarName);
  2988. return MapNamesArrayGlobal;
  2989. }
  2990. // Create all simple and struct types exposed by the runtime and remember
  2991. // the llvm::PointerTypes of them for easy access later.
  2992. void OpenMPIRBuilder::initializeTypes(Module &M) {
  2993. LLVMContext &Ctx = M.getContext();
  2994. StructType *T;
  2995. #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
  2996. #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
  2997. VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
  2998. VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
  2999. #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
  3000. VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
  3001. VarName##Ptr = PointerType::getUnqual(VarName);
  3002. #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
  3003. T = StructType::getTypeByName(Ctx, StructName); \
  3004. if (!T) \
  3005. T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
  3006. VarName = T; \
  3007. VarName##Ptr = PointerType::getUnqual(T);
  3008. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  3009. }
  3010. void OpenMPIRBuilder::OutlineInfo::collectBlocks(
  3011. SmallPtrSetImpl<BasicBlock *> &BlockSet,
  3012. SmallVectorImpl<BasicBlock *> &BlockVector) {
  3013. SmallVector<BasicBlock *, 32> Worklist;
  3014. BlockSet.insert(EntryBB);
  3015. BlockSet.insert(ExitBB);
  3016. Worklist.push_back(EntryBB);
  3017. while (!Worklist.empty()) {
  3018. BasicBlock *BB = Worklist.pop_back_val();
  3019. BlockVector.push_back(BB);
  3020. for (BasicBlock *SuccBB : successors(BB))
  3021. if (BlockSet.insert(SuccBB).second)
  3022. Worklist.push_back(SuccBB);
  3023. }
  3024. }
  3025. void CanonicalLoopInfo::collectControlBlocks(
  3026. SmallVectorImpl<BasicBlock *> &BBs) {
  3027. // We only count those BBs as control block for which we do not need to
  3028. // reverse the CFG, i.e. not the loop body which can contain arbitrary control
  3029. // flow. For consistency, this also means we do not add the Body block, which
  3030. // is just the entry to the body code.
  3031. BBs.reserve(BBs.size() + 6);
  3032. BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
  3033. }
  3034. BasicBlock *CanonicalLoopInfo::getPreheader() const {
  3035. assert(isValid() && "Requires a valid canonical loop");
  3036. for (BasicBlock *Pred : predecessors(Header)) {
  3037. if (Pred != Latch)
  3038. return Pred;
  3039. }
  3040. llvm_unreachable("Missing preheader");
  3041. }
  3042. void CanonicalLoopInfo::assertOK() const {
  3043. #ifndef NDEBUG
  3044. // No constraints if this object currently does not describe a loop.
  3045. if (!isValid())
  3046. return;
  3047. BasicBlock *Preheader = getPreheader();
  3048. BasicBlock *Body = getBody();
  3049. BasicBlock *After = getAfter();
  3050. // Verify standard control-flow we use for OpenMP loops.
  3051. assert(Preheader);
  3052. assert(isa<BranchInst>(Preheader->getTerminator()) &&
  3053. "Preheader must terminate with unconditional branch");
  3054. assert(Preheader->getSingleSuccessor() == Header &&
  3055. "Preheader must jump to header");
  3056. assert(Header);
  3057. assert(isa<BranchInst>(Header->getTerminator()) &&
  3058. "Header must terminate with unconditional branch");
  3059. assert(Header->getSingleSuccessor() == Cond &&
  3060. "Header must jump to exiting block");
  3061. assert(Cond);
  3062. assert(Cond->getSinglePredecessor() == Header &&
  3063. "Exiting block only reachable from header");
  3064. assert(isa<BranchInst>(Cond->getTerminator()) &&
  3065. "Exiting block must terminate with conditional branch");
  3066. assert(size(successors(Cond)) == 2 &&
  3067. "Exiting block must have two successors");
  3068. assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
  3069. "Exiting block's first successor jump to the body");
  3070. assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
  3071. "Exiting block's second successor must exit the loop");
  3072. assert(Body);
  3073. assert(Body->getSinglePredecessor() == Cond &&
  3074. "Body only reachable from exiting block");
  3075. assert(!isa<PHINode>(Body->front()));
  3076. assert(Latch);
  3077. assert(isa<BranchInst>(Latch->getTerminator()) &&
  3078. "Latch must terminate with unconditional branch");
  3079. assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
  3080. // TODO: To support simple redirecting of the end of the body code that has
  3081. // multiple; introduce another auxiliary basic block like preheader and after.
  3082. assert(Latch->getSinglePredecessor() != nullptr);
  3083. assert(!isa<PHINode>(Latch->front()));
  3084. assert(Exit);
  3085. assert(isa<BranchInst>(Exit->getTerminator()) &&
  3086. "Exit block must terminate with unconditional branch");
  3087. assert(Exit->getSingleSuccessor() == After &&
  3088. "Exit block must jump to after block");
  3089. assert(After);
  3090. assert(After->getSinglePredecessor() == Exit &&
  3091. "After block only reachable from exit block");
  3092. assert(After->empty() || !isa<PHINode>(After->front()));
  3093. Instruction *IndVar = getIndVar();
  3094. assert(IndVar && "Canonical induction variable not found?");
  3095. assert(isa<IntegerType>(IndVar->getType()) &&
  3096. "Induction variable must be an integer");
  3097. assert(cast<PHINode>(IndVar)->getParent() == Header &&
  3098. "Induction variable must be a PHI in the loop header");
  3099. assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
  3100. assert(
  3101. cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
  3102. assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
  3103. auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
  3104. assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
  3105. assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
  3106. assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
  3107. assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
  3108. ->isOne());
  3109. Value *TripCount = getTripCount();
  3110. assert(TripCount && "Loop trip count not found?");
  3111. assert(IndVar->getType() == TripCount->getType() &&
  3112. "Trip count and induction variable must have the same type");
  3113. auto *CmpI = cast<CmpInst>(&Cond->front());
  3114. assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
  3115. "Exit condition must be a signed less-than comparison");
  3116. assert(CmpI->getOperand(0) == IndVar &&
  3117. "Exit condition must compare the induction variable");
  3118. assert(CmpI->getOperand(1) == TripCount &&
  3119. "Exit condition must compare with the trip count");
  3120. #endif
  3121. }
  3122. void CanonicalLoopInfo::invalidate() {
  3123. Header = nullptr;
  3124. Cond = nullptr;
  3125. Latch = nullptr;
  3126. Exit = nullptr;
  3127. }