//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the OpenMPIRBuilder class, which is used as a
/// convenient way to create LLVM instructions for OpenMP directives.
///
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

#include <cstdint>
#include <optional>

#define DEBUG_TYPE "openmp-ir-builder"

using namespace llvm;
using namespace omp;

static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));

static cl::opt<double> UnrollThresholdFactor(
    "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
    cl::desc("Factor for the unroll threshold to account for code "
             "simplifications still taking place"),
    cl::init(1.5));
#ifndef NDEBUG
/// Return whether IP1 and IP2 are ambiguous, i.e. whether inserting
/// instructions at position IP1 may change the meaning of IP2 or vice-versa.
/// This is because an InsertPoint stores the instruction before which
/// something is inserted. For instance, if both point to the same
/// instruction, two IRBuilders alternately creating instructions will cause
/// the instructions to be interleaved.
static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
                         IRBuilder<>::InsertPoint IP2) {
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
}
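
// Illustrative sketch (not part of the original source): two insert points
// conflict exactly when they name the same (block, iterator) pair. Assuming
// some block BB, inserting through both A and B below would interleave the
// generated instructions:
//
//   IRBuilder<>::InsertPoint A(BB, BB->begin());
//   IRBuilder<>::InsertPoint B(BB, BB->begin());
//   assert(isConflictIP(A, B));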
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
  // Valid ordered/unordered and base algorithm combinations.
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
    break;
  default:
    return false;
  }

  // Must not set both monotonicity modifiers at the same time.
  OMPScheduleType MonotonicityFlags =
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
    return false;

  return true;
}
#endif
/// Determine which scheduling algorithm to use from the schedule clause
/// arguments.
static OMPScheduleType
getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier) {
  // Currently, the default schedule is static.
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return llvm::omp::OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
  }
  llvm_unreachable("unhandled schedule clause argument");
}
/// Adds ordering modifier flags to schedule type.
static OMPScheduleType
getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType,
                              bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering or monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  // Unsupported combinations are mapped to supported schedule types.
  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
                                    OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
}
/// Adds monotonicity modifier flags to schedule type.
static OMPScheduleType
getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
                                  bool HasSimdModifier, bool HasMonotonic,
                                  bool HasNonmonotonic, bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic contradict each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
  } else {
    // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
    // If the static schedule kind is specified or if the ordered clause is
    // specified, and if the nonmonotonic modifier is not specified, the
    // effect is as if the monotonic modifier is specified. Otherwise, unless
    // the monotonic modifier is specified, the effect is as if the
    // nonmonotonic modifier is specified.
    OMPScheduleType BaseScheduleType =
        ScheduleType & ~OMPScheduleType::ModifierMask;
    if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
        (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
        HasOrderedClause) {
      // Monotonic is the default in the OpenMP runtime library, so there is
      // no need to set it.
      return ScheduleType;
    } else {
      return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
    }
  }
}
/// Determine the schedule type using schedule and ordering clause arguments.
static OMPScheduleType
computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier, bool HasMonotonicModifier,
                          bool HasNonmonotonicModifier, bool HasOrderedClause) {
  OMPScheduleType BaseSchedule =
      getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
  OMPScheduleType OrderedSchedule =
      getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
  OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);

  assert(isValidWorkshareLoopScheduleType(Result));
  return Result;
}
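
// Illustrative sketch (not part of the original source): the three helpers
// above compose a schedule type in stages. For a clause like
// `schedule(dynamic, 4) ordered` one would expect, under these assumptions:
//
//   OMPScheduleType Ty = computeOpenMPScheduleType(
//       OMP_SCHEDULE_Dynamic, /*HasChunks=*/true, /*HasSimdModifier=*/false,
//       /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
//       /*HasOrderedClause=*/true);
//   // Ty == OMPScheduleType::OrderedDynamicChunked, i.e.
//   // BaseDynamicChunked | ModifierOrdered. No nonmonotonic flag is added
//   // because the ordered clause implies monotonic behavior, which is the
//   // runtime default.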
/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///   the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}
void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
                    bool CreateBranch) {
  assert(New->getFirstInsertionPt() == New->begin() &&
         "Target BB must not have PHI nodes");

  // Move instructions to new block.
  BasicBlock *Old = IP.getBlock();
  New->splice(New->begin(), Old, IP.getPoint(), Old->end());

  if (CreateBranch)
    BranchInst::Create(New, Old);
}

void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *Old = Builder.GetInsertBlock();

  spliceBB(Builder.saveIP(), New, CreateBranch);
  if (CreateBranch)
    Builder.SetInsertPoint(Old->getTerminator());
  else
    Builder.SetInsertPoint(Old);

  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
}
BasicBlock *llvm::splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                          llvm::Twine Name) {
  BasicBlock *Old = IP.getBlock();
  BasicBlock *New = BasicBlock::Create(
      Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name,
      Old->getParent(), Old->getNextNode());
  spliceBB(IP, New, CreateBranch);
  New->replaceSuccessorsPhiUsesWith(Old, New);
  return New;
}

BasicBlock *llvm::splitBB(IRBuilderBase &Builder, bool CreateBranch,
                          llvm::Twine Name) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
  return New;
}

BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch,
                          llvm::Twine Name) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
  return New;
}

BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                                    llvm::Twine Suffix) {
  BasicBlock *Old = Builder.GetInsertBlock();
  return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
}
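
// Illustrative sketch (not part of the original source): splitBB cuts the
// current block at the builder's insert point and, with CreateBranch set,
// wires the halves back together, leaving the builder positioned before the
// new branch. Assuming an already-configured IRBuilder<> B:
//
//   BasicBlock *Cont = splitBBWithSuffix(B, /*CreateBranch=*/true, ".split");
//   // B now inserts into the old block, before its new terminator;
//   // everything after the old insert point lives in Cont.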
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();
  Triple T(M.getTargetTriple());

  // Get the function's current attributes.
  auto Attrs = Fn.getAttributes();
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();
  SmallVector<AttributeSet, 4> ArgAttrs;
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
    ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));

  // Add AS to FnAS while taking special care with integer extensions.
  auto addAttrSet = [&](AttributeSet &FnAS, const AttributeSet &AS,
                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");
      if (Param) {
        if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))
          FnAS = FnAS.addAttribute(Ctx, AK);
      } else if (auto AK =
                     TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
        FnAS = FnAS.addAttribute(Ctx, AK);
    } else {
      FnAS = FnAS.addAttributes(Ctx, AS);
    }
  };

#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the function declaration.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)               \
  case Enum:                                                                  \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                          \
    addAttrSet(RetAttrs, RetAttrSet, /*Param*/ false);                        \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)               \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                        \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}
FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                         \
  case Enum:                                                                  \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},       \
                             IsVarArg);                                       \
    Fn = M.getFunction(Str);                                                  \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                               \
  case Enum:                                                                  \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);        \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function.
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the runtime function are passed to the
        //    callback callee.
        Fn->addMetadata(LLVMContext::MD_callback,
                        *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                              2, {-1, -1},
                                              /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);
  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  // Cast the function to the expected type if necessary
  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  return {FnTy, C};
}
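
// Illustrative sketch (not part of the original source): for __kmpc_fork_call
// the callback annotation above should correspond to IR metadata of roughly
// this shape, following the !callback encoding documented in the LLVM
// LangRef (exact types elided):
//
//   declare !callback !0 void @__kmpc_fork_call(ptr, i32, ptr, ...)
//   !0 = !{!1}
//   !1 = !{i64 2, i64 -1, i64 -1, i1 true}
//
// i.e. the callee is argument 2, its first two parameters are unknown, and
// the variadic arguments are forwarded to it.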
Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}
void OpenMPIRBuilder::initialize() { initializeTypes(M); }

void OpenMPIRBuilder::finalize(Function *Fn) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  SmallVector<OutlineInfo, 16> DeferredOutlines;
  for (OutlineInfo &OI : OutlineInfos) {
    // Skip functions that have not been finalized yet; this may happen with
    // nested function generation.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.getFunction();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ true,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* AllocaBlock*/ OI.OuterAllocaBB,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    for (auto *V : OI.ExcludeArgsFromAggregate)
      Extractor.excludeArgFromAggregate(V);

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away; we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);

      // Move instructions from the to-be-deleted ArtificialEntry to the entry
      // basic block of the parallel region. CodeExtractor generates
      // instructions to unwrap the aggregate argument and may sink
      // allocas/bitcasts for values that are solely used in the outlined
      // region and do not escape.
      assert(!ArtificialEntry.empty() &&
             "Expected instructions to add in the outlined region entry");
      for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(),
                                        End = ArtificialEntry.rend();
           It != End;) {
        Instruction &I = *It;
        It++;

        if (I.isTerminator())
          continue;

        I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
      }

      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Remove work items that have been completed.
  OutlineInfos = std::move(DeferredOutlines);
}
OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}

GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
  IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
  auto *GV =
      new GlobalVariable(M, I32Ty,
                         /* isConstant = */ true, GlobalValue::WeakODRLinkage,
                         ConstantInt::get(I32Ty, Value), Name);
  GV->setVisibility(GlobalValue::HiddenVisibility);

  return GV;
}
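
// Illustrative sketch (not part of the original source): a call such as
// createGlobalFlag(50, "__omp_rtl_debug_kind") should, given the linkage and
// visibility set above, produce a global along the lines of
//
//   @__omp_rtl_debug_kind = weak_odr hidden constant i32 50
//
// (the flag name and value here are hypothetical).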
Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                            uint32_t SrcLocStrSize,
                                            IdentFlag LocFlags,
                                            unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  Constant *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {I32Null,
                             ConstantInt::get(Int32, uint32_t(LocFlags)),
                             ConstantInt::get(Int32, Reserve2Flags),
                             ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
    Constant *Initializer =
        ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          Ident = &GV;

    if (!Ident) {
      auto *GV = new GlobalVariable(
          M, OpenMPIRBuilder::Ident,
          /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
          nullptr, GlobalValue::NotThreadLocal,
          M.getDataLayout().getDefaultGlobalsAddressSpace());
      GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
      GV->setAlignment(Align(8));
      Ident = GV;
    }
  }

  return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
}
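
// Illustrative sketch (not part of the original source): given the IdentData
// fields above, each emitted ident_t global should look roughly like
//
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 <flags>, i32 <reserve2>, i32 <size>, ptr <str> },
//            align 8
//
// where <flags> always includes OMP_IDENT_FLAG_KMPC, which this function
// unconditionally ORs in.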
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
                                                uint32_t &SrcLocStrSize) {
  SrcLocStrSize = LocStr.size();
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes
    // the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line, unsigned Column,
                                                uint32_t &SrcLocStrSize) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
}

Constant *
OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
}
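
// Illustrative note (not part of the original source): the buffer built above
// yields strings of the form ";<file>;<function>;<line>;<column>;;". For
// example, a location in foo() at line 12, column 3 of bar.c would be
// encoded as ";bar.c;foo;12;3;;", matching the ";unknown;unknown;0;0;;"
// default used when no debug location is available.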
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
                                                uint32_t &SrcLocStrSize,
                                                Function *F) {
  DILocation *DIL = DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (std::optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  if (Function.empty() && F)
    Function = F->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn(), SrcLocStrSize);
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
                                                uint32_t &SrcLocStrSize) {
  return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
                              Loc.IP.getBlock()->getParent());
}

Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
                                 bool ForceSimpleCall, bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  //            __kmpc_barrier(loc, thread_id);
  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Args[] = {
      getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
      getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}
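
// Illustrative sketch (not part of the original source): for a cancellable
// barrier the calls built above should lower to IR roughly like
//
//   %tid = call i32 @__kmpc_global_thread_num(ptr @loc)
//   %ret = call i32 @__kmpc_cancel_barrier(ptr @loc.barrier, i32 %tid)
//
// (global names hypothetical; note the ident passed to the barrier carries
// the barrier-kind flags while the thread-id lookup uses a plain ident).
// %ret then feeds the cancellation check when CheckCancelFlag is set.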
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                      \
  case DirectiveEnum:                                                         \
    CancelKind = Builder.getInt32(Value);                                     \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
    if (CanceledDirective == OMPD_parallel) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                    /* CheckCancelFlag */ false);
    }
  };

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}
void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
                                          uint64_t Size, int32_t Flags,
                                          StringRef SectionName) {
  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());

  Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);

  // Create the constant string used to look up the symbol in the device.
  auto *Str =
      new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, AddrName,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Construct the offloading entry.
  Constant *EntryData[] = {
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, Int8PtrTy),
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, Int8PtrTy),
      ConstantInt::get(SizeTy, Size),
      ConstantInt::get(Int32Ty, Flags),
      ConstantInt::get(Int32Ty, 0),
  };
  Constant *EntryInitializer =
      ConstantStruct::get(OpenMPIRBuilder::OffloadEntry, EntryData);

  auto *Entry = new GlobalVariable(
      M, OpenMPIRBuilder::OffloadEntry,
      /* isConstant = */ true, GlobalValue::WeakAnyLinkage, EntryInitializer,
      ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal,
      M.getDataLayout().getDefaultGlobalsAddressSpace());

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(SectionName);
  Entry->setAlignment(Align(1));
}
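
// Illustrative sketch (not part of the original source): the EntryData fields
// above mirror the offload-entry record the offloading runtime walks in the
// section given by SectionName; a C view of that layout would be roughly
//
//   struct __tgt_offload_entry {
//     void *addr;      // Address of the global/kernel symbol.
//     char *name;      // Name used for device-side lookup.
//     size_t size;     // Size in bytes (0 for functions).
//     int32_t flags;   // Entry kind flags.
//     int32_t reserved;
//   };
//
// (struct and field names as commonly used by the libomptarget runtime; the
// authoritative definition lives in the runtime headers).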
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
    const LocationDescription &Loc, Value *&Return, Value *Ident,
    Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr,
    ArrayRef<Value *> KernelArgs) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto *KernelArgsPtr =
      Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");
  for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) {
    llvm::Value *Arg =
        Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);
    Builder.CreateAlignedStore(
        KernelArgs[I], Arg,
        M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));
  }

  SmallVector<Value *> OffloadingArgs{Ident,      DeviceID, NumTeams,
                                      NumThreads, HostPtr,  KernelArgsPtr};

  Return = Builder.CreateCall(
      getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
      OffloadingArgs);

  return Builder.saveIP();
}
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
                                               omp::Directive CanceledDirective,
                                               FinalizeCallbackTy ExitCB) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we have moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    BB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(BB);
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  if (ExitCB)
    ExitCB(Builder.saveIP());
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
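
// Illustrative sketch (not part of the original source): the control flow
// produced above is, in rough IR terms (block names follow the ".cont"/
// ".cncl" suffixes used in the code):
//
//   <BB>:       %cmp = icmp eq i32 %cancel.flag, 0
//               br i1 %cmp, label %<BB>.cont, label %<BB>.cncl
//   <BB>.cncl:  ; ExitCB and FiniCB run here, then branch to the
//               ; post-finalization block known to the callback.
//   <BB>.cont:  ; code generation continues here.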
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");

  if (!updateToLocation(Loc))
    return Loc.IP;

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // We only need TIDAddr and ZeroAddr for modeling purposes to get the
  // associated arguments in the outlined function, so we delete them later.
  ToBeDeleted.push_back(TIDAddr);
  ToBeDeleted.push_back(ZeroAddr);

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  BasicBlock *EntryBB = UI->getParent();
  BasicBlock *PRegEntryBB = EntryBB->splitBasicBlock(UI, "omp.par.entry");
  BasicBlock *PRegBodyBB = PRegEntryBB->splitBasicBlock(UI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(UI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(UI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();
  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse =
      Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // EntryBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  FunctionCallee RTLFn;
  if (IfCondition)
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
  else
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);

  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      //  - The callback callee is argument number 2 (microtask).
      //  - The first two arguments of the callback callee are unknown (-1).
      //  - All variadic arguments to the __kmpc_fork_call are passed to the
      //    callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(
              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
                                               /* VarArgsArePassed */ true)}));
    }
  }

  OutlineInfo OI;
  OI.PostOutlineCB = [=](Function &OutlinedFn) {
    // Add some known attributes.
    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars =
        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
    CI->getParent()->setName("omp_parallel");
    Builder.SetInsertPoint(CI);

    // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
    Value *ForkCallArgs[] = {
        Ident, Builder.getInt32(NumCapturedVars),
        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

    SmallVector<Value *, 16> RealArgs;
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
    if (IfCondition) {
      Value *Cond = Builder.CreateSExtOrTrunc(IfCondition,
                                              Type::getInt32Ty(M.getContext()));
      RealArgs.push_back(Cond);
    }
    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

    // __kmpc_fork_call_if always expects a void ptr as the last argument.
    // If there are no arguments, pass a null pointer.
    auto PtrTy = Type::getInt8PtrTy(M.getContext());
    if (IfCondition && NumCapturedVars == 0) {
      llvm::Value *Void = ConstantPointerNull::get(PtrTy);
      RealArgs.push_back(Void);
    }
    if (IfCondition && RealArgs.back()->getType() != PtrTy)
      RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);

    Builder.CreateCall(RTLFn, RealArgs);

    LLVM_DEBUG(dbgs() << "With fork_call placed: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");

    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

    // Initialize the local TID stack location with the argument value.
    Builder.SetInsertPoint(PrivTID);
    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
    Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);

    CI->eraseFromParent();

    for (Instruction *I : ToBeDeleted)
      I->eraseFromParent();
  };

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.OuterAllocaBB = OuterAllocaBlock;
  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to finalizations,
  // e.g., cancel calls that cause the control flow to leave the region.
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* AllocationBlock */ OuterAllocaBlock,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

  auto PrivHelper = [&](Value &V) {
    if (&V == TIDAddr || &V == ZeroAddr) {
      OI.ExcludeArgsFromAggregate.push_back(&V);
      return;
    }

    SetVector<Use *> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.insert(&U);

    // __kmpc_fork_call expects extra arguments as pointers. If the input
    // already has a pointer type, everything is fine. Otherwise, store the
    // value onto stack and load it back inside the to-be-outlined region. This
    // will ensure only the pointer will be passed to the function.
    // FIXME: if there are more than 15 trailing arguments, they must be
    //        additionally packed in a struct.
    Value *Inner = &V;
    if (!V.getType()->isPointerTy()) {
      IRBuilder<>::InsertPointGuard Guard(Builder);
      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");

      Builder.restoreIP(OuterAllocaIP);
      Value *Ptr =
          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");

      // Store to stack at end of the block that currently branches to the
      // entry block of the to-be-outlined region.
      Builder.SetInsertPoint(InsertBB,
                             InsertBB->getTerminator()->getIterator());
      Builder.CreateStore(&V, Ptr);

      // Load back next to allocations in the to-be-outlined region.
      Builder.restoreIP(InnerAllocaIP);
      Inner = Builder.CreateLoad(V.getType(), Ptr);
    }

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  // Reset the inner alloca insertion as it will be used for loading the values
  // wrapped into pointers before passing them into the to-be-outlined region.
  // Configure it to insert immediately after the fake use of zero address so
  // that they are available in the generated body and so that the
  // OpenMP-related values (thread ID and zero address pointers) remain leading
  // in the argument list.
  InnerAllocaIP = IRBuilder<>::InsertPoint(
      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());

  // Reset the outer alloca insertion point to the entry of the relevant block
  // in case it was invalidated.
  OuterAllocaIP = IRBuilder<>::InsertPoint(
      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  LLVM_DEBUG({
    for (Value *Output : Outputs)
      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  });
  assert(Outputs.empty() &&
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
  LLVM_DEBUG({
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  // Register the outlined info.
  addOutlineInfo(std::move(OI));

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  return AfterIP;
}

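// A minimal usage sketch for createParallel (illustrative only; OMPBuilder,
// Loc, AllocaIP, and the callback bodies are hypothetical names supplied by
// the caller):
//
//   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
//     // Emit the body of the parallel region at CodeGenIP.
//   };
//   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
//                     Value &Orig, Value &Inner, Value *&ReplVal) {
//     ReplVal = &Inner; // Share everything by default.
//     return CodeGenIP;
//   };
//   auto FiniCB = [&](InsertPointTy IP) { /* nothing to finalize */ };
//   Builder.restoreIP(OMPBuilder.createParallel(
//       Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, /*IfCondition=*/nullptr,
//       /*NumThreads=*/nullptr, OMP_PROC_BIND_default,
//       /*IsCancellable=*/false));
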
void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
  // Build call void __kmpc_flush(ident_t *loc)
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}

void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitFlush(Loc);
}

void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};

  // Ignore return result until untied tasks are supported.
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}

void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskwaitImpl(Loc);
}

void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}

void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskyieldImpl(Loc);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTask(const LocationDescription &Loc,
                            InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                            bool Tied, Value *Final, Value *IfCondition,
                            SmallVector<DependData> Dependencies) {
  if (!updateToLocation(Loc))
    return InsertPointTy();

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  // The current basic block is split into four basic blocks. After outlining,
  // they will be mapped as follows:
  // ```
  // def current_fn() {
  //   current_basic_block:
  //     br label %task.exit
  //   task.exit:
  //     ; instructions after task
  // }
  // def outlined_fn() {
  //   task.alloca:
  //     br label %task.body
  //   task.body:
  //     ret void
  // }
  // ```
  BasicBlock *TaskExitBB = splitBB(Builder, /*CreateBranch=*/true, "task.exit");
  BasicBlock *TaskBodyBB = splitBB(Builder, /*CreateBranch=*/true, "task.body");
  BasicBlock *TaskAllocaBB =
      splitBB(Builder, /*CreateBranch=*/true, "task.alloca");

  OutlineInfo OI;
  OI.EntryBB = TaskAllocaBB;
  OI.OuterAllocaBB = AllocaIP.getBlock();
  OI.ExitBB = TaskExitBB;

  OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition,
                      Dependencies](Function &OutlinedFn) {
    // The input IR here looks like the following:
    // ```
    // func @current_fn() {
    //   outlined_fn(%args)
    // }
    // func @outlined_fn(%args) { ... }
    // ```
    //
    // This is changed to the following:
    //
    // ```
    // func @current_fn() {
    //   runtime_call(..., wrapper_fn, ...)
    // }
    // func @wrapper_fn(..., %args) {
    //   outlined_fn(%args)
    // }
    // func @outlined_fn(%args) { ... }
    // ```

    // The stale call instruction will be replaced with a new call instruction
    // for the runtime call with a wrapper function.
    assert(OutlinedFn.getNumUses() == 1 &&
           "there must be a single user for the outlined function");
    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());

    // HasTaskData is true if any variables are captured in the outlined
    // region, false otherwise.
    bool HasTaskData = StaleCI->arg_size() > 0;
    Builder.SetInsertPoint(StaleCI);

    // Gather the arguments for emitting the runtime call for
    // @__kmpc_omp_task_alloc
    Function *TaskAllocFn =
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);

    // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) for the runtime
    // call.
    Value *ThreadID = getOrCreateThreadID(Ident);

    // Argument - `flags`
    // Task is tied iff (Flags & 1) == 1.
    // Task is untied iff (Flags & 1) == 0.
    // Task is final iff (Flags & 2) == 2.
    // Task is not final iff (Flags & 2) == 0.
    // TODO: Handle the other flags.
    Value *Flags = Builder.getInt32(Tied);
    if (Final) {
      Value *FinalFlag =
          Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
      Flags = Builder.CreateOr(FinalFlag, Flags);
    }

    // Argument - `sizeof_kmp_task_t` (TaskSize)
    // TaskSize refers to the size in bytes of the kmp_task_t data structure
    // including private vars accessed in the task.
    Value *TaskSize = Builder.getInt64(0);
    if (HasTaskData) {
      AllocaInst *ArgStructAlloca =
          dyn_cast<AllocaInst>(StaleCI->getArgOperand(0));
      assert(ArgStructAlloca &&
             "Unable to find the alloca instruction corresponding to arguments "
             "for extracted function");
      StructType *ArgStructType =
          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
      assert(ArgStructType && "Unable to find struct type corresponding to "
                              "arguments for extracted function");
      TaskSize =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
    }

    // TODO: Argument - sizeof_shareds

    // Argument - task_entry (the wrapper function)
    // If the outlined function has some captured variables (i.e. HasTaskData
    // is true), then the wrapper function will have an additional argument
    // (the struct containing captured variables). Otherwise, no such argument
    // will be present.
    SmallVector<Type *> WrapperArgTys{Builder.getInt32Ty()};
    if (HasTaskData)
      WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType());
    FunctionCallee WrapperFuncVal = M.getOrInsertFunction(
        (Twine(OutlinedFn.getName()) + ".wrapper").str(),
        FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false));
    Function *WrapperFunc = dyn_cast<Function>(WrapperFuncVal.getCallee());
    PointerType *WrapperFuncBitcastType =
        FunctionType::get(Builder.getInt32Ty(),
                          {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false)
            ->getPointerTo();
    Value *WrapperFuncBitcast =
        ConstantExpr::getBitCast(WrapperFunc, WrapperFuncBitcastType);

    // Emit the @__kmpc_omp_task_alloc runtime call.
    // The runtime call returns a pointer to an area where the task captured
    // variables must be copied before the task is run (NewTaskData).
    CallInst *NewTaskData = Builder.CreateCall(
        TaskAllocFn,
        {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
         /*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0),
         /*task_func=*/WrapperFuncBitcast});

    // Copy the arguments for the outlined function.
    if (HasTaskData) {
      Value *TaskData = StaleCI->getArgOperand(0);
      Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
      Builder.CreateMemCpy(NewTaskData, Alignment, TaskData, Alignment,
                           TaskSize);
    }

    Value *DepArrayPtr = nullptr;
    if (Dependencies.size()) {
      InsertPointTy OldIP = Builder.saveIP();
      Builder.SetInsertPoint(
          &OldIP.getBlock()->getParent()->getEntryBlock().back());

      Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size());
      Value *DepArray =
          Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");

      unsigned P = 0;
      for (const DependData &Dep : Dependencies) {
        Value *Base =
            Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, P);
        // Store the pointer to the variable
        Value *Addr = Builder.CreateStructGEP(
            DependInfo, Base,
            static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
        Value *DepValPtr =
            Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
        Builder.CreateStore(DepValPtr, Addr);
        // Store the size of the variable
        Value *Size = Builder.CreateStructGEP(
            DependInfo, Base,
            static_cast<unsigned int>(RTLDependInfoFields::Len));
        Builder.CreateStore(Builder.getInt64(M.getDataLayout().getTypeStoreSize(
                                Dep.DepValueType)),
                            Size);
        // Store the dependency kind
        Value *Flags = Builder.CreateStructGEP(
            DependInfo, Base,
            static_cast<unsigned int>(RTLDependInfoFields::Flags));
        Builder.CreateStore(
            ConstantInt::get(Builder.getInt8Ty(),
                             static_cast<unsigned int>(Dep.DepKind)),
            Flags);
        ++P;
      }

      DepArrayPtr = Builder.CreateBitCast(DepArray, Builder.getInt8PtrTy());
      Builder.restoreIP(OldIP);
    }

    // In the presence of the `if` clause, the following IR is generated:
    //    ...
    //    %data = call @__kmpc_omp_task_alloc(...)
    //    br i1 %if_condition, label %then, label %else
    //  then:
    //    call @__kmpc_omp_task(...)
    //    br label %exit
    //  else:
    //    call @__kmpc_omp_task_begin_if0(...)
    //    call @wrapper_fn(...)
    //    call @__kmpc_omp_task_complete_if0(...)
    //    br label %exit
    //  exit:
    //    ...
    if (IfCondition) {
      // `SplitBlockAndInsertIfThenElse` requires the block to have a
      // terminator.
      BasicBlock *NewBasicBlock =
          splitBB(Builder, /*CreateBranch=*/true, "if.end");
      Instruction *IfTerminator =
          NewBasicBlock->getSinglePredecessor()->getTerminator();
      Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;
      Builder.SetInsertPoint(IfTerminator);
      SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI,
                                    &ElseTI);
      Builder.SetInsertPoint(ElseTI);
      Function *TaskBeginFn =
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
      Function *TaskCompleteFn =
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
      Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, NewTaskData});
      if (HasTaskData)
        Builder.CreateCall(WrapperFunc, {ThreadID, NewTaskData});
      else
        Builder.CreateCall(WrapperFunc, {ThreadID});
      Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, NewTaskData});
      Builder.SetInsertPoint(ThenTI);
    }

    if (Dependencies.size()) {
      Function *TaskFn =
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
      Builder.CreateCall(
          TaskFn,
          {Ident, ThreadID, NewTaskData, Builder.getInt32(Dependencies.size()),
           DepArrayPtr, ConstantInt::get(Builder.getInt32Ty(), 0),
           ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))});
    } else {
      // Emit the @__kmpc_omp_task runtime call to spawn the task.
      Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
      Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData});
    }

    StaleCI->eraseFromParent();

    // Emit the body for the wrapper function.
    BasicBlock *WrapperEntryBB =
        BasicBlock::Create(M.getContext(), "", WrapperFunc);
    Builder.SetInsertPoint(WrapperEntryBB);
    if (HasTaskData)
      Builder.CreateCall(&OutlinedFn, {WrapperFunc->getArg(1)});
    else
      Builder.CreateCall(&OutlinedFn);
    Builder.CreateRet(Builder.getInt32(0));
  };

  addOutlineInfo(std::move(OI));

  InsertPointTy TaskAllocaIP =
      InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
  InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
  BodyGenCB(TaskAllocaIP, TaskBodyIP);
  Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());

  return Builder.saveIP();
}

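// A minimal usage sketch for createTask (illustrative only; OMPBuilder, Loc,
// AllocaIP, and the callback body are hypothetical caller-supplied names):
//
//   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
//     // Emit the task body at CodeGenIP; task-local allocas go to AllocaIP.
//   };
//   SmallVector<OpenMPIRBuilder::DependData> Deps; // empty: no depend clause
//   Builder.restoreIP(OMPBuilder.createTask(
//       Loc, AllocaIP, BodyGenCB, /*Tied=*/true, /*Final=*/nullptr,
//       /*IfCondition=*/nullptr, Deps));
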
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP,
                                 BodyGenCallbackTy BodyGenCB) {
  if (!updateToLocation(Loc))
    return InsertPointTy();

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadID = getOrCreateThreadID(Ident);

  // Emit the @__kmpc_taskgroup runtime call to start the taskgroup
  Function *TaskgroupFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
  Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});

  BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit");
  BodyGenCB(AllocaIP, Builder.saveIP());

  Builder.SetInsertPoint(TaskgroupExitBB);
  // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
  Function *EndTaskgroupFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
  Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});

  return Builder.saveIP();
}

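// The emitted region has the following shape (illustrative IR sketch derived
// from the calls above, not verbatim output):
//
//   call void @__kmpc_taskgroup(ptr @ident, i32 %gtid)
//   ; ... taskgroup body, typically spawning tasks ...
//   call void @__kmpc_end_taskgroup(ptr @ident, i32 %gtid)
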
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
  assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required");

  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using
    // FinalizeOMPRegion will fail because that function requires the
    // Finalization Basic Block to have a terminator, which is already removed
    // by EmitOMPRegionBody.
    // IP is currently at the cancellation block.
    // We need to backtrack to the condition block to fetch the exit block and
    // create a branch from the cancellation block to the exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    auto *CaseBB = IP.getBlock()->getSinglePredecessor();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});

  // Each section is emitted as a switch case.
  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
  // -> OMP.createSection() which generates the IR for each section.
  // Iterate through all sections and emit a switch construct:
  // switch (IV) {
  //   case 0:
  //     <SectionStmt[0]>;
  //     break;
  // ...
  //   case <NumSection> - 1:
  //     <SectionStmt[<NumSection> - 1]>;
  //     break;
  // }
  // ...
  // section_loop.after:
  // <FiniCB>;
  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
    Builder.restoreIP(CodeGenIP);
    BasicBlock *Continue =
        splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after");
    Function *CurFn = Continue->getParent();
    SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue);

    unsigned CaseNumber = 0;
    for (auto SectionCB : SectionCBs) {
      BasicBlock *CaseBB = BasicBlock::Create(
          M.getContext(), "omp_section_loop.body.case", CurFn, Continue);
      SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
      Builder.SetInsertPoint(CaseBB);
      BranchInst *CaseEndBr = Builder.CreateBr(Continue);
      SectionCB(InsertPointTy(),
                {CaseEndBr->getParent(), CaseEndBr->getIterator()});
      CaseNumber++;
    }
    // Remove the existing terminator from the body BB since there can be no
    // terminators after a switch/case.
  };
  // Loop body ends here.

  // LowerBound, UpperBound, and Stride for createCanonicalLoop.
  Type *I32Ty = Type::getInt32Ty(M.getContext());
  Value *LB = ConstantInt::get(I32Ty, 0);
  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  Value *ST = ConstantInt::get(I32Ty, 1);
  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
      Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
  InsertPointTy AfterIP =
      applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);

  // Apply the finalization callback in LoopAfterBB.
  auto FiniInfo = FinalizationStack.pop_back_val();
  assert(FiniInfo.DK == OMPD_sections &&
         "Unexpected finalization stack state!");
  if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
    Builder.restoreIP(AfterIP);
    BasicBlock *FiniBB =
        splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
    CB(Builder.saveIP());
    AfterIP = {FiniBB, FiniBB->begin()};
  }

  return AfterIP;
}

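// Conceptually, `sections` therefore lowers to a statically workshared loop
// over the section indices (illustrative pseudo-IR sketch only, not verbatim
// output):
//
//   for (i = lb..ub assigned by the static-init runtime call) {
//     switch (i) {
//     case 0: /* SectionStmt[0] */ break;
//     /* ... one case per section ... */
//     }
//   }
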
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSection(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using
    // FinalizeOMPRegion will fail because that function requires the
    // Finalization Basic Block to have a terminator, which is already removed
    // by EmitOMPRegionBody.
    // IP is currently at the cancellation block.
    // We need to backtrack to the condition block to fetch the exit block and
    // create a branch from the cancellation block to the exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    auto *CaseBB = Loc.IP.getBlock();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  Directive OMPD = Directive::OMPD_sections;
  // Since we are using the Finalization Callback here, HasFinalize
  // and IsCancellable have to be true.
  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
                              /*Conditional*/ false, /*hasFinalize*/ true,
                              /*IsCancellable*/ true);
}

/// Create a function with a unique name and a "void (i8*, i8*)" signature in
/// the given module and return it.
Function *getFreshReductionFunc(Module &M) {
  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
  auto *FuncTy =
      FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
  return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                          M.getDataLayout().getDefaultGlobalsAddressSpace(),
                          ".omp.reduction.func", &M);
}

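// Until createReductions populates its body below, the created function is a
// bare declaration of roughly this shape (illustrative IR sketch, assuming
// the default globals address space is 0):
//
//   declare internal void @.omp.reduction.func(i8* %0, i8* %1)
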
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
  for (const ReductionInfo &RI : ReductionInfos) {
    (void)RI;
    assert(RI.Variable && "expected non-null variable");
    assert(RI.PrivateVariable && "expected non-null private variable");
    assert(RI.ReductionGen && "expected non-null reduction generator callback");
    assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
           "expected variables and their private equivalents to have the same "
           "type");
    assert(RI.Variable->getType()->isPointerTy() &&
           "expected variables to be pointers");
  }

  if (!updateToLocation(Loc))
    return InsertPointTy();

  BasicBlock *InsertBlock = Loc.IP.getBlock();
  BasicBlock *ContinuationBlock =
      InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
  InsertBlock->getTerminator()->eraseFromParent();

  // Create and populate array of type-erased pointers to private reduction
  // values.
  unsigned NumReductions = ReductionInfos.size();
  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
  Builder.restoreIP(AllocaIP);
  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");

  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());

  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    const ReductionInfo &RI = En.value();
    Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
    Value *Casted =
        Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
                              "private.red.var." + Twine(Index) + ".casted");
    Builder.CreateStore(Casted, RedArrayElemPtr);
  }

  // Emit a call to the runtime function that orchestrates the reduction.
  // Declare the reduction function in the process.
  Function *Func = Builder.GetInsertBlock()->getParent();
  Module *Module = Func->getParent();
  Value *RedArrayPtr =
      Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  bool CanGenerateAtomic =
      llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
        return RI.AtomicReductionGen;
      });
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
                                  CanGenerateAtomic
                                      ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
                                      : IdentFlag(0));
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *NumVariables = Builder.getInt32(NumReductions);
  const DataLayout &DL = Module->getDataLayout();
  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
  Function *ReductionFunc = getFreshReductionFunc(*Module);
  Value *Lock = getOMPCriticalRegionLock(".reduction");
  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_reduce);
  CallInst *ReduceCall =
      Builder.CreateCall(ReduceFunc,
                         {Ident, ThreadId, NumVariables, RedArraySize,
                          RedArrayPtr, ReductionFunc, Lock},
                         "reduce");

  // Create final reduction entry blocks for the atomic and non-atomic case.
  // Emit IR that dispatches control flow to one of the blocks based on the
  // reduction supporting the atomic mode.
  BasicBlock *NonAtomicRedBlock =
      BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
  BasicBlock *AtomicRedBlock =
      BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
  SwitchInst *Switch =
      Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);

  // Populate the non-atomic reduction using the elementwise reduction
  // function. This loads the elements from the global and private variables
  // and reduces them before storing back the result to the global variable.
  Builder.SetInsertPoint(NonAtomicRedBlock);
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Type *ValueType = RI.ElementType;
    Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
                                         "red.value." + Twine(En.index()));
    Value *PrivateRedValue =
        Builder.CreateLoad(ValueType, RI.PrivateVariable,
                           "red.private.value." + Twine(En.index()));
    Value *Reduced;
    Builder.restoreIP(
        RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
    if (!Builder.GetInsertBlock())
      return InsertPointTy();
    Builder.CreateStore(Reduced, RI.Variable);
  }
  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_end_reduce);
  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
  Builder.CreateBr(ContinuationBlock);

  // Populate the atomic reduction using the atomic elementwise reduction
  // function. There are no loads/stores here because they will be happening
  // inside the atomic elementwise reduction.
  Builder.SetInsertPoint(AtomicRedBlock);
  if (CanGenerateAtomic) {
    for (const ReductionInfo &RI : ReductionInfos) {
      Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
                                              RI.Variable, RI.PrivateVariable));
      if (!Builder.GetInsertBlock())
        return InsertPointTy();
    }
    Builder.CreateBr(ContinuationBlock);
  } else {
    Builder.CreateUnreachable();
  }

  // Populate the outlined reduction function using the elementwise reduction
  // function. Partial values are extracted from the type-erased array of
  // pointers to private variables.
  BasicBlock *ReductionFuncBlock =
      BasicBlock::Create(Module->getContext(), "", ReductionFunc);
  Builder.SetInsertPoint(ReductionFuncBlock);
  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
                                             RedArrayTy->getPointerTo());
  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
                                             RedArrayTy->getPointerTo());
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, LHSArrayPtr, 0, En.index());
    Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
    Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
    Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
    Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RHSArrayPtr, 0, En.index());
    Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
    Value *RHSPtr =
        Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
    Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
    Value *Reduced;
    Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
    if (!Builder.GetInsertBlock())
      return InsertPointTy();
    Builder.CreateStore(Reduced, LHSPtr);
  }
  Builder.CreateRetVoid();

  Builder.SetInsertPoint(ContinuationBlock);
  return Builder.saveIP();
}

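// A minimal sketch of a ReductionGen callback for a floating-point sum
// (illustrative only; the callback is caller-supplied and this name is
// hypothetical). The signature matches how createReductions invokes it above:
//
//   auto SumReduction = [](OpenMPIRBuilder::InsertPointTy IP, Value *LHS,
//                          Value *RHS, Value *&Result) {
//     IRBuilder<> B(IP.getBlock(), IP.getPoint());
//     Result = B.CreateFAdd(LHS, RHS, "red.add"); // combine the two partials
//     return B.saveIP();
//   };
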
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, Value *Filter) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_masked;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId, Filter};
  Value *ArgsEnd[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
    DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
    BasicBlock *PostInsertBefore, const Twine &Name) {
  Module *M = F->getParent();
  LLVMContext &Ctx = M->getContext();
  Type *IndVarTy = TripCount->getType();

  // Create the basic block structure.
  BasicBlock *Preheader =
      BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  BasicBlock *Header =
      BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  BasicBlock *Cond =
      BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  BasicBlock *Body =
      BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  BasicBlock *Latch =
      BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  BasicBlock *Exit =
      BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  BasicBlock *After =
      BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);

  // Use specified DebugLoc for new instructions.
  Builder.SetCurrentDebugLocation(DL);

  Builder.SetInsertPoint(Preheader);
  Builder.CreateBr(Header);

  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  Builder.CreateBr(Cond);

  Builder.SetInsertPoint(Cond);
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);

  Builder.SetInsertPoint(Body);
  Builder.CreateBr(Latch);

  Builder.SetInsertPoint(Latch);
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  Builder.CreateBr(Header);
  IndVarPHI->addIncoming(Next, Latch);

  Builder.SetInsertPoint(Exit);
  Builder.CreateBr(After);

  // Remember and return the canonical control flow.
  LoopInfos.emplace_front();
  CanonicalLoopInfo *CL = &LoopInfos.front();

  CL->Header = Header;
  CL->Cond = Cond;
  CL->Latch = Latch;
  CL->Exit = Exit;

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

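// The skeleton created above has the following shape (comment sketch derived
// directly from the block wiring in createLoopSkeleton):
//
//   preheader -> header -> cond --(iv < tripcount)--> body -> inc -> header
//                            \--(otherwise)---------> exit -> after
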
CanonicalLoopInfo *
OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                     LoopBodyGenCallbackTy BodyGenCB,
                                     Value *TripCount, const Twine &Name) {
  BasicBlock *BB = Loc.IP.getBlock();
  BasicBlock *NextBB = BB->getNextNode();

  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
                                             NextBB, NextBB, Name);
  BasicBlock *After = CL->getAfter();

  // If location is not set, don't connect the loop.
  if (updateToLocation(Loc)) {
    // Split the loop at the insertion point: Branch to the preheader and move
    // every following instruction to after the loop (the After BB). Also, the
    // new successor is the loop's after block.
    spliceBB(Builder, After, /*CreateBranch=*/false);
    Builder.CreateBr(CL->getPreheader());
  }

  // Emit the body content. We do it after connecting the loop to the CFG to
  // avoid that the callback encounters degenerate BBs.
  BodyGenCB(CL->getBodyIP(), CL->getIndVar());

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

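// A minimal usage sketch (illustrative only; OMPBuilder, Loc, the trip count,
// and the loop body are hypothetical caller-supplied values):
//
//   Value *TripCount = Builder.getInt32(128);
//   auto LoopBodyCB = [&](InsertPointTy CodeGenIP, Value *IV) {
//     Builder.restoreIP(CodeGenIP);
//     // Emit one iteration of the loop body using IV here.
//   };
//   CanonicalLoopInfo *CLI =
//       OMPBuilder.createCanonicalLoop(Loc, LoopBodyCB, TripCount, "demo");
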
CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    InsertPointTy ComputeIP, const Twine &Name) {
  // Consider the following difficulties (assuming 8-bit signed integers):
  //  * Adding \p Step to the loop counter which passes \p Stop may overflow:
  //      DO I = 1, 100, 50
  //  * A \p Step of INT_MIN cannot be normalized to a positive direction:
  //      DO I = 100, 0, -128

  // Start, Stop and Step must be of the same integer type.
  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  LocationDescription ComputeLoc =
      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
  updateToLocation(ComputeLoc);

  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  ConstantInt *One = ConstantInt::get(IndVarTy, 1);

  // Like Step, but always positive.
  Value *Incr = Step;

  // Distance between Start and Stop; always positive.
  Value *Span;

  // Condition checking whether no iterations are executed at all, e.g. because
  // UB < LB.
  Value *ZeroCmp;

  if (IsSigned) {
    // Ensure that increment is positive. If not, negate and invert LB and UB.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past stop since it could overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    Value *OneCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                                          "omp_" + Name + ".tripcount");

  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
    Builder.restoreIP(CodeGenIP);
    Value *Span = Builder.CreateMul(IV, Step);
    Value *IndVar = Builder.CreateAdd(Span, Start);
    BodyGenCB(Builder.saveIP(), IndVar);
  };
  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}

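// Worked example of the trip-count computation above (unsigned, exclusive
// stop): Start = 0, Stop = 10, Step = 3 gives Span = 10, so
// CountIfTwo = (10 - 1) / 3 + 1 = 4, matching the iterations 0, 3, 6, 9.
// With Stop <= Start, ZeroCmp instead selects a trip count of 0.
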
// Returns an LLVM function to call for initializing loop bounds using OpenMP
// static scheduling depending on `type`. Only i32 and i64 are supported by the
// runtime. Always interpret integers as unsigned similarly to
// CanonicalLoopInfo.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
                                                  OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                          InsertPointTy AllocaIP,
                                          bool NeedsBarrier) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");

  // Set up the source location value for OpenMP runtime.
  Builder.restoreIP(CLI->getPreheaderIP());
  Builder.SetCurrentDebugLocation(DL);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init"
  // function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType = ConstantInt::get(
      I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  Builder.CreateCall(StaticInit,
                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Zero});
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  CLI->setTripCount(TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
    Builder.SetInsertPoint(CLI->getBody(),
                           CLI->getBody()->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(DL);
    return Builder.CreateAdd(OldIV, LowerBound);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);

  InsertPointTy AfterIP = CLI->getAfterIP();
  CLI->invalidate();

  return AfterIP;
}

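// For an i32 canonical loop, the rewrite above yields roughly the following
// per-thread call sequence (illustrative sketch, not verbatim output; the
// literal schedule constant is assumed to be kmp_sch_static):
//
//   call void @__kmpc_for_static_init_4u(ptr @ident, i32 %gtid,
//                                        i32 34 /* kmp_sch_static */,
//                                        ptr %p.lastiter, ptr %p.lowerbound,
//                                        ptr %p.upperbound, ptr %p.stride,
//                                        i32 1, i32 0)
//   ; loop over the [lb, ub] range assigned to this thread
//   call void @__kmpc_for_static_fini(ptr @ident, i32 %gtid)
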
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    bool NeedsBarrier, Value *ChunkSize) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(ChunkSize && "Chunk size is required");

  LLVMContext &Ctx = CLI->getFunction()->getContext();
  Value *IV = CLI->getIndVar();
  Value *OrigTripCount = CLI->getTripCount();
  Type *IVTy = IV->getType();
  assert(IVTy->getIntegerBitWidth() <= 64 &&
         "Max supported tripcount bitwidth is 64 bits");
  Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx)
                                                        : Type::getInt64Ty(Ctx);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Constant *Zero = ConstantInt::get(InternalIVTy, 0);
  Constant *One = ConstantInt::get(InternalIVTy, 1);

  // Declare useful OpenMP runtime functions.
  FunctionCallee StaticInit =
      getKmpcForStaticInitForType(InternalIVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Builder.SetCurrentDebugLocation(DL);
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
  Value *PUpperBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");

  // Set up the source location value for the OpenMP runtime.
  Builder.restoreIP(CLI->getPreheaderIP());
  Builder.SetCurrentDebugLocation(DL);

  // TODO: Detect overflow in ubsan or max-out with current tripcount.
  Value *CastedChunkSize =
      Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize");
  Value *CastedTripCount =
      Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");

  Constant *SchedulingType = ConstantInt::get(
      I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
  Builder.CreateStore(Zero, PLowerBound);
  Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
  Builder.CreateStore(OrigUpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
  Builder.CreateCall(StaticInit,
                     {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum,
                      /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter,
                      /*plower=*/PLowerBound, /*pupper=*/PUpperBound,
                      /*pstride=*/PStride, /*incr=*/One,
                      /*chunk=*/CastedChunkSize});

  // Load values written by the "init" function.
  Value *FirstChunkStart =
      Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
  Value *FirstChunkStop =
      Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
  Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
  Value *ChunkRange =
      Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
  Value *NextChunkStride =
      Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");

  // Create outer "dispatch" loop for enumerating the chunks.
  BasicBlock *DispatchEnter = splitBB(Builder, true);
  Value *DispatchCounter;
  CanonicalLoopInfo *DispatchCLI = createCanonicalLoop(
      {Builder.saveIP(), DL},
      [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; },
      FirstChunkStart, CastedTripCount, NextChunkStride,
      /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{},
      "dispatch");

  // Remember the BasicBlocks of the dispatch loop we need, then invalidate to
  // not have to preserve the canonical invariant.
  BasicBlock *DispatchBody = DispatchCLI->getBody();
  BasicBlock *DispatchLatch = DispatchCLI->getLatch();
  BasicBlock *DispatchExit = DispatchCLI->getExit();
  BasicBlock *DispatchAfter = DispatchCLI->getAfter();
  DispatchCLI->invalidate();

  // Rewire the original loop to become the chunk loop inside the dispatch loop.
  redirectTo(DispatchAfter, CLI->getAfter(), DL);
  redirectTo(CLI->getExit(), DispatchLatch, DL);
  redirectTo(DispatchBody, DispatchEnter, DL);

  // Prepare the prolog of the chunk loop.
  Builder.restoreIP(CLI->getPreheaderIP());
  Builder.SetCurrentDebugLocation(DL);

  // Compute the number of iterations of the chunk loop.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
  Value *IsLastChunk =
      Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
  Value *CountUntilOrigTripCount =
      Builder.CreateSub(CastedTripCount, DispatchCounter);
  Value *ChunkTripCount = Builder.CreateSelect(
      IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
  Value *BackcastedChunkTC =
      Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
  CLI->setTripCount(BackcastedChunkTC);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  Value *BackcastedDispatchCounter =
      Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
  CLI->mapIndVar([&](Instruction *) -> Value * {
    Builder.restoreIP(CLI->getBodyIP());
    return Builder.CreateAdd(IV, BackcastedDispatchCounter);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
                  /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);

#ifndef NDEBUG
  // Even though we currently do not support applying additional methods to it,
  // the chunk loop should remain a canonical loop.
  CLI->assertOK();
#endif

  return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
}
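// For static-chunked scheduling, the rewiring above turns the original loop
// into a two-level nest, roughly:
//
//   for (dispatch = firstchunk.lb; dispatch < tripcount; dispatch += stride)
//     for (iv = 0; iv < min(chunk.range, tripcount - dispatch); ++iv)
//       body(dispatch + iv);
//
// where the runtime's "init" call supplies the first chunk's bounds and the
// stride between this thread's consecutive chunks.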
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
    llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
    bool HasNonmonotonicModifier, bool HasOrderedClause) {
  OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);

  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
                   OMPScheduleType::ModifierOrdered;
  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
  case OMPScheduleType::BaseStatic:
    assert(!ChunkSize && "No chunk size with static-chunked schedule");
    if (IsOrdered)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    // FIXME: Monotonicity ignored?
    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);

  case OMPScheduleType::BaseStaticChunked:
    if (IsOrdered)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    // FIXME: Monotonicity ignored?
    return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
                                           ChunkSize);

  case OMPScheduleType::BaseRuntime:
  case OMPScheduleType::BaseAuto:
  case OMPScheduleType::BaseGreedy:
  case OMPScheduleType::BaseBalanced:
  case OMPScheduleType::BaseSteal:
  case OMPScheduleType::BaseGuidedSimd:
  case OMPScheduleType::BaseRuntimeSimd:
    assert(!ChunkSize &&
           "schedule type does not support user-defined chunk sizes");
    LLVM_FALLTHROUGH;
  case OMPScheduleType::BaseDynamicChunked:
  case OMPScheduleType::BaseGuidedChunked:
  case OMPScheduleType::BaseGuidedIterativeChunked:
  case OMPScheduleType::BaseGuidedAnalyticalChunked:
  case OMPScheduleType::BaseStaticBalancedChunked:
    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                     NeedsBarrier, ChunkSize);

  default:
    llvm_unreachable("Unknown/unimplemented schedule kind");
  }
}
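// In short: unordered static schedules take the specialized static paths
// above, while everything else (dynamic, guided, runtime, auto, and any
// ordered schedule) is lowered through applyDynamicWorkshareLoop, whose
// dispatch init/next runtime calls cover all of those schedule types.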
/// Returns an LLVM function to call for initializing loop bounds using OpenMP
/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
/// the runtime. Always interpret integers as unsigned similarly to
/// CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

/// Returns an LLVM function to call for fetching the next chunk of the loop
/// using OpenMP dynamic scheduling, depending on `type`. Only i32 and i64 are
/// supported by the runtime. Always interpret integers as unsigned similarly
/// to CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
/// Returns an LLVM function to call for finalizing the dynamic loop, depending
/// on `type`. Only i32 and i64 are supported by the runtime. Always interpret
/// integers as unsigned similarly to CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");
  assert(isValidWorkshareLoopScheduleType(SchedType) &&
         "Require valid schedule type");

  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
                 OMPScheduleType::ModifierOrdered;

  // Set up the source location value for the OpenMP runtime.
  Builder.SetCurrentDebugLocation(DL);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  BasicBlock *PreHeader = CLI->getPreheader();
  Builder.SetInsertPoint(PreHeader->getTerminator());
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(One, PLowerBound);
  Value *UpperBound = CLI->getTripCount();
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  BasicBlock *Header = CLI->getHeader();
  BasicBlock *Exit = CLI->getExit();
  BasicBlock *Cond = CLI->getCond();
  BasicBlock *Latch = CLI->getLatch();
  InsertPointTy AfterIP = CLI->getAfterIP();

  // The CLI will be "broken" in the code below, as the loop is no longer
  // a valid canonical loop.

  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));

  // Call the "init" function.
  Builder.CreateCall(DynamicInit,
                     {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
                      UpperBound, /* step */ One, Chunk});

  // An outer loop around the existing one.
  BasicBlock *OuterCond = BasicBlock::Create(
      PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
      PreHeader->getParent());
  // This needs to be 32-bit always, so can't use the IVTy Zero above.
  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  Value *Res =
      Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
                                       PLowerBound, PUpperBound, PStride});
  Constant *Zero32 = ConstantInt::get(I32Type, 0);
  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
  Value *LowerBound =
      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  Builder.CreateCondBr(MoreWork, Header, Exit);

  // Change PHI-node in loop header to use outer cond rather than preheader,
  // and set IV to the LowerBound.
  Instruction *Phi = &Header->front();
  auto *PI = cast<PHINode>(Phi);
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);

  // Then set the pre-header to jump to the OuterCond.
  Instruction *Term = PreHeader->getTerminator();
  auto *Br = cast<BranchInst>(Term);
  Br->setSuccessor(0, OuterCond);

  // Modify the inner condition:
  // * Use the UpperBound returned from the DynamicNext call.
  // * Jump to the outer loop when done with one of the inner loops.
  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  Instruction *Comp = &*Builder.GetInsertPoint();
  auto *CI = cast<CmpInst>(Comp);
  CI->setOperand(1, UpperBound);

  // Redirect the inner exit to branch to the outer condition.
  Instruction *Branch = &Cond->back();
  auto *BI = cast<BranchInst>(Branch);
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);

  // Call the "fini" function if "ordered" is present in the wsloop directive.
  if (Ordered) {
    Builder.SetInsertPoint(&Latch->back());
    FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
    Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
  }

  // Add the barrier if requested.
  if (NeedsBarrier) {
    Builder.SetInsertPoint(&Exit->back());
    createBarrier(LocationDescription(Builder.saveIP(), DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);
  }

  CLI->invalidate();
  return AfterIP;
}
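// The rewiring above yields control flow of roughly the following shape, where
// each successful __kmpc_dispatch_next_* call hands this thread one more chunk:
//
//   preheader -> outer.cond
//   outer.cond:  res = dispatch_next(&lastiter, &lb, &ub, &stride);
//                if (res != 0) goto header; else goto exit;
//   header/cond/body/latch: the original loop, iterating from lb to ub
//   cond (chunk exhausted) -> outer.cond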
/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
                                      BasicBlock *NewTarget, DebugLoc DL) {
  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
    redirectTo(Pred, NewTarget, DL);
}

/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}
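// The loop above is a fixed-point iteration: a block only counts as used if it
// is referenced from outside the candidate set, and un-marking one block (i.e.
// keeping it) can make the blocks it references count as used too, so the scan
// repeats until no block changes state.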
CanonicalLoopInfo *
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                               InsertPointTy ComputeIP) {
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

  // Nothing to do if there is already just one loop.
  if (NumLoops == 1)
    return Loops.front();

  CanonicalLoopInfo *Outermost = Loops.front();
  CanonicalLoopInfo *Innermost = Loops.back();
  BasicBlock *OrigPreheader = Outermost->getPreheader();
  BasicBlock *OrigAfter = Outermost->getAfter();
  Function *F = OrigPreheader->getParent();

  // Loop control blocks that may become orphaned later.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);

  // Set up the IRBuilder for inserting the trip count computation.
  Builder.SetCurrentDebugLocation(DL);
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  else
    Builder.restoreIP(Outermost->getPreheaderIP());

  // Derive the collapsed loop's trip count.
  // TODO: Find common/largest indvar type.
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() &&
           "All loops to collapse must be valid canonical loops");
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }

    // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
    CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
                                           {}, /*HasNUW=*/true);
  }

  // Create the collapsed loop control flow.
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  // Build the collapsed loop body code.
  // Start with deriving the input loop induction variables from the collapsed
  // one, using a divmod scheme. To preserve the original loops' order, the
  // innermost loop uses the least significant bits.
  Builder.restoreIP(Result->getBodyIP());

  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.resize(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();

    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;

    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  // The outermost loop gets all the remaining bits.
  NewIndVars[0] = Leftover;

  // Construct the loop body control flow.
  // We progressively construct the branch structure following the direction of
  // the control flow: the leading in-between code, the loop nest body, the
  // trailing in-between code, and finally rejoining the collapsed loop's
  // latch. ContinueBlock and ContinuePred keep track of the source(s) of the
  // next edge. If ContinueBlock is set, continue with that block. If
  // ContinuePred is set, use its predecessors as sources.
  BasicBlock *ContinueBlock = Result->getBody();
  BasicBlock *ContinuePred = nullptr;
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    if (ContinueBlock)
      redirectTo(ContinueBlock, Dest, DL);
    else
      redirectAllPredecessorsTo(ContinuePred, Dest, DL);

    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };

  // The code before the nested loop of each level.
  // Because we are sinking it into the nest, it will be executed more often
  // than in the original loop. More sophisticated schemes could keep track of
  // what the in-between code is and instantiate it only once per thread.
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  // Connect the loop nest body.
  ContinueWith(Innermost->getBody(), Innermost->getLatch());

  // The code after the nested loop at each level.
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  // Connect the finished loop to the collapsed loop latch.
  ContinueWith(Result->getLatch(), nullptr);

  // Replace the input loops with the new collapsed loop.
  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);

  // Replace the input loop indvars with the derived ones.
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);

  // Remove unused parts of the input loops.
  removeUnusedBlocksFromParent(OldControlBBs);

  for (CanonicalLoopInfo *L : Loops)
    L->invalidate();

#ifndef NDEBUG
  Result->assertOK();
#endif
  return Result;
}
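// Example of the divmod scheme for a two-loop nest with trip counts N0 and N1:
// the collapsed loop runs N0*N1 iterations, and for a collapsed induction
// variable cv, the original induction variables are recovered as
//   i1 = cv % N1   (innermost, least significant)
//   i0 = cv / N1   (outermost, remaining bits)
// so cv enumerates the pairs (i0, i1) in the original iteration order.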
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Loop control blocks that may become orphaned later.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);

  // Collect the original trip counts and induction variables to be accessible
  // by index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() && "All input loops must be valid canonical loops");
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable within the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into the body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if the tile size divides the trip count, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    FloorTripCount =
        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);

    // Remember some values for later use.
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the newly generated
  // loop nest.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Set up the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // sizes.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the in-between code into the body.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable computed
  // from the tile and floor induction variables.
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  removeUnusedBlocksFromParent(OldControlBBs);

  for (CanonicalLoopInfo *L : Loops)
    L->invalidate();

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}
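// Worked example of the floor trip-count computation above: for an original
// trip count of 10 and a tile size of 4, the floor loop runs
// 10/4 + (10%4 != 0) = 3 times; the select is intended to pick the full tile
// size of 4 for complete tiles and the remainder 10%4 = 2 for the partial
// epilogue tile, with the original induction variable recomputed as
// 4*floor_iv + tile_iv.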
/// Attach metadata \p Properties to the basic block described by \p BB. If the
/// basic block already has metadata, the basic block properties are appended.
static void addBasicBlockMetadata(BasicBlock *BB,
                                  ArrayRef<Metadata *> Properties) {
  // Nothing to do if no property to attach.
  if (Properties.empty())
    return;

  LLVMContext &Ctx = BB->getContext();
  SmallVector<Metadata *> NewProperties;
  NewProperties.push_back(nullptr);

  // If the basic block already has metadata, prepend it to the new metadata.
  MDNode *Existing = BB->getTerminator()->getMetadata(LLVMContext::MD_loop);
  if (Existing)
    append_range(NewProperties, drop_begin(Existing->operands(), 1));

  append_range(NewProperties, Properties);
  MDNode *BasicBlockID = MDNode::getDistinct(Ctx, NewProperties);
  BasicBlockID->replaceOperandWith(0, BasicBlockID);
  BB->getTerminator()->setMetadata(LLVMContext::MD_loop, BasicBlockID);
}
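// The nullptr placeholder pushed first and the replaceOperandWith(0, ...) call
// implement the llvm.loop convention that a loop-metadata node's first operand
// is a self-reference; together with MDNode::getDistinct this keeps the node
// from being uniqued with other loops' otherwise-identical metadata.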
/// Attach loop metadata \p Properties to the loop described by \p Loop. If the
/// loop already has metadata, the loop properties are appended.
static void addLoopMetadata(CanonicalLoopInfo *Loop,
                            ArrayRef<Metadata *> Properties) {
  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");

  // Attach metadata to the loop's latch.
  BasicBlock *Latch = Loop->getLatch();
  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
  addBasicBlockMetadata(Latch, Properties);
}

/// Attach llvm.access.group metadata to the memref instructions of \p Block.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
                            LoopInfo &LI) {
  for (Instruction &I : *Block) {
    if (I.mayReadOrWriteMemory()) {
      // TODO: This instruction may already have an access group from
      // other pragmas, e.g. #pragma clang loop vectorize. Append
      // so that the existing metadata is not overwritten.
      I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
    }
  }
}

void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
  LLVMContext &Ctx = Builder.getContext();
  addLoopMetadata(
      Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
             MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
}

void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
  LLVMContext &Ctx = Builder.getContext();
  addLoopMetadata(
      Loop, {
                MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
            });
}
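// For reference, unrollLoopHeuristic attaches metadata corresponding to IR
// along the lines of (names illustrative):
//
//   br label %header, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.enable"}
//
// which LoopUnrollPass later consumes, choosing the unroll factor itself.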
void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
                                      Value *IfCond, ValueToValueMapTy &VMap,
                                      const Twine &NamePrefix) {
  Function *F = CanonicalLoop->getFunction();

  // Define where the if branch should be inserted.
  Instruction *SplitBefore;
  if (Instruction::classof(IfCond)) {
    SplitBefore = dyn_cast<Instruction>(IfCond);
  } else {
    SplitBefore = CanonicalLoop->getPreheader()->getTerminator();
  }

  // TODO: We should not rely on pass manager. Currently we use pass manager
  // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo
  // object. We should have a method which returns all blocks between
  // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter().
  FunctionAnalysisManager FAM;
  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
  FAM.registerPass([]() { return LoopAnalysis(); });
  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });

  // Get the loop which needs to be cloned.
  LoopAnalysis LIA;
  LoopInfo &&LI = LIA.run(*F, FAM);
  Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());

  // Create additional blocks for the if statement.
  BasicBlock *Head = SplitBefore->getParent();
  Instruction *HeadOldTerm = Head->getTerminator();
  llvm::LLVMContext &C = Head->getContext();
  llvm::BasicBlock *ThenBlock = llvm::BasicBlock::Create(
      C, NamePrefix + ".if.then", Head->getParent(), Head->getNextNode());
  llvm::BasicBlock *ElseBlock = llvm::BasicBlock::Create(
      C, NamePrefix + ".if.else", Head->getParent(), CanonicalLoop->getExit());

  // Create the if condition branch.
  Builder.SetInsertPoint(HeadOldTerm);
  Instruction *BrInstr =
      Builder.CreateCondBr(IfCond, ThenBlock, /*ifFalse*/ ElseBlock);
  InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()};
  // The then block contains the branch to the OpenMP loop, which needs to be
  // vectorized.
  spliceBB(IP, ThenBlock, false);
  ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock);

  Builder.SetInsertPoint(ElseBlock);

  // Clone the loop for the else branch.
  SmallVector<BasicBlock *, 8> NewBlocks;

  VMap[CanonicalLoop->getPreheader()] = ElseBlock;
  for (BasicBlock *Block : L->getBlocks()) {
    BasicBlock *NewBB = CloneBasicBlock(Block, VMap, "", F);
    NewBB->moveBefore(CanonicalLoop->getExit());
    VMap[Block] = NewBB;
    NewBlocks.push_back(NewBB);
  }
  remapInstructionsInBlocks(NewBlocks, VMap);
  Builder.CreateBr(NewBlocks.front());
}
void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
                                MapVector<Value *, Value *> AlignedVars,
                                Value *IfCond, OrderKind Order,
                                ConstantInt *Simdlen, ConstantInt *Safelen) {
  LLVMContext &Ctx = Builder.getContext();

  Function *F = CanonicalLoop->getFunction();

  // TODO: We should not rely on pass manager. Currently we use pass manager
  // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo
  // object. We should have a method which returns all blocks between
  // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter().
  FunctionAnalysisManager FAM;
  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
  FAM.registerPass([]() { return LoopAnalysis(); });
  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });

  LoopAnalysis LIA;
  LoopInfo &&LI = LIA.run(*F, FAM);

  Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
  if (AlignedVars.size()) {
    InsertPointTy IP = Builder.saveIP();
    Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
    for (auto &AlignedItem : AlignedVars) {
      Value *AlignedPtr = AlignedItem.first;
      Value *Alignment = AlignedItem.second;
      Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(),
                                        AlignedPtr, Alignment);
    }
    Builder.restoreIP(IP);
  }

  if (IfCond) {
    ValueToValueMapTy VMap;
    createIfVersion(CanonicalLoop, IfCond, VMap, "simd");
    // Add metadata to the cloned loop which disables vectorization.
    Value *MappedLatch = VMap.lookup(CanonicalLoop->getLatch());
    assert(MappedLatch &&
           "Cannot find value which corresponds to original loop latch");
    assert(isa<BasicBlock>(MappedLatch) &&
           "Cannot cast mapped latch block value to BasicBlock");
    BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
    ConstantAsMetadata *BoolConst =
        ConstantAsMetadata::get(ConstantInt::getFalse(Type::getInt1Ty(Ctx)));
    addBasicBlockMetadata(
        NewLatchBlock,
        {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
                           BoolConst})});
  }

  SmallSet<BasicBlock *, 8> Reachable;

  // Get the basic blocks from the loop in which memref instructions
  // can be found.
  // TODO: Generalize getting all blocks inside a CanonicalLoopInfo,
  // preferably without running any passes.
  for (BasicBlock *Block : L->getBlocks()) {
    if (Block == CanonicalLoop->getCond() ||
        Block == CanonicalLoop->getHeader())
      continue;
    Reachable.insert(Block);
  }

  SmallVector<Metadata *> LoopMDList;

  // In the presence of a finite 'safelen', it may be unsafe to mark all
  // the memory instructions parallel, because loop-carried
  // dependences of 'safelen' iterations are possible.
  // If the order(concurrent) clause is specified, then the memory instructions
  // are marked parallel even if 'safelen' is finite.
  if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
    // Add access group metadata to memory-access instructions.
    MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
    for (BasicBlock *BB : Reachable)
      addSimdMetadata(BB, AccessGroup, LI);
    // TODO: If the loop has existing parallel access metadata, have
    // to combine two lists.
    LoopMDList.push_back(MDNode::get(
        Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
  }

  // Use the above access group metadata to create loop level
  // metadata, which should be distinct for each loop.
  ConstantAsMetadata *BoolConst =
      ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx)));
  LoopMDList.push_back(MDNode::get(
      Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));

  if (Simdlen || Safelen) {
    // If both simdlen and safelen clauses are specified, the value of the
    // simdlen parameter must be less than or equal to the value of the safelen
    // parameter. Therefore, use safelen only in the absence of simdlen.
    ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
    LoopMDList.push_back(
        MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"),
                          ConstantAsMetadata::get(VectorizeWidth)}));
  }

  addLoopMetadata(CanonicalLoop, LoopMDList);
}
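// The access-group scheme used above works in two parts: each memory access in
// the loop body is tagged with !llvm.access.group pointing at a distinct node,
// and the loop itself carries !{"llvm.loop.parallel_accesses", <that node>}.
// Together these tell the vectorizer that accesses in the group carry no
// loop-carried dependences, so the loop can be vectorized without runtime
// dependence checks.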
/// Create the TargetMachine object to query the backend for optimization
/// preferences.
///
/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
/// e.g. Clang does not pass it to its CodeGen layer and creates it only when
/// needed for the LLVM pass pipeline. We use some default options to avoid
/// having to pass too many settings from the frontend that probably do not
/// matter.
///
/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
/// method. If we are going to use TargetMachine for more purposes, especially
/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
/// might become worth requiring front-ends to pass on their TargetMachine, or
/// at least cache it between methods. Note that while frontends such as Clang
/// have just a single main TargetMachine per translation unit, "target-cpu"
/// and "target-features" that determine the TargetMachine are per-function and
/// can be overridden using __attribute__((target("OPTIONS"))).
static std::unique_ptr<TargetMachine>
createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
  Module *M = F->getParent();

  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
  const std::string &Triple = M->getTargetTriple();

  std::string Error;
  const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
  if (!TheTarget)
    return {};

  llvm::TargetOptions Options;
  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
      Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt,
      /*CodeModel=*/std::nullopt, OptLevel));
}
/// Heuristically determine the best-performing unroll factor for \p CLI. This
/// depends on the target processor. We are re-using the same heuristics as the
/// LoopUnrollPass.
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
  Function *F = CLI->getFunction();

  // Assume the user requests the most aggressive unrolling, even if the rest of
  // the code is optimized using a lower setting.
  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);

  FunctionAnalysisManager FAM;
  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
  FAM.registerPass([]() { return AssumptionAnalysis(); });
  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
  FAM.registerPass([]() { return LoopAnalysis(); });
  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
  TargetIRAnalysis TIRA;
  if (TM)
    TIRA = TargetIRAnalysis(
        [&](const Function &F) { return TM->getTargetTransformInfo(F); });
  FAM.registerPass([&]() { return TIRA; });

  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
  ScalarEvolutionAnalysis SEA;
  ScalarEvolution &&SE = SEA.run(*F, FAM);
  DominatorTreeAnalysis DTA;
  DominatorTree &&DT = DTA.run(*F, FAM);
  LoopAnalysis LIA;
  LoopInfo &&LI = LIA.run(*F, FAM);
  AssumptionAnalysis ACT;
  AssumptionCache &&AC = ACT.run(*F, FAM);
  OptimizationRemarkEmitter ORE{F};

  Loop *L = LI.getLoopFor(CLI->getHeader());
  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");

  TargetTransformInfo::UnrollingPreferences UP =
      gatherUnrollingPreferences(L, SE, TTI,
                                 /*BlockFrequencyInfo=*/nullptr,
                                 /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
                                 /*UserThreshold=*/std::nullopt,
                                 /*UserCount=*/std::nullopt,
                                 /*UserAllowPartial=*/true,
                                 /*UserAllowRuntime=*/true,
                                 /*UserUpperBound=*/std::nullopt,
                                 /*UserFullUnrollMaxCount=*/std::nullopt);

  UP.Force = true;

  // Account for additional optimizations taking place before the LoopUnrollPass
  // would unroll the loop.
  UP.Threshold *= UnrollThresholdFactor;
  UP.PartialThreshold *= UnrollThresholdFactor;

  // Use normal unroll factors even if the rest of the code is optimized for
  // size.
  UP.OptSizeThreshold = UP.Threshold;
  UP.PartialOptSizeThreshold = UP.PartialThreshold;

  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
                    << "  Threshold=" << UP.Threshold << "\n"
                    << "  PartialThreshold=" << UP.PartialThreshold << "\n"
                    << "  OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
                    << "  PartialOptSizeThreshold="
                    << UP.PartialOptSizeThreshold << "\n");

  // Disable peeling.
  TargetTransformInfo::PeelingPreferences PP =
      gatherPeelingPreferences(L, SE, TTI,
                               /*UserAllowPeeling=*/false,
                               /*UserAllowProfileBasedPeeling=*/false,
                               /*UnrollingSpecficValues=*/false);

  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);

  // Assume that reads and writes to stack variables can be eliminated by
  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
  // size.
  for (BasicBlock *BB : L->blocks()) {
    for (Instruction &I : *BB) {
      Value *Ptr;
      if (auto *Load = dyn_cast<LoadInst>(&I)) {
        Ptr = Load->getPointerOperand();
      } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
        Ptr = Store->getPointerOperand();
      } else
        continue;

      Ptr = Ptr->stripPointerCasts();

      if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
        if (Alloca->getParent() == &F->getEntryBlock())
          EphValues.insert(&I);
      }
    }
  }

  unsigned NumInlineCandidates;
  bool NotDuplicatable;
  bool Convergent;
  InstructionCost LoopSizeIC =
      ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
                          TTI, EphValues, UP.BEInsns);
  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");

  // The loop is not unrollable if it contains certain instructions.
  if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
    LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
    return 1;
  }
  unsigned LoopSize = *LoopSizeIC.getValue();

  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
  // be able to use it.
  int TripCount = 0;
  int MaxTripCount = 0;
  bool MaxOrZero = false;
  unsigned TripMultiple = 0;

  bool UseUpperBound = false;
  computeUnrollCount(L, TTI, DT, &LI, &AC, SE, EphValues, &ORE, TripCount,
                     MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
                     UseUpperBound);
  unsigned Factor = UP.Count;
  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");

  // This function returns 1 to signal not to unroll a loop.
  if (Factor == 0)
    return 1;
  return Factor;
}
void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
                                        int32_t Factor,
                                        CanonicalLoopInfo **UnrolledCLI) {
  assert(Factor >= 0 && "Unroll factor must not be negative");
  Function *F = Loop->getFunction();
  LLVMContext &Ctx = F->getContext();

  // If the unrolled loop is not used for another loop-associated directive, it
  // is sufficient to add metadata for the LoopUnrollPass.
  if (!UnrolledCLI) {
    SmallVector<Metadata *, 2> LoopMetadata;
    LoopMetadata.push_back(
        MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));

    if (Factor >= 1) {
      ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
          ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
      LoopMetadata.push_back(MDNode::get(
          Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
    }

    addLoopMetadata(Loop, LoopMetadata);
    return;
  }

  // Heuristically determine the unroll factor.
  if (Factor == 0)
    Factor = computeHeuristicUnrollFactor(Loop);

  // No change required with unroll factor 1.
  if (Factor == 1) {
    *UnrolledCLI = Loop;
    return;
  }

  assert(Factor >= 2 &&
         "unrolling only makes sense with a factor of 2 or larger");

  Type *IndVarTy = Loop->getIndVarType();

  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
  // unroll the inner loop.
  Value *FactorVal =
      ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
                                       /*isSigned=*/false));
  std::vector<CanonicalLoopInfo *> LoopNest =
      tileLoops(DL, {Loop}, {FactorVal});
  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
  *UnrolledCLI = LoopNest[0];
  CanonicalLoopInfo *InnerLoop = LoopNest[1];

  // LoopUnrollPass can only fully unroll loops with constant trip count.
  // Unroll by the unroll factor with a fallback epilog for the remainder
  // iterations if necessary.
  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
      ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
  addLoopMetadata(
      InnerLoop,
      {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
       MDNode::get(
           Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});

#ifndef NDEBUG
  (*UnrolledCLI)->assertOK();
#endif
}
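// Sketch of the partial-unroll-with-result strategy above, for a requested
// factor of 4: tileLoops splits the loop into a "floor" loop (returned via
// *UnrolledCLI so further directives can be applied to it) and an inner "tile"
// loop of at most 4 iterations; the metadata then asks LoopUnrollPass to
// unroll that inner loop by 4, falling back to an epilog for the remainder.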
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
                                   llvm::Value *CpyFn, llvm::Value *DidIt) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);

  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);

  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
  Builder.CreateCall(Fn, Args);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // If needed (i.e. not null), initialize `DidIt` with 0.
  if (DidIt) {
    Builder.CreateStore(Builder.getInt32(0), DidIt);
  }

  Directive OMPD = Directive::OMPD_single;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // generates the following:
  // if (__kmpc_single()) {
  //   .... single region ...
  //   __kmpc_end_single
  // }
  // __kmpc_barrier

  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                       /*Conditional*/ true,
                       /*hasFinalize*/ true);
  if (!IsNowait)
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);
  return Builder.saveIP();
}
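// The DidIt flag ties the two entry points above together: createSingle zeroes
// it before entering the region, the thread that wins __kmpc_single is
// expected to set it (typically in the caller-provided body callback), and
// createCopyPrivate passes the loaded flag to __kmpc_copyprivate so the
// runtime knows which thread's data to broadcast.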
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_critical;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  Function *RTFn = nullptr;
  if (HintInst) {
    // Add the hint to the entry args and create the call.
    EnterArgs.push_back(HintInst);
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  } else {
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  }
  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);

  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
                                     InsertPointTy AllocaIP, unsigned NumLoops,
                                     ArrayRef<llvm::Value *> StoreValues,
                                     const Twine &Name, bool IsDependSource) {
  assert(
      llvm::all_of(StoreValues,
                   [](Value *SV) { return SV->getType()->isIntegerTy(64); }) &&
      "OpenMP runtime requires depend vec with i64 type");

  if (!updateToLocation(Loc))
    return Loc.IP;

  // Allocate space for the vector and generate the alloca instruction.
  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
  Builder.restoreIP(AllocaIP);
  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
  ArgsBase->setAlignment(Align(8));
  Builder.restoreIP(Loc.IP);

  // Store the index values with offset into the depend vector.
  for (unsigned I = 0; I < NumLoops; ++I) {
    Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
        ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
    StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
    STInst->setAlignment(Align(8));
  }

  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
      ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};

  Function *RTLFn = nullptr;
  if (IsDependSource)
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
  else
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
  Builder.CreateCall(RTLFn, Args);

  return Builder.saveIP();
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, bool IsThreads) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_ordered;
  Instruction *EntryCall = nullptr;
  Instruction *ExitCall = nullptr;

  if (IsThreads) {
    uint32_t SrcLocStrSize;
    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
    Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
    Value *ThreadId = getOrCreateThreadID(Ident);
    Value *Args[] = {Ident, ThreadId};

    Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
    EntryCall = Builder.CreateCall(EntryRTLFn, Args);

    Function *ExitRTLFn =
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
    ExitCall = Builder.CreateCall(ExitRTLFn, Args);
  }

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize, bool IsCancellable) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});

  // Create the inlined region's entry and body blocks, in preparation
  // for conditional creation.
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // Generate the body.
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP());

  // Emit the exit call and do any needed finalization.
  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
         FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
         "Unexpected control flow graph state!!");
  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
         "Unexpected Control Flow State!");
  MergeBlockIntoPredecessor(FiniBB);

  // If we are skipping the region of a non-conditional, remove the exit
  // block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  auto merged = MergeBlockIntoPredecessor(ExitBB);
  BasicBlock *ExitPredBB = SplitPos->getParent();
  auto InsertBB = merged ? ExitPredBB : ExitBB;
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos->eraseFromParent();
  Builder.SetInsertPoint(InsertBB);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // If there is nothing to do, return the current insertion point.
  if (!Conditional || !EntryCall)
    return Builder.saveIP();

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);

  // Emit ThenBB and set the Builder's insertion point there for
  // body generation next. Place the block after the current block.
  Function *CurFn = EntryBB->getParent();
  CurFn->insert(std::next(EntryBB->getIterator()), ThenBB);

  // Move the Entry branch to the end of ThenBB, and replace it with a
  // conditional branch (if-stmt).
  Instruction *EntryBBTI = EntryBB->getTerminator();
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  EntryBBTI->removeFromParent();
  Builder.SetInsertPoint(UI);
  Builder.Insert(EntryBBTI);
  UI->eraseFromParent();
  Builder.SetInsertPoint(ThenBB->getTerminator());

  // Return an insertion point to ExitBB.
  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {

  Builder.restoreIP(FinIP);

  // If there is finalization to do, emit it before the exit call.
  if (HasFinalize) {
    assert(!FinalizationStack.empty() &&
           "Unexpected finalization stack state!");

    FinalizationInfo Fi = FinalizationStack.pop_back_val();
    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

    Fi.FiniCB(FinIP);

    BasicBlock *FiniBB = FinIP.getBlock();
    Instruction *FiniBBTI = FiniBB->getTerminator();

    // Set the Builder's insertion point for the exit call creation.
    Builder.SetInsertPoint(FiniBBTI);
  }

  if (!ExitCall)
    return Builder.saveIP();

  // Place the ExitCall as the last instruction before the finalization block
  // terminator.
  ExitCall->removeFromParent();
  Builder.Insert(ExitCall);

  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  if (!IP.isSet())
    return IP;

  IRBuilder<>::InsertPointGuard IPG(Builder);

  // Creates the following CFG structure:
  //
  //   OMP_Entry : (MasterAddr != PrivateAddr)?
  //     F          T
  //     |           \
  //     |       copyin.not.master
  //     |           /
  //     v          /
  //   copyin.not.master.end
  //            |
  //            v
  //      OMP.Entry.Next
  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If the entry block is terminated, split it to preserve the branch to the
  // following basic block (i.e. OMP.Entry.Next); otherwise, leave everything
  // as is.
  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
                                         "copyin.not.master.end");
    OMP_Entry->getTerminator()->eraseFromParent();
  } else {
    CopyEnd =
        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  }

  Builder.SetInsertPoint(OMP_Entry);
  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);

  Builder.SetInsertPoint(CopyBegin);
  if (BranchtoEnd)
    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));

  return Builder.saveIP();
}
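
// Illustrative usage sketch (an assumption): a caller lowering `copyin(t)`
// emits the per-variable copy at the returned insertion point, which sits in
// the "copyin.not.master" block guarded by the MasterAddr != PrivateAddr test:
//
//   InsertPointTy CopyIP = OMPBuilder.createCopyinClauseBlocks(
//       Builder.saveIP(), MasterAddr, PrivateAddr, IntPtrTy,
//       /*BranchtoEnd=*/true);
//   Builder.restoreIP(CopyIP);
//   // ... emit the load from MasterAddr and the store to PrivateAddr ...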

CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                          Value *Size, Value *Allocator,
                                          std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Size, Allocator};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);

  return Builder.CreateCall(Fn, Args, Name);
}

CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                         Value *Addr, Value *Allocator,
                                         std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Addr, Allocator};
  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
  return Builder.CreateCall(Fn, Args, Name);
}
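
// Illustrative pairing (an assumption): the two helpers above correspond to
// omp_alloc/omp_free style allocation, e.g. for a 1024-byte allocation from a
// frontend-provided `Allocator` value:
//
//   Value *Ptr = OMPBuilder.createOMPAlloc(Loc, Builder.getInt64(1024),
//                                          Allocator, "omp_ptr");
//   // ... use Ptr ...
//   OMPBuilder.createOMPFree(Loc, Ptr, Allocator, "");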

CallInst *OpenMPIRBuilder::createOMPInteropInit(
    const LocationDescription &Loc, Value *InteropVar,
    omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
    Value *DependenceAddress, bool HaveNowaitClause) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int64, 0);
    PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
    DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident,  ThreadId,       InteropVar,        InteropTypeVal,
      Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);

  return Builder.CreateCall(Fn, Args);
}

CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
    const LocationDescription &Loc, Value *InteropVar, Value *Device,
    Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
    DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident,          ThreadId,          InteropVar,         Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);

  return Builder.CreateCall(Fn, Args);
}

CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
                                               Value *InteropVar, Value *Device,
                                               Value *NumDependences,
                                               Value *DependenceAddress,
                                               bool HaveNowaitClause) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
    DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident,          ThreadId,          InteropVar,         Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);

  return Builder.CreateCall(Fn, Args);
}

CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
    const LocationDescription &Loc, llvm::Value *Pointer,
    llvm::ConstantInt *Size, const llvm::Twine &Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *ThreadPrivateCache =
      getOrCreateInternalVariable(Int8PtrPtr, Name.str());
  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

  Function *Fn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);

  return Builder.CreateCall(Fn, Args);
}
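
// Illustrative usage sketch (an assumption): for a threadprivate global, the
// cached lookup above can be driven as
//
//   Value *TP = OMPBuilder.createCachedThreadPrivate(
//       Loc, /*Pointer=*/GVPtr, /*Size=*/Builder.getInt64(VarSize),
//       "gtp.cache");
//
// where the runtime returns the calling thread's private copy and memoizes it
// through the internal variable named by `Name`, created above.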

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
      IntegerType::getInt8Ty(Int8->getContext()),
      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
  ConstantInt *UseGenericStateMachine =
      ConstantInt::getBool(Int32->getContext(), !IsSPMD);

  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_init);

  CallInst *ThreadKind =
      Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine});

  Value *ExecUserCode = Builder.CreateICmpEQ(
      ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
      "exec_user_code");

  // ThreadKind = __kmpc_target_init(...)
  // if (ThreadKind == -1)
  //   user_code
  // else
  //   return;

  auto *UI = Builder.CreateUnreachable();
  BasicBlock *CheckBB = UI->getParent();
  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");

  BasicBlock *WorkerExitBB = BasicBlock::Create(
      CheckBB->getContext(), "worker.exit", CheckBB->getParent());
  Builder.SetInsertPoint(WorkerExitBB);
  Builder.CreateRetVoid();

  auto *CheckBBTI = CheckBB->getTerminator();
  Builder.SetInsertPoint(CheckBBTI);
  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);

  CheckBBTI->eraseFromParent();
  UI->eraseFromParent();

  // Continue in the "user_code" block, see diagram above and in
  // openmp/libomptarget/deviceRTLs/common/include/target.h .
  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
}

void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
                                         bool IsSPMD) {
  if (!updateToLocation(Loc))
    return;

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
      IntegerType::getInt8Ty(Int8->getContext()),
      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);

  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);

  Builder.CreateCall(Fn, {Ident, IsSPMDVal});
}
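
// Illustrative sketch (an assumption): device kernel generation brackets the
// outlined target body with the two calls above, e.g.
//
//   Builder.restoreIP(OMPBuilder.createTargetInit(Loc, /*IsSPMD=*/false));
//   // ... emit the user code of the target region ...
//   OpenMPIRBuilder::LocationDescription ExitLoc(Builder.saveIP(), DL);
//   OMPBuilder.createTargetDeinit(ExitLoc, /*IsSPMD=*/false);
//
// so that only threads for which __kmpc_target_init returns -1 execute the
// user code (see the branch to "user_code.entry" above).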

void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
    Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) {
  if (Config.isEmbedded()) {
    OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage);
    // TODO: Determine if DSO local can be set to true.
    OutlinedFn->setDSOLocal(false);
    OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
    if (Triple(M.getTargetTriple()).isAMDGCN())
      OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
  }

  if (NumTeams > 0)
    OutlinedFn->addFnAttr("omp_target_num_teams", std::to_string(NumTeams));
  if (NumThreads > 0)
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(NumThreads));
}

Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
                                                    StringRef EntryFnIDName) {
  if (Config.isEmbedded()) {
    assert(OutlinedFn && "The outlined function must exist if embedded");
    return ConstantExpr::getBitCast(OutlinedFn, Builder.getInt8PtrTy());
  }

  return new GlobalVariable(
      M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::WeakAnyLinkage,
      Constant::getNullValue(Builder.getInt8Ty()), EntryFnIDName);
}

Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
                                                       StringRef EntryFnName) {
  if (OutlinedFn)
    return OutlinedFn;

  assert(!M.getGlobalVariable(EntryFnName, true) &&
         "Named kernel already exists?");
  return new GlobalVariable(
      M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::InternalLinkage,
      Constant::getNullValue(Builder.getInt8Ty()), EntryFnName);
}

void OpenMPIRBuilder::emitTargetRegionFunction(
    OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo,
    FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams,
    int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn,
    Constant *&OutlinedFnID) {

  SmallString<64> EntryFnName;
  InfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);

  OutlinedFn = Config.isEmbedded() || !Config.openMPOffloadMandatory()
                   ? GenerateFunctionCallback(EntryFnName)
                   : nullptr;

  // If this target outline function is not an offload entry, we don't need to
  // register it. This may happen in the case of a constant-false `if` clause,
  // or if there are no OpenMP targets.
  if (!IsOffloadEntry)
    return;

  std::string EntryFnIDName =
      Config.isEmbedded()
          ? std::string(EntryFnName)
          : createPlatformSpecificName({EntryFnName, "region_id"});

  OutlinedFnID = registerTargetRegionFunction(
      InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams,
      NumThreads);
}

Constant *OpenMPIRBuilder::registerTargetRegionFunction(
    OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo,
    Function *OutlinedFn, StringRef EntryFnName, StringRef EntryFnIDName,
    int32_t NumTeams, int32_t NumThreads) {
  if (OutlinedFn)
    setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads);
  auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
  auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
  InfoManager.registerTargetRegionEntryInfo(
      EntryInfo, EntryAddr, OutlinedFnID,
      OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
  return OutlinedFnID;
}

std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
                                                   StringRef FirstSeparator,
                                                   StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

std::string
OpenMPIRBuilder::createPlatformSpecificName(ArrayRef<StringRef> Parts) const {
  return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
                                                Config.separator());
}
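
// Worked example (follows directly from the loop above): the first separator
// precedes the first part, so
//
//   getNameWithSeparators({"a", "b", "c"}, ".", "_") == ".a_b_c"
//
// createPlatformSpecificName merely substitutes the separators configured for
// the current target (e.g. "." where dots are legal in symbol names, "_"
// elsewhere; the concrete choice is an assumption of this note).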

GlobalVariable *
OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
                                             unsigned AddressSpace) {
  auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
  if (Elem.second) {
    assert(cast<PointerType>(Elem.second->getType())
               ->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
  } else {
    // TODO: investigate the appropriate linkage type used for the global
    // variable for possibly changing that to internal or private, or maybe
    // create different versions of the function for different OMP internal
    // variables.
    Elem.second = new GlobalVariable(
        M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage,
        Constant::getNullValue(Ty), Elem.first(),
        /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace);
  }

  return cast<GlobalVariable>(&*Elem.second);
}

Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

GlobalVariable *
OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                       std::string VarName) {
  llvm::Constant *MaptypesArrayInit =
      llvm::ConstantDataArray::get(M.getContext(), Mappings);
  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
      M, MaptypesArrayInit->getType(),
      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
      VarName);
  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  return MaptypesArrayGlobal;
}

void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
                                          InsertPointTy AllocaIP,
                                          unsigned NumOperands,
                                          struct MapperAllocas &MapperAllocas) {
  if (!updateToLocation(Loc))
    return;

  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Builder.restoreIP(AllocaIP);
  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
  Builder.restoreIP(Loc.IP);
  MapperAllocas.ArgsBase = ArgsBase;
  MapperAllocas.Args = Args;
  MapperAllocas.ArgSizes = ArgSizes;
}

void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
                                     Function *MapperFunc, Value *SrcLocInfo,
                                     Value *MaptypesArg, Value *MapnamesArg,
                                     struct MapperAllocas &MapperAllocas,
                                     int64_t DeviceID, unsigned NumOperands) {
  if (!updateToLocation(Loc))
    return;

  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Value *ArgsBaseGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgsGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
  Builder.CreateCall(MapperFunc,
                     {SrcLocInfo, Builder.getInt64(DeviceID),
                      Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
                      ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
}

void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                                   TargetDataRTArgs &RTArgs,
                                                   TargetDataInfo &Info,
                                                   bool EmitDebug,
                                                   bool ForEndCall) {
  assert((!ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  auto VoidPtrTy = Type::getInt8PtrTy(M.getContext());
  auto VoidPtrPtrTy = VoidPtrTy->getPointerTo(0);
  auto Int64Ty = Type::getInt64Ty(M.getContext());
  auto Int64PtrTy = Type::getInt64PtrTy(M.getContext());

  if (!Info.NumberOfPtrs) {
    RTArgs.BasePointersArray = ConstantPointerNull::get(VoidPtrPtrTy);
    RTArgs.PointersArray = ConstantPointerNull::get(VoidPtrPtrTy);
    RTArgs.SizesArray = ConstantPointerNull::get(Int64PtrTy);
    RTArgs.MapTypesArray = ConstantPointerNull::get(Int64PtrTy);
    RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
    RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy);
    return;
  }

  RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
      ArrayType::get(VoidPtrTy, Info.NumberOfPtrs),
      Info.RTArgs.BasePointersArray,
      /*Idx0=*/0, /*Idx1=*/0);
  RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
      ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
      /*Idx0=*/0,
      /*Idx1=*/0);
  RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
      ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
      /*Idx0=*/0, /*Idx1=*/0);
  RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
      ArrayType::get(Int64Ty, Info.NumberOfPtrs),
      ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
                                                 : Info.RTArgs.MapTypesArray,
      /*Idx0=*/0,
      /*Idx1=*/0);

  // Only emit the mapper information arrays if debug information is
  // requested.
  if (!EmitDebug)
    RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
  else
    RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
        ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

  // If there is no user-defined mapper, set the mapper array to nullptr to
  // avoid an unnecessary data privatization.
  if (!Info.HasMapper)
    RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy);
  else
    RTArgs.MappersArray =
        Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
}

bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
    const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
  assert(!(AO == AtomicOrdering::NotAtomic ||
           AO == llvm::AtomicOrdering::Unordered) &&
         "Unexpected Atomic Ordering.");

  bool Flush = false;
  llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;

  switch (AK) {
  case Read:
    if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
    }
    break;
  case Write:
  case Compare:
  case Update:
    if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Release;
      Flush = true;
    }
    break;
  case Capture:
    switch (AO) {
    case AtomicOrdering::Acquire:
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
      break;
    case AtomicOrdering::Release:
      FlushAO = AtomicOrdering::Release;
      Flush = true;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      FlushAO = AtomicOrdering::AcquireRelease;
      Flush = true;
      break;
    default:
      // do nothing - leave silently.
      break;
    }
  }

  if (Flush) {
    // The flush runtime call does not take a memory ordering yet, so we still
    // resolve the ordering we would pass (FlushAO) to be ready once it does,
    // but for now we only issue the plain flush call.
    // TODO: pass `FlushAO` after memory ordering support is added.
    (void)FlushAO;
    emitFlush(Loc);
  }

  // For AO == AtomicOrdering::Monotonic and all other case combinations,
  // do nothing.
  return Flush;
}
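
// Worked example (follows directly from the switch above): for
// `#pragma omp atomic capture seq_cst`, AK == Capture and
// AO == SequentiallyConsistent, so FlushAO resolves to AcquireRelease and a
// flush is emitted; for a relaxed (Monotonic) atomic, no flush is issued.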

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
                                  AtomicOpValue &X, AtomicOpValue &V,
                                  AtomicOrdering AO) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic read expected a scalar type");

  Value *XRead = nullptr;

  if (XElemTy->isIntegerTy()) {
    LoadInst *XLD =
        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
    XLD->setAtomic(AO);
    XRead = cast<Value>(XLD);
  } else {
    // We need to bitcast and perform the atomic op as an integer.
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
    LoadInst *XLoad =
        Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
    XLoad->setAtomic(AO);
    if (XElemTy->isFloatingPointTy()) {
      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
    } else {
      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
    }
  }

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
                                   AtomicOpValue &X, Value *Expr,
                                   AtomicOrdering AO) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic write expected a scalar type");

  if (XElemTy->isIntegerTy()) {
    StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
    XSt->setAtomic(AO);
  } else {
    // We need to bitcast and perform the atomic op as integers.
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
    Value *ExprCast =
        Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
    StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
    XSt->setAtomic(AO);
  }

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
  return Builder.saveIP();
}
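
// Illustrative usage sketch (an assumption, including the field order of
// AtomicOpValue): lowering `v = x;` under `#pragma omp atomic read acquire`
// fills two descriptors and calls the helper above:
//
//   OpenMPIRBuilder::AtomicOpValue X{XAddr, Int32Ty, /*IsSigned=*/true,
//                                    /*IsVolatile=*/false};
//   OpenMPIRBuilder::AtomicOpValue V{VAddr, Int32Ty, /*IsSigned=*/true,
//                                    /*IsVolatile=*/false};
//   Builder.restoreIP(
//       OMPBuilder.createAtomicRead(Loc, X, V, AtomicOrdering::Acquire));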

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
    const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
    Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
    AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
  assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
  if (!updateToLocation(Loc))
    return Loc.IP;

  LLVM_DEBUG({
    Type *XTy = X.Var->getType();
    assert(XTy->isPointerTy() &&
           "OMP Atomic expects a pointer to target memory");
    Type *XElemTy = X.ElemTy;
    assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
            XElemTy->isPointerTy()) &&
           "OMP atomic update expected a scalar type");
    assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
           (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
           "OpenMP atomic does not support LT or GT operations");
  });

  emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
                   X.IsVolatile, IsXBinopExpr);
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
  return Builder.saveIP();
}

// FIXME: Duplicating AtomicExpand
Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                                               AtomicRMWInst::BinOp RMWOp) {
  switch (RMWOp) {
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Src1, Src2);
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Src1, Src2);
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Src1, Src2);
  case AtomicRMWInst::Nand:
    return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Src1, Src2);
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Src1, Src2);
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::BAD_BINOP:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm_unreachable("Unsupported atomic update operation");
}

std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
    InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
  // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
  // or a complex datatype.
  bool emitRMWOp = false;
  switch (RMWOp) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    emitRMWOp = XElemTy;
    break;
  case AtomicRMWInst::Sub:
    emitRMWOp = (IsXBinopExpr && XElemTy);
    break;
  default:
    emitRMWOp = false;
  }
  emitRMWOp &= XElemTy->isIntegerTy();

  std::pair<Value *, Value *> Res;
  if (emitRMWOp) {
    Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
    // Not needed except in case of postfix captures. Generate anyway for
    // consistency with the else part. Will be removed by any DCE pass.
    // AtomicRMWInst::Xchg does not have a corresponding instruction.
    if (RMWOp == AtomicRMWInst::Xchg)
      Res.second = Res.first;
    else
      Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
  } else {
    unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast =
        Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    LoadInst *OldVal =
        Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
    OldVal->setAtomic(AO);
    // CurBB
    //   |    /---\
    // ContBB    |
    //   |    \---/
    // ExitBB
    BasicBlock *CurBB = Builder.GetInsertBlock();
    Instruction *CurBBTI = CurBB->getTerminator();
    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
    BasicBlock *ExitBB =
        CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
    BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
                                                X->getName() + ".atomic.cont");
    ContBB->getTerminator()->eraseFromParent();
    Builder.restoreIP(AllocaIP);
    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
    NewAtomicAddr->setName(X->getName() + "x.new.val");
    Builder.SetInsertPoint(ContBB);
    llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
    PHI->addIncoming(OldVal, CurBB);
    IntegerType *NewAtomicCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    bool IsIntTy = XElemTy->isIntegerTy();
    Value *NewAtomicIntAddr =
        (IsIntTy)
            ? NewAtomicAddr
            : Builder.CreateBitCast(NewAtomicAddr,
                                    NewAtomicCastTy->getPointerTo(Addrspace));
    Value *OldExprVal = PHI;
    if (!IsIntTy) {
      if (XElemTy->isFloatingPointTy()) {
        OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
                                           X->getName() + ".atomic.fltCast");
      } else {
        OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
                                            X->getName() + ".atomic.ptrCast");
      }
    }

    Value *Upd = UpdateOp(OldExprVal, Builder);
    Builder.CreateStore(Upd, NewAtomicAddr);
    LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicIntAddr);
    Value *XAddr =
        (IsIntTy)
            ? X
            : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    AtomicOrdering Failure =
        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
    AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
        XAddr, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
    Result->setVolatile(VolatileX);
    Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
    PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
    Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);

    Res.first = OldExprVal;
    Res.second = Upd;

    // Set the insertion point in the exit block. If the terminator is the
    // placeholder unreachable we introduced above, drop it and append to
    // ExitBB; otherwise insert right before the pre-existing terminator.
    Instruction *ExitTI = ExitBB->getTerminator();
    if (isa_and_nonnull<UnreachableInst>(ExitTI)) {
      CurBBTI->eraseFromParent();
      Builder.SetInsertPoint(ExitBB);
    } else {
      Builder.SetInsertPoint(ExitTI);
    }
  }
  return Res;
}
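
// Illustrative usage sketch (an assumption): for `x = x + expr` on an integer
// `x`, emitAtomicUpdate takes the direct atomicrmw path above; the callback is
// only consulted on the cmpxchg fallback, but must still be provided:
//
//   auto AddExpr = [&](Value *Old, IRBuilder<> &IRB) -> Value * {
//     return IRB.CreateAdd(Old, Expr);
//   };
//   OpenMPIRBuilder::AtomicUpdateCallbackTy UpdateOp = AddExpr;
//   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
//       Loc, AllocaIP, X, Expr, AtomicOrdering::Monotonic, AtomicRMWInst::Add,
//       UpdateOp, /*IsXBinopExpr=*/true));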

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
    const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
    AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
    AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
    bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  LLVM_DEBUG({
    Type *XTy = X.Var->getType();
    assert(XTy->isPointerTy() &&
           "OMP Atomic expects a pointer to target memory");
    Type *XElemTy = X.ElemTy;
    assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
            XElemTy->isPointerTy()) &&
           "OMP atomic capture expected a scalar type");
    assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
           "OpenMP atomic does not support LT or GT operations");
  });

  // If UpdateExpr is false, i.e. 'x' is assigned some `expr` that is not
  // based on 'x', then 'x' is simply atomically exchanged with 'expr'.
  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
  std::pair<Value *, Value *> Result =
      emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
                       X.IsVolatile, IsXBinopExpr);

  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
    const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
    AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
    omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
    bool IsFailOnly) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  assert(X.Var->getType()->isPointerTy() &&
         "OMP atomic expects a pointer to target memory");
  // compare capture
  if (V.Var) {
    assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
    assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
  }

  bool IsInteger = E->getType()->isIntegerTy();

  if (Op == OMPAtomicCompareOp::EQ) {
    AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
    AtomicCmpXchgInst *Result = nullptr;
    if (!IsInteger) {
      unsigned Addrspace =
          cast<PointerType>(X.Var->getType())->getAddressSpace();
      IntegerType *IntCastTy =
          IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
      Value *XBCast =
          Builder.CreateBitCast(X.Var, IntCastTy->getPointerTo(Addrspace));
      Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
      Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
      Result = Builder.CreateAtomicCmpXchg(XBCast, EBCast, DBCast, MaybeAlign(),
                                           AO, Failure);
    } else {
      Result =
          Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
    }

    if (V.Var) {
      Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
      if (!IsInteger)
        OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
      assert(OldValue->getType() == V.ElemTy &&
             "OldValue and V must be of same type");
      if (IsPostfixUpdate) {
        Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
      } else {
        Value *SuccessOrFail = Builder.CreateExtractValue(Result, /*Idxs=*/1);
        if (IsFailOnly) {
          // CurBB----
          //   |     |
          //   v     |
          // ContBB  |
          //   |     |
          //   v     |
          // ExitBB <-
          //
          // where ContBB only contains the store of the old value to 'v'.
          BasicBlock *CurBB = Builder.GetInsertBlock();
          Instruction *CurBBTI = CurBB->getTerminator();
          CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
          BasicBlock *ExitBB = CurBB->splitBasicBlock(
              CurBBTI, X.Var->getName() + ".atomic.exit");
          BasicBlock *ContBB = CurBB->splitBasicBlock(
              CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
          ContBB->getTerminator()->eraseFromParent();
          CurBB->getTerminator()->eraseFromParent();

          Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);

          Builder.SetInsertPoint(ContBB);
          Builder.CreateStore(OldValue, V.Var);
          Builder.CreateBr(ExitBB);

          // As in emitAtomicUpdate: drop the placeholder unreachable and
          // append to ExitBB, or insert before a pre-existing terminator.
          Instruction *ExitTI = ExitBB->getTerminator();
          if (isa_and_nonnull<UnreachableInst>(ExitTI)) {
            CurBBTI->eraseFromParent();
            Builder.SetInsertPoint(ExitBB);
          } else {
            Builder.SetInsertPoint(ExitTI);
          }
        } else {
          Value *CapturedValue =
              Builder.CreateSelect(SuccessOrFail, E, OldValue);
          Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
        }
      }
    }
    // The comparison result has to be stored.
    if (R.Var) {
      assert(R.Var->getType()->isPointerTy() &&
             "r.var must be of pointer type");
      assert(R.ElemTy->isIntegerTy() && "r must be of integral type");

      Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
      Value *ResultCast = R.IsSigned
                              ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
                              : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
      Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
    }
  } else {
    assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
           "Op should be either max or min at this point");
    assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");

    // Reverse the comparison direction, as the OpenMP forms differ from the
    // LLVM forms. Let's take max as an example.
    // OpenMP form:
    //   x = x > expr ? expr : x;
    // LLVM form:
    //   *ptr = *ptr > val ? *ptr : val;
    // We need to transform to the LLVM form:
    //   x = x <= expr ? x : expr;
    AtomicRMWInst::BinOp NewOp;
    if (IsXBinopExpr) {
      if (IsInteger) {
        if (X.IsSigned)
          NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min
                                                : AtomicRMWInst::Max;
        else
          NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin
                                                : AtomicRMWInst::UMax;
      } else {
        NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMin
                                              : AtomicRMWInst::FMax;
      }
    } else {
      if (IsInteger) {
        if (X.IsSigned)
          NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max
                                                : AtomicRMWInst::Min;
        else
          NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax
                                                : AtomicRMWInst::UMin;
      } else {
        NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMax
                                              : AtomicRMWInst::FMin;
      }
    }

    AtomicRMWInst *OldValue =
        Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO);
    if (V.Var) {
      Value *CapturedValue = nullptr;
      if (IsPostfixUpdate) {
        CapturedValue = OldValue;
      } else {
        CmpInst::Predicate Pred;
        switch (NewOp) {
        case AtomicRMWInst::Max:
          Pred = CmpInst::ICMP_SGT;
          break;
        case AtomicRMWInst::UMax:
          Pred = CmpInst::ICMP_UGT;
          break;
        case AtomicRMWInst::FMax:
          Pred = CmpInst::FCMP_OGT;
          break;
        case AtomicRMWInst::Min:
          Pred = CmpInst::ICMP_SLT;
          break;
        case AtomicRMWInst::UMin:
          Pred = CmpInst::ICMP_ULT;
          break;
        case AtomicRMWInst::FMin:
          Pred = CmpInst::FCMP_OLT;
          break;
        default:
          llvm_unreachable("unexpected comparison op");
        }
        Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
        CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
      }
      Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
    }
  }

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);

  return Builder.saveIP();
}

GlobalVariable *
OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                                       std::string VarName) {
  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
      llvm::ArrayType::get(
          llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
      Names);
  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
      M, MapNamesArrayInit->getType(),
      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
      VarName);
  return MapNamesArrayGlobal;
}

// Create all simple and struct types exposed by the runtime and remember
// the llvm::PointerTypes of them for easy access later.
void OpenMPIRBuilder::initializeTypes(Module &M) {
  LLVMContext &Ctx = M.getContext();
  StructType *T;
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                            \
  VarName##Ty = ArrayType::get(ElemTy, ArraySize);                            \
  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                 \
  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);           \
  VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...)                     \
  T = StructType::getTypeByName(Ctx, StructName);                             \
  if (!T)                                                                     \
    T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed);           \
  VarName = T;                                                                \
  VarName##Ptr = PointerType::getUnqual(T);
#include "llvm/Frontend/OpenMP/OMPKinds.def"
}
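
// Illustrative expansion (an assumption about one OMPKinds.def entry): an
// entry such as OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8) would expand to
//
//   KmpCriticalNameTy = ArrayType::get(Int32, 8);
//   KmpCriticalNamePtrTy = PointerType::getUnqual(KmpCriticalNameTy);
//
// which is where the KmpCriticalNameTy consumed by getOMPCriticalRegionLock
// above comes from.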

void OpenMPIRBuilder::OutlineInfo::collectBlocks(
    SmallPtrSetImpl<BasicBlock *> &BlockSet,
    SmallVectorImpl<BasicBlock *> &BlockVector) {
  SmallVector<BasicBlock *, 32> Worklist;
  BlockSet.insert(EntryBB);
  BlockSet.insert(ExitBB);

  Worklist.push_back(EntryBB);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    BlockVector.push_back(BB);
    for (BasicBlock *SuccBB : successors(BB))
      if (BlockSet.insert(SuccBB).second)
        Worklist.push_back(SuccBB);
  }
}

void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
                                         uint64_t Size, int32_t Flags,
                                         GlobalValue::LinkageTypes) {
  if (!Config.isTargetCodegen()) {
    emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
    return;
  }
  // TODO: Add support for global variables on the device after declare target
  // support.
  Function *Fn = dyn_cast<Function>(Addr);
  if (!Fn)
    return;

  Module &M = *(Fn->getParent());
  LLVMContext &Ctx = M.getContext();

  // Get the "nvvm.annotations" metadata node.
  NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");

  Metadata *MDVals[] = {
      ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"),
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))};
  // Append metadata to nvvm.annotations.
  MD->addOperand(MDNode::get(Ctx, MDVals));

  // Add a function attribute for the kernel.
  Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
}

// We only generate metadata for functions that contain target regions.
void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
    OffloadEntriesInfoManager &OffloadEntriesInfoManager,
    EmitMetadataErrorReportFunctionTy &ErrorFn) {

  // If there are no entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  LLVMContext &C = M.getContext();
  SmallVector<std::pair<const OffloadEntriesInfoManager::OffloadEntryInfo *,
                        TargetRegionEntryInfo>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), V));
  };

  auto &&GetMDString = [&C](StringRef V) { return MDString::get(C, V); };

  // Create the offloading info metadata node.
  NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
          const TargetRegionEntryInfo &EntryInfo,
          const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line.
        // - Entry 6 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
            GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
            GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
            GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);

        // Add metadata to the named metadata node.
        MD->addOperand(MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create a function that emits metadata for each device global variable
  // entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
          StringRef MangledName,
          const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
                           GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
        OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);

        // Add metadata to the named metadata node.
        MD->addOperand(MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(E.first && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
                E.first)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        TargetRegionEntryInfo EntryInfo = E.second;
        StringRef FnName = EntryInfo.ParentName;
        if (!M.getNamedValue(FnName))
          continue;
        ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(),
                         /*Size=*/0, CE->getFlags(),
                         GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<
                   OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
                   E.first)) {
      OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: {
        if (Config.isEmbedded() && Config.hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize() == 0)
          continue;
        break;
      }
      case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
        assert(((Config.isEmbedded() && !CE->getAddress()) ||
                (!Config.isEmbedded() && CE->getAddress())) &&
               "Declare target link address is set.");
        if (Config.isEmbedded())
          continue;
        if (!CE->getAddress()) {
          ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
          continue;
        }
        break;
      }

      // Hidden or internal symbols on the device are not externally visible.
      // We should not attempt to register them by creating an offloading
      // entry.
      if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
        if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
          continue;

      createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
                         Flags, CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

void TargetRegionEntryInfo::getTargetRegionEntryFnName(
    SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID,
    unsigned FileID, unsigned Line, unsigned Count) {
  raw_svector_ostream OS(Name);
  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
     << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  if (Count)
    OS << "_" << Count;
}
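
// For illustration only (hypothetical values): ParentName "foo", DeviceID
// 0x10302, FileID 0xabc123, Line 42 and Count 3 would produce
//   __omp_offloading_10302_abc123_foo_l42_3
// while Count 0 drops the trailing "_3".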

void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
    SmallVectorImpl<char> &Name, const TargetRegionEntryInfo &EntryInfo) {
  unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
  TargetRegionEntryInfo::getTargetRegionEntryFnName(
      Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
      EntryInfo.Line, NewCount);
}

/// Loads all the offload entries information from the host IR
/// metadata.
void OpenMPIRBuilder::loadOffloadInfoMetadata(
    Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
  NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
  if (!MD)
    return;

  for (MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManager::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion: {
      TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3),
                                      /*DeviceID=*/GetMDInt(1),
                                      /*FileID=*/GetMDInt(2),
                                      /*Line=*/GetMDInt(4),
                                      /*Count=*/GetMDInt(5));
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          EntryInfo, /*Order=*/GetMDInt(6));
      break;
    }
    case OffloadEntriesInfoManager::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
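
// For illustration only (hypothetical values; this assumes ompOffloadInfoName
// is "omp_offload.info" and that the target-region and device-global-var kind
// enumerators are 0 and 1), the metadata consumed above has the shape:
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 66306, i32 721161, !"foo", i32 42, i32 0, i32 0}
//   !1 = !{i32 1, !"var_x", i32 0, i32 1}
// where !0 is a target region (kind, device ID, file ID, parent function,
// line, count, order) and !1 is a declare target global variable (kind,
// mangled name, declare target kind, order).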

bool OffloadEntriesInfoManager::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
    const TargetRegionEntryInfo &EntryInfo) const {
  auto It = OffloadEntriesTargetRegionCount.find(
      getTargetRegionEntryCountKey(EntryInfo));
  if (It == OffloadEntriesTargetRegionCount.end())
    return 0;
  return It->second;
}

void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
    const TargetRegionEntryInfo &EntryInfo) {
  OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
      EntryInfo.Count + 1;
}
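
// Together, getTargetRegionEntryInfoCount and
// incrementTargetRegionEntryInfoCount disambiguate multiple target regions
// that share the same parent function, device ID, file ID and line. A sketch
// (hypothetical values): registering two regions for "foo" at line 42 stores
// them under Count 0 and Count 1, so their generated entry names stay unique.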

/// Initialize target region entry.
void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
    const TargetRegionEntryInfo &EntryInfo, unsigned Order) {
  OffloadEntriesTargetRegion[EntryInfo] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
    TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID,
    OMPTargetRegionEntryKind Flags) {
  assert(EntryInfo.Count == 0 && "expected default EntryInfo");

  // Update the EntryInfo with the next available count for this location.
  EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);

  // If we are emitting code for a target, the entry is already initialized
  // and only has to be registered.
  if (Config.isEmbedded()) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(EntryInfo))
      return;
    auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(EntryInfo, /*IgnoreAddressId=*/true))
      return;
    assert(!hasTargetRegionEntryInfo(EntryInfo) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[EntryInfo] = Entry;
    ++OffloadingEntriesNum;
  }
  incrementTargetRegionEntryInfoCount(EntryInfo);
}

bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
    TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const {
  // Update the EntryInfo with the next available count for this location.
  EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);

  auto It = OffloadEntriesTargetRegion.find(EntryInfo);
  if (It == OffloadEntriesTargetRegion.end())
    return false;

  // Fail if this entry is already registered.
  if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
    return false;
  return true;
}

void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &It : OffloadEntriesTargetRegion)
    Action(It.first, It.second);
}

void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
    StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order) {
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
    StringRef VarName, Constant *Addr, int64_t VarSize,
    OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) {
  if (Config.isEmbedded()) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize() == 0) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize() == 0) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
                                              Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
    const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CanonicalLoopInfo::collectControlBlocks(
    SmallVectorImpl<BasicBlock *> &BBs) {
  // We only count those BBs as control blocks for which we do not need to
  // reverse the CFG, i.e. not the loop body which can contain arbitrary
  // control flow. For consistency, this also means we do not add the Body
  // block, which is just the entry to the body code.
  BBs.reserve(BBs.size() + 6);
  BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
}

BasicBlock *CanonicalLoopInfo::getPreheader() const {
  assert(isValid() && "Requires a valid canonical loop");
  for (BasicBlock *Pred : predecessors(Header)) {
    if (Pred != Latch)
      return Pred;
  }
  llvm_unreachable("Missing preheader");
}

void CanonicalLoopInfo::setTripCount(Value *TripCount) {
  assert(isValid() && "Requires a valid canonical loop");
  Instruction *CmpI = &getCond()->front();
  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  CmpI->setOperand(1, TripCount);

#ifndef NDEBUG
  assertOK();
#endif
}

void CanonicalLoopInfo::mapIndVar(
    llvm::function_ref<Value *(Instruction *)> Updater) {
  assert(isValid() && "Requires a valid canonical loop");

  Instruction *OldIV = getIndVar();

  // Record all uses excluding those introduced by the updater. Uses by the
  // CanonicalLoopInfo itself to keep track of the number of iterations are
  // excluded.
  SmallVector<Use *> ReplaceableUses;
  for (Use &U : OldIV->uses()) {
    auto *User = dyn_cast<Instruction>(U.getUser());
    if (!User)
      continue;
    if (User->getParent() == getCond())
      continue;
    if (User->getParent() == getLatch())
      continue;
    ReplaceableUses.push_back(&U);
  }

  // Run the updater that may introduce new uses.
  Value *NewIV = Updater(OldIV);

  // Replace the old uses with the value returned by the updater.
  for (Use *U : ReplaceableUses)
    U->set(NewIV);

#ifndef NDEBUG
  assertOK();
#endif
}
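
// Illustrative use only (CLI, Builder, Start and Step are assumed to exist in
// the caller): rebase the canonical zero-based IV onto a user-visible range,
// NewIV = Start + OldIV * Step.
//   CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
//     Builder.SetInsertPoint(CLI->getBody(),
//                            CLI->getBody()->getFirstInsertionPt());
//     return Builder.CreateAdd(Start, Builder.CreateMul(OldIV, Step));
//   });
// Uses of OldIV inside Cond and Latch are deliberately left untouched so the
// loop control keeps counting from zero to the trip count.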

void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
  // No constraints if this object currently does not describe a loop.
  if (!isValid())
    return;

  BasicBlock *Preheader = getPreheader();
  BasicBlock *Body = getBody();
  BasicBlock *After = getAfter();

  // Verify standard control-flow we use for OpenMP loops.
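  // The shape checked below, for reference:
  //
  //   Preheader -> Header -> Cond -+-> Body -> ... -> Latch -+
  //                  ^             |                         |
  //                  +-------------|-------------------------+
  //                                +-> Exit -> After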
  assert(Preheader);
  assert(isa<BranchInst>(Preheader->getTerminator()) &&
         "Preheader must terminate with unconditional branch");
  assert(Preheader->getSingleSuccessor() == Header &&
         "Preheader must jump to header");

  assert(Header);
  assert(isa<BranchInst>(Header->getTerminator()) &&
         "Header must terminate with unconditional branch");
  assert(Header->getSingleSuccessor() == Cond &&
         "Header must jump to exiting block");

  assert(Cond);
  assert(Cond->getSinglePredecessor() == Header &&
         "Exiting block only reachable from header");
  assert(isa<BranchInst>(Cond->getTerminator()) &&
         "Exiting block must terminate with conditional branch");
  assert(size(successors(Cond)) == 2 &&
         "Exiting block must have two successors");
  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
         "Exiting block's first successor must jump to the body");
  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
         "Exiting block's second successor must exit the loop");

  assert(Body);
  assert(Body->getSinglePredecessor() == Cond &&
         "Body only reachable from exiting block");
  assert(!isa<PHINode>(Body->front()));

  assert(Latch);
  assert(isa<BranchInst>(Latch->getTerminator()) &&
         "Latch must terminate with unconditional branch");
  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
  // TODO: To support simple redirecting of the end of the body code when it
  // has multiple exit edges, introduce another auxiliary basic block like
  // preheader and after.
  assert(Latch->getSinglePredecessor() != nullptr);
  assert(!isa<PHINode>(Latch->front()));

  assert(Exit);
  assert(isa<BranchInst>(Exit->getTerminator()) &&
         "Exit block must terminate with unconditional branch");
  assert(Exit->getSingleSuccessor() == After &&
         "Exit block must jump to after block");

  assert(After);
  assert(After->getSinglePredecessor() == Exit &&
         "After block only reachable from exit block");
  assert(After->empty() || !isa<PHINode>(After->front()));

  Instruction *IndVar = getIndVar();
  assert(IndVar && "Canonical induction variable not found?");
  assert(isa<IntegerType>(IndVar->getType()) &&
         "Induction variable must be an integer");
  assert(cast<PHINode>(IndVar)->getParent() == Header &&
         "Induction variable must be a PHI in the loop header");
  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
  assert(
      cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);

  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
             ->isOne());

  Value *TripCount = getTripCount();
  assert(TripCount && "Loop trip count not found?");
  assert(IndVar->getType() == TripCount->getType() &&
         "Trip count and induction variable must have the same type");
  auto *CmpI = cast<CmpInst>(&Cond->front());
  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
         "Exit condition must be an unsigned less-than comparison");
  assert(CmpI->getOperand(0) == IndVar &&
         "Exit condition must compare the induction variable");
  assert(CmpI->getOperand(1) == TripCount &&
         "Exit condition must compare with the trip count");
#endif
}

void CanonicalLoopInfo::invalidate() {
  Header = nullptr;
  Cond = nullptr;
  Latch = nullptr;
  Exit = nullptr;
}