//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
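
    // Illustrative sketch (not emitted verbatim): for an untied task split
    // into two parts, the dispatch built in Enter() above resembles the
    // following IR, with one switch case added per task part:
    //
    //   %part = load i32, ptr %part_id
    //   switch i32 %part, label %.untied.done. [
    //     i32 0, label %.untied.jmp.
    //     i32 1, label %.untied.jmp.1
    //   ]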
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}

/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMB = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
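
// Illustrative combination (not emitted verbatim here): the ident_t flags for
// the implicit barrier at the end of a worksharing 'for' would plausibly be
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR   // 0x02 | 0x40 == 0x42
// with the KMPC bit contributed when the ident itself is built.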
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
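
// For example (illustrative): a translation unit containing
//   #pragma omp requires unified_shared_memory, reverse_offload
// would be summarized as
//   OMP_REQ_UNIFIED_SHARED_MEMORY | OMP_REQ_REVERSE_OFFLOAD   // 0x008 | 0x002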
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined; the runtime should get it
  /// from environment variables, as described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
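
// As built by getIdentStringFromSourceLocation() below, a psource string has
// the shape ";<file>;<function>;<line>;<column>;;", for example (illustrative
// values only):
//   ";t.c;foo;12;3;;"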
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
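
// For example (illustrative): '#pragma omp for schedule(monotonic: dynamic, 4)'
// would be encoded as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic   // 35 | (1 << 29)
// with the chunk size 4 passed separately to the dispatch-init runtime call.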
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
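
// Worked example (illustrative, not emitted verbatim): for
// 'reduction(+ : a[1:n])' where 'a' is 'int *', the array-section branch
// above computes
//   Size        = (&a[1 + n - 1] - &a[1]) + 1   // == n elements
//   SizeInChars = Size * sizeof(int)            // == n * 4 bytes
// and then binds 'Size' to the VLA size expression of the private copy's type.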
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
                                     hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  OMPBuilder.setConfig(Config);
  OffloadEntriesInfoManager.setConfig(Config);
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
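  //
  // Illustrative example (hypothetical pragma, not taken from the sources):
  // given
  //   #pragma omp declare reduction(my_add : int : omp_out += omp_in)
  // the generated combiner body is equivalent to
  //   *omp_out_parm += *omp_in_parm;
  // because 'omp_in' and 'omp_out' are privatized below to alias the
  // dereferenced 'omp_in_parm'/'omp_out_parm' parameters.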
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP and use the new block (=BB) as the destination to build a JumpDest
    // (via getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc &&
       CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }
  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }
  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc). Generate the thread id value and
  // cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
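
/// Clear all state cached for \p CGF's function (the thread id, user-defined
/// reduction and mapper mappings, lastprivate conditional types, and the
/// untied task stack) once code generation for the function has finished.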
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
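
/// Return the ident_t * type used for the location argument of the OpenMP
/// runtime entry points.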
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
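
/// Return the pointer type of the kmpc_micro signature used for outlined
/// parallel functions, building the underlying function type lazily.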
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
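
/// Return (creating it on first use) the __kmpc_for_static_init_* or, for GPU
/// distribute loops, the __kmpc_distribute_static_init_* runtime entry that
/// matches the induction variable's size and signedness.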
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
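
/// Return (creating it on first use) the __kmpc_dispatch_init_* entry used to
/// start a dynamically scheduled loop, selected by IV size and signedness.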
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
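
/// Return (creating it on first use) the __kmpc_dispatch_fini_* entry for
/// dynamically scheduled loops, selected by IV size and signedness.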
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
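
/// Return (creating it on first use) the __kmpc_dispatch_next_* entry that
/// fetches the bounds of the next chunk of a dynamically scheduled loop; it
/// returns nonzero while work remains.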
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static llvm::TargetRegionEntryInfo
getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                         StringRef ParentName = "") {
  SourceManager &SM = C.getSourceManager();
  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)
  assert(Loc.isValid() && "Source location is expected to be always valid.");
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }
  return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
                                     PLoc.getLine());
}
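
/// Return the address of the reference pointer that indirects accesses to a
/// declare target 'link' variable (or a 'to'/'enter' variable under unified
/// shared memory); returns an invalid address otherwise.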
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        auto EntryInfo = getTargetEntryUniqueInfo(
            CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
        OS << llvm::format("_%x", EntryInfo.FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);
      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
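
/// Return (creating it on first use) the per-variable cache global consumed
/// by __kmpc_threadprivate_cached for the threadprivate variable \p VD.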
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}
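
/// Return the address of the current thread's copy of the threadprivate
/// variable \p VD: the TLS address itself when TLS is usable, otherwise the
/// result of a __kmpc_threadprivate_cached call.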
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;
  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
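
/// Emit the runtime calls that initialize the OpenMP library and register the
/// constructor, copy constructor, and destructor for a threadprivate variable
/// via __kmpc_threadprivate_register.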
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
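
/// Emit the constructor/destructor helpers required by a threadprivate
/// variable definition and register them with the runtime. When called
/// outside a function (no \p CGF), returns a dedicated initialization
/// function to be run at startup; otherwise emits the registration inline and
/// returns nullptr.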
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();
    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This must be NULL:
    // the parameter is reserved by the runtime, which currently asserts that
    // it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
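
/// Emit the entries required to construct and destroy a declare target
/// variable definition: on the device, weak functions that run the
/// initializer/destructor; on the host, placeholder globals used as entry
/// IDs. Both are registered with the offload entries manager.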
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;
  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
  auto EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the device copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }
    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the device
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
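
/// Return the address of an artificial (compiler-generated) threadprivate
/// variable of type \p VarType named \p Name, using a TLS global when
/// supported and __kmpc_threadprivate_cached otherwise.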
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
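
/// Emit code for an OpenMP if clause: run \p ThenGen when \p Cond is true and
/// \p ElseGen otherwise, emitting only the live arm when the condition folds
/// to a constant.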
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }
  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
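
/// Emit a parallel region call: __kmpc_fork_call on the outlined function, or
/// a serialized invocation bracketed by __kmpc_serialized_parallel and
/// __kmpc_end_serialized_parallel when the if clause evaluates to false.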
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);
    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, as "kmp_int32 *gtid"). Otherwise, in regular serial code, get the
// thread ID by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
// stash it in a temporary, and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
  return ThreadIDTemp;
}
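
/// Return (creating it on first use) the internal lock variable used to guard
/// the critical region named \p CriticalName.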
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
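
/// Emit the helper passed to __kmpc_copyprivate: given two arrays of pointers
/// to the copyprivate variables, copy each source value into the
/// corresponding destination using the provided assignment expressions.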
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
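
/// Map the directive kind to the ident_t flags that describe the kind of
/// barrier (implicit for/sections/single, explicit, or generic implicit).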
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose a static schedule with chunk size 1.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
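
/// Emit a barrier for the given directive kind. Inside a cancellable region
/// this uses __kmpc_cancel_barrier and, when requested, branches out of the
/// construct if cancellation was observed; otherwise it emits __kmpc_barrier.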
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }
  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only the static schedule is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}
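
/// Fold the schedule clause modifiers (monotonic/nonmonotonic/simd) into the
/// runtime schedule enumeration value, applying the OpenMP 5.0 default of
/// nonmonotonic for non-static, non-ordered schedules.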
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);
  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

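// For example (a sketch), `#pragma omp for schedule(static)` with a signed
// 32-bit induction variable lowers roughly to:
//   __kmpc_for_static_init_4(&loc, tid, schedtype, &lastiter, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// followed by __kmpc_for_static_fini(&loc, tid) after the loop.
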
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

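// For example (a sketch), the dynamically scheduled loop emitted around a
// worksharing loop body looks roughly like:
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; ++i) { /* body */ }
//   }
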
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

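// For example (a sketch), `#pragma omp parallel num_threads(4)` emits
//   __kmpc_push_num_threads(&loc, tid, 4);
// immediately before forking the parallel region.
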
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

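// Analogously (a sketch), `#pragma omp parallel proc_bind(close)` emits
//   __kmpc_push_proc_bind(&loc, tid, /*proc_bind=*/OMP_PROC_BIND_close);
// before forking the parallel region.
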
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };
  OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
                                                 ErrorReportFn);
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }
  OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // The default allocator with no explicit allocator expression means the
  // default allocation is used, i.e. the declaration is not allocatable.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

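// For example (a sketch): a declaration such as
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// is allocatable here, while a plain `#pragma omp allocate(x)` (default
// allocator, no allocator expression) is not.
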
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

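// For example (a sketch), for `#pragma omp task private(a) firstprivate(b)`
// with `int a; double b;` this builds roughly:
//   struct .kmp_privates.t { double b; int a; };
// (fields appear in the alignment-sorted order computed by the caller).
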
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);
  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }
  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

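// For example (a sketch), for a clause such as
//   #pragma omp task affinity(iterator(i = 0 : n) : a[i])
// instantiating this scope emits, in effect:
//   for (counter = 0; counter < n; ++counter) { i = 0 + counter * 1; ... }
// around whatever code is generated while the scope object is alive; the
// destructor closes the loop nest in reverse order.
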
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

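// For example (a sketch): for an array-shaping expression `([n][m])p` over
// `double *p` this computes size = n * m * sizeof(double); for an array
// section `a[lo : len]` it computes (&a[last] + 1) - &a[lo] in bytes, where
// `last` is the section's upper bound; otherwise it falls back to the size of
// the expression's type.
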
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

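// The record built above mirrors (a sketch of) the runtime-side layout:
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     int32_t  flags;
//   };
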
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);
  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
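  // For example (a sketch): a tied task with a priority clause and no
  // destructors gets Flags = TiedFlag | PriorityFlag = 0x1 | 0x20 = 0x21.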
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if any; otherwise, use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);
    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }
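    // A variable-length array is used when the iterator trip counts are only
    // known at run time; otherwise a fixed-size array of NumAffinities
    // elements suffices.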
    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the items that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
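    // PosLVal holds the next free slot in memory so the iterator-expanded
    // items below can keep appending where the plain items left off.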
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now until the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
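  // The task-dup function matters only for taskloop: the runtime clones the
  // task when splitting the iteration space, and each clone's
  // privates/lastprivates must be (re)initialized from the original task.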
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
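
// The record built above mirrors the runtime's layout, roughly:
//   struct kmp_depend_info { intptr_t base_addr; size_t len; uint8_t flags; };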

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
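
// A depobj handle points at element 1 of its allocation; the header element
// at index -1 stores the number of dependencies in its base_addr field (see
// emitDepobjDependClause below).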

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Memcpy the dependency data out of the depobj array.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
      // Advance the position past the copied elements:
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate the number of depobj dependencies and regular deps with
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }
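  // Total array size = plain dependencies (NumDependencies) + elements pulled
  // out of depobj objects + iterator-expanded dependencies.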
  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
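  // The array is filled in three passes: plain dependencies first, then
  // iterator-expanded ones, then the payloads copied out of depobj objects.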
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
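  // Either way, Size now covers the dependency elements plus the one header
  // element, rounded up to the record alignment.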
  // Need to allocate in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
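  // Element 0 is the header, so real dependencies start at index 1; the
  // pointer returned below is therefore advanced past the header.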
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Rewind past the header element so the whole allocation is freed.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
  Address Begin = Base.getAddress(CGF);
  // Compute the pointer past the last element (the loop bound).
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);
  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
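  // These map onto __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps,
  // dep_list, ndeps_noalias, noalias_dep_list); the noalias list is currently
  // always empty.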
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };
  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
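  // Used on the if-clause-false path below: __kmpc_omp_taskwait_deps_51 waits
  // for the listed dependencies to be satisfied; the final argument carries
  // the has_no_wait flag.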
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // nogroup is always 1 because the taskgroup is
                         // emitted by the compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();
  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();
  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
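  // For a VLA, the slot after its pointer carries the element count; binding
  // it to the VLA's size expression lets EmitVariablyModifiedType recreate
  // the type inside this function.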
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;
  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;
  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //         RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  ASTContext &C = CGM.getContext();
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }
  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve a slot for the array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }
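  // RedList's layout matches what the generated reduction_func unpacks: one
  // pointer per reduction item, plus a size slot after each VLA pointer.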
  4905. // 2. Emit reduce_func().
  4906. llvm::Function *ReductionFn =
  4907. emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
  4908. Privates, LHSExprs, RHSExprs, ReductionOps);
  4909. // 3. Create static kmp_critical_name lock = { 0 };
  4910. std::string Name = getName({"reduction"});
  4911. llvm::Value *Lock = getCriticalRegionLock(Name);
  4912. // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  4913. // RedList, reduce_func, &<lock>);
  4914. llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  4915. llvm::Value *ThreadId = getThreadID(CGF, Loc);
  4916. llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  4917. llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  4918. ReductionList.getPointer(), CGF.VoidPtrTy);
  4919. llvm::Value *Args[] = {
  4920. IdentTLoc, // ident_t *<loc>
  4921. ThreadId, // i32 <gtid>
  4922. CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
  4923. ReductionArrayTySize, // size_type sizeof(RedList)
  4924. RL, // void *RedList
  4925. ReductionFn, // void (*) (void *, void *) <reduce_func>
  4926. Lock // kmp_critical_name *&<lock>
  4927. };
  4928. llvm::Value *Res = CGF.EmitRuntimeCall(
  4929. OMPBuilder.getOrCreateRuntimeFunction(
  4930. CGM.getModule(),
  4931. WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
  4932. Args);
  4933. // 5. Build switch(res)
  4934. llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  4935. llvm::SwitchInst *SwInst =
  4936. CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
  4937. // 6. Build case 1:
  4938. // ...
  4939. // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  4940. // ...
  4941. // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  4942. // break;
  4943. llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  4944. SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  4945. CGF.EmitBlock(Case1BB);
  4946. // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  4947. llvm::Value *EndArgs[] = {
  4948. IdentTLoc, // ident_t *<loc>
  4949. ThreadId, // i32 <gtid>
  4950. Lock // kmp_critical_name *&<lock>
  4951. };
  4952. auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
  4953. CodeGenFunction &CGF, PrePostActionTy &Action) {
  4954. CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
  4955. const auto *IPriv = Privates.begin();
  4956. const auto *ILHS = LHSExprs.begin();
  4957. const auto *IRHS = RHSExprs.begin();
  4958. for (const Expr *E : ReductionOps) {
  4959. RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
  4960. cast<DeclRefExpr>(*IRHS));
  4961. ++IPriv;
  4962. ++ILHS;
  4963. ++IRHS;
  4964. }
  4965. };
  4966. RegionCodeGenTy RCG(CodeGen);
  4967. CommonActionTy Action(
  4968. nullptr, std::nullopt,
  4969. OMPBuilder.getOrCreateRuntimeFunction(
  4970. CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
  4971. : OMPRTL___kmpc_end_reduce),
  4972. EndArgs);
  4973. RCG.setAction(Action);
  4974. RCG(CGF);
  4975. CGF.EmitBranch(DefaultBB);
  4976. // 7. Build case 2:
  4977. // ...
  4978. // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  4979. // ...
  4980. // break;
  4981. llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  4982. SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  4983. CGF.EmitBlock(Case2BB);
  4984. auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
  4985. CodeGenFunction &CGF, PrePostActionTy &Action) {
  4986. const auto *ILHS = LHSExprs.begin();
  4987. const auto *IRHS = RHSExprs.begin();
  4988. const auto *IPriv = Privates.begin();
  4989. for (const Expr *E : ReductionOps) {
  4990. const Expr *XExpr = nullptr;
  4991. const Expr *EExpr = nullptr;
  4992. const Expr *UpExpr = nullptr;
  4993. BinaryOperatorKind BO = BO_Comma;
  4994. if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
  4995. if (BO->getOpcode() == BO_Assign) {
  4996. XExpr = BO->getLHS();
  4997. UpExpr = BO->getRHS();
  4998. }
  4999. }
  5000. // Try to emit update expression as a simple atomic.
  5001. const Expr *RHSExpr = UpExpr;
  5002. if (RHSExpr) {
  5003. // Analyze RHS part of the whole expression.
  5004. if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
  5005. RHSExpr->IgnoreParenImpCasts())) {
5006. // If this is a conditional operator, analyze its condition for a
5007. // min/max reduction operator.
  5008. RHSExpr = ACO->getCond();
  5009. }
  5010. if (const auto *BORHS =
  5011. dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
  5012. EExpr = BORHS->getRHS();
  5013. BO = BORHS->getOpcode();
  5014. }
  5015. }
  5016. if (XExpr) {
  5017. const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
  5018. auto &&AtomicRedGen = [BO, VD,
  5019. Loc](CodeGenFunction &CGF, const Expr *XExpr,
  5020. const Expr *EExpr, const Expr *UpExpr) {
  5021. LValue X = CGF.EmitLValue(XExpr);
  5022. RValue E;
  5023. if (EExpr)
  5024. E = CGF.EmitAnyExpr(EExpr);
  5025. CGF.EmitOMPAtomicSimpleUpdateExpr(
  5026. X, E, BO, /*IsXLHSInRHSPart=*/true,
  5027. llvm::AtomicOrdering::Monotonic, Loc,
  5028. [&CGF, UpExpr, VD, Loc](RValue XRValue) {
  5029. CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  5030. Address LHSTemp = CGF.CreateMemTemp(VD->getType());
  5031. CGF.emitOMPSimpleStore(
  5032. CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
  5033. VD->getType().getNonReferenceType(), Loc);
  5034. PrivateScope.addPrivate(VD, LHSTemp);
  5035. (void)PrivateScope.Privatize();
  5036. return CGF.EmitAnyExpr(UpExpr);
  5037. });
  5038. };
  5039. if ((*IPriv)->getType()->isArrayType()) {
  5040. // Emit atomic reduction for array section.
  5041. const auto *RHSVar =
  5042. cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
  5043. EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
  5044. AtomicRedGen, XExpr, EExpr, UpExpr);
  5045. } else {
  5046. // Emit atomic reduction for array subscript or single variable.
  5047. AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
  5048. }
  5049. } else {
  5050. // Emit as a critical region.
  5051. auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
  5052. const Expr *, const Expr *) {
  5053. CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
  5054. std::string Name = RT.getName({"atomic_reduction"});
  5055. RT.emitCriticalRegion(
  5056. CGF, Name,
  5057. [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
  5058. Action.Enter(CGF);
  5059. emitReductionCombiner(CGF, E);
  5060. },
  5061. Loc);
  5062. };
  5063. if ((*IPriv)->getType()->isArrayType()) {
  5064. const auto *LHSVar =
  5065. cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
  5066. const auto *RHSVar =
  5067. cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
  5068. EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
  5069. CritRedGen);
  5070. } else {
  5071. CritRedGen(CGF, nullptr, nullptr, nullptr);
  5072. }
  5073. }
  5074. ++ILHS;
  5075. ++IRHS;
  5076. ++IPriv;
  5077. }
  5078. };
  5079. RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  5080. if (!WithNowait) {
  5081. // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
  5082. llvm::Value *EndArgs[] = {
  5083. IdentTLoc, // ident_t *<loc>
  5084. ThreadId, // i32 <gtid>
  5085. Lock // kmp_critical_name *&<lock>
  5086. };
  5087. CommonActionTy Action(nullptr, std::nullopt,
  5088. OMPBuilder.getOrCreateRuntimeFunction(
  5089. CGM.getModule(), OMPRTL___kmpc_end_reduce),
  5090. EndArgs);
  5091. AtomicRCG.setAction(Action);
  5092. AtomicRCG(CGF);
  5093. } else {
  5094. AtomicRCG(CGF);
  5095. }
  5096. CGF.EmitBranch(DefaultBB);
  5097. CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
  5098. }
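// Consolidated sketch of the pattern generated above (illustrative; note that
// in case 2 the __kmpc_end_reduce call is emitted only when 'nowait' is
// absent):
//   res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
//                                RedList, reduce_func, &<lock>);
//   switch (res) {
//   case 1: <non-atomic combines>; __kmpc_end_reduce{_nowait}(...); break;
//   case 2: <atomic combines>; break;
//   default:;
//   }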
5099. /// Generates a unique name for artificial threadprivate variables.
  5100. /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
  5101. static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
  5102. const Expr *Ref) {
  5103. SmallString<256> Buffer;
  5104. llvm::raw_svector_ostream Out(Buffer);
  5105. const clang::DeclRefExpr *DE;
  5106. const VarDecl *D = ::getBaseDecl(Ref, DE);
  5107. if (!D)
  5108. D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  5109. D = D->getCanonicalDecl();
  5110. std::string Name = CGM.getOpenMPRuntime().getName(
  5111. {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  5112. Out << Prefix << Name << "_"
  5113. << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  5114. return std::string(Out.str());
  5115. }
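// Illustrative example only (the raw source-location encoding varies per
// compilation): for a local variable 'a' and Prefix "reduction_size", the
// generated name looks like
//   reduction_size.a_12345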
  5116. /// Emits reduction initializer function:
  5117. /// \code
  5118. /// void @.red_init(void* %arg, void* %orig) {
  5119. /// %0 = bitcast void* %arg to <type>*
  5120. /// store <type> <init>, <type>* %0
  5121. /// ret void
  5122. /// }
  5123. /// \endcode
  5124. static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
  5125. SourceLocation Loc,
  5126. ReductionCodeGen &RCG, unsigned N) {
  5127. ASTContext &C = CGM.getContext();
  5128. QualType VoidPtrTy = C.VoidPtrTy;
  5129. VoidPtrTy.addRestrict();
  5130. FunctionArgList Args;
  5131. ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
  5132. ImplicitParamDecl::Other);
  5133. ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
  5134. ImplicitParamDecl::Other);
  5135. Args.emplace_back(&Param);
  5136. Args.emplace_back(&ParamOrig);
  5137. const auto &FnInfo =
  5138. CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  5139. llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  5140. std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  5141. auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
  5142. Name, &CGM.getModule());
  5143. CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  5144. Fn->setDoesNotRecurse();
  5145. CodeGenFunction CGF(CGM);
  5146. CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  5147. QualType PrivateType = RCG.getPrivateType(N);
  5148. Address PrivateAddr = CGF.EmitLoadOfPointer(
  5149. CGF.Builder.CreateElementBitCast(
  5150. CGF.GetAddrOfLocalVar(&Param),
  5151. CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
  5152. C.getPointerType(PrivateType)->castAs<PointerType>());
  5153. llvm::Value *Size = nullptr;
  5154. // If the size of the reduction item is non-constant, load it from global
  5155. // threadprivate variable.
  5156. if (RCG.getSizes(N).second) {
  5157. Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
  5158. CGF, CGM.getContext().getSizeType(),
  5159. generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
  5160. Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
  5161. CGM.getContext().getSizeType(), Loc);
  5162. }
  5163. RCG.emitAggregateType(CGF, N, Size);
  5164. Address OrigAddr = Address::invalid();
5165. // If the initializer uses the initializer from a declare reduction
5166. // construct, emit a pointer to the address of the original reduction item
5167. // (required by the reduction initializer).
  5168. if (RCG.usesReductionInitializer(N)) {
  5169. Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
  5170. OrigAddr = CGF.EmitLoadOfPointer(
  5171. SharedAddr,
  5172. CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  5173. }
  5174. // Emit the initializer:
  5175. // %0 = bitcast void* %arg to <type>*
  5176. // store <type> <init>, <type>* %0
  5177. RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
  5178. [](CodeGenFunction &) { return false; });
  5179. CGF.FinishFunction();
  5180. return Fn;
  5181. }
  5182. /// Emits reduction combiner function:
  5183. /// \code
  5184. /// void @.red_comb(void* %arg0, void* %arg1) {
  5185. /// %lhs = bitcast void* %arg0 to <type>*
  5186. /// %rhs = bitcast void* %arg1 to <type>*
  5187. /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
  5188. /// store <type> %2, <type>* %lhs
  5189. /// ret void
  5190. /// }
  5191. /// \endcode
  5192. static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
  5193. SourceLocation Loc,
  5194. ReductionCodeGen &RCG, unsigned N,
  5195. const Expr *ReductionOp,
  5196. const Expr *LHS, const Expr *RHS,
  5197. const Expr *PrivateRef) {
  5198. ASTContext &C = CGM.getContext();
  5199. const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  5200. const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  5201. FunctionArgList Args;
  5202. ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
  5203. C.VoidPtrTy, ImplicitParamDecl::Other);
  5204. ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  5205. ImplicitParamDecl::Other);
  5206. Args.emplace_back(&ParamInOut);
  5207. Args.emplace_back(&ParamIn);
  5208. const auto &FnInfo =
  5209. CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  5210. llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  5211. std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  5212. auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
  5213. Name, &CGM.getModule());
  5214. CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  5215. Fn->setDoesNotRecurse();
  5216. CodeGenFunction CGF(CGM);
  5217. CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  5218. llvm::Value *Size = nullptr;
  5219. // If the size of the reduction item is non-constant, load it from global
  5220. // threadprivate variable.
  5221. if (RCG.getSizes(N).second) {
  5222. Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
  5223. CGF, CGM.getContext().getSizeType(),
  5224. generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
  5225. Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
  5226. CGM.getContext().getSizeType(), Loc);
  5227. }
  5228. RCG.emitAggregateType(CGF, N, Size);
  5229. // Remap lhs and rhs variables to the addresses of the function arguments.
  5230. // %lhs = bitcast void* %arg0 to <type>*
  5231. // %rhs = bitcast void* %arg1 to <type>*
  5232. CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  5233. PrivateScope.addPrivate(
  5234. LHSVD,
  5235. // Pull out the pointer to the variable.
  5236. CGF.EmitLoadOfPointer(
  5237. CGF.Builder.CreateElementBitCast(
  5238. CGF.GetAddrOfLocalVar(&ParamInOut),
  5239. CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
  5240. C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  5241. PrivateScope.addPrivate(
  5242. RHSVD,
  5243. // Pull out the pointer to the variable.
  5244. CGF.EmitLoadOfPointer(
  5245. CGF.Builder.CreateElementBitCast(
  5246. CGF.GetAddrOfLocalVar(&ParamIn),
  5247. CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
  5248. C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  5249. PrivateScope.Privatize();
  5250. // Emit the combiner body:
  5251. // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  5252. // store <type> %2, <type>* %lhs
  5253. CGM.getOpenMPRuntime().emitSingleReductionCombiner(
  5254. CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
  5255. cast<DeclRefExpr>(RHS));
  5256. CGF.FinishFunction();
  5257. return Fn;
  5258. }
  5259. /// Emits reduction finalizer function:
  5260. /// \code
  5261. /// void @.red_fini(void* %arg) {
  5262. /// %0 = bitcast void* %arg to <type>*
  5263. /// <destroy>(<type>* %0)
  5264. /// ret void
  5265. /// }
  5266. /// \endcode
  5267. static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
  5268. SourceLocation Loc,
  5269. ReductionCodeGen &RCG, unsigned N) {
  5270. if (!RCG.needCleanups(N))
  5271. return nullptr;
  5272. ASTContext &C = CGM.getContext();
  5273. FunctionArgList Args;
  5274. ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  5275. ImplicitParamDecl::Other);
  5276. Args.emplace_back(&Param);
  5277. const auto &FnInfo =
  5278. CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  5279. llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  5280. std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  5281. auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
  5282. Name, &CGM.getModule());
  5283. CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  5284. Fn->setDoesNotRecurse();
  5285. CodeGenFunction CGF(CGM);
  5286. CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  5287. Address PrivateAddr = CGF.EmitLoadOfPointer(
  5288. CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  5289. llvm::Value *Size = nullptr;
  5290. // If the size of the reduction item is non-constant, load it from global
  5291. // threadprivate variable.
  5292. if (RCG.getSizes(N).second) {
  5293. Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
  5294. CGF, CGM.getContext().getSizeType(),
  5295. generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
  5296. Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
  5297. CGM.getContext().getSizeType(), Loc);
  5298. }
  5299. RCG.emitAggregateType(CGF, N, Size);
  5300. // Emit the finalizer body:
  5301. // <destroy>(<type>* %0)
  5302. RCG.emitCleanups(CGF, N, PrivateAddr);
  5303. CGF.FinishFunction(Loc);
  5304. return Fn;
  5305. }
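/// For illustration, a task reduction such as
/// \code
/// #pragma omp taskgroup task_reduction(+ : x)
/// \endcode
/// drives the initialization below: one kmp_taskred_input_t entry is built
/// for 'x', with reduce_init/reduce_fini/reduce_comb pointing at the helper
/// functions emitted above.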
  5306. llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
  5307. CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
  5308. ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  5309. if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
  5310. return nullptr;
  5311. // Build typedef struct:
  5312. // kmp_taskred_input {
  5313. // void *reduce_shar; // shared reduction item
  5314. // void *reduce_orig; // original reduction item used for initialization
  5315. // size_t reduce_size; // size of data item
  5316. // void *reduce_init; // data initialization routine
  5317. // void *reduce_fini; // data finalization routine
  5318. // void *reduce_comb; // data combiner routine
  5319. // kmp_task_red_flags_t flags; // flags for additional info from compiler
  5320. // } kmp_taskred_input_t;
  5321. ASTContext &C = CGM.getContext();
  5322. RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  5323. RD->startDefinition();
  5324. const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  5325. const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  5326. const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  5327. const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  5328. const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  5329. const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  5330. const FieldDecl *FlagsFD = addFieldToRecordDecl(
  5331. C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  5332. RD->completeDefinition();
  5333. QualType RDType = C.getRecordType(RD);
  5334. unsigned Size = Data.ReductionVars.size();
  5335. llvm::APInt ArraySize(/*numBits=*/64, Size);
  5336. QualType ArrayRDType = C.getConstantArrayType(
  5337. RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  5338. // kmp_task_red_input_t .rd_input.[Size];
  5339. Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  5340. ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
  5341. Data.ReductionCopies, Data.ReductionOps);
  5342. for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
  5343. // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
  5344. llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
  5345. llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
  5346. llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
  5347. TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
  5348. /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
  5349. ".rd_input.gep.");
  5350. LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
  5351. // ElemLVal.reduce_shar = &Shareds[Cnt];
  5352. LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
  5353. RCG.emitSharedOrigLValue(CGF, Cnt);
  5354. llvm::Value *CastedShared =
  5355. CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
  5356. CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
  5357. // ElemLVal.reduce_orig = &Origs[Cnt];
  5358. LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
  5359. llvm::Value *CastedOrig =
  5360. CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
  5361. CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
  5362. RCG.emitAggregateType(CGF, Cnt);
  5363. llvm::Value *SizeValInChars;
  5364. llvm::Value *SizeVal;
  5365. std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5366. // We use delayed creation/initialization for VLAs and array sections. It is
5367. // required because the runtime does not provide a way to pass the sizes of
5368. // VLAs/array sections to the initializer/combiner/finalizer functions.
5369. // Instead, threadprivate global variables are used to store these values
5370. // and make them available to those functions.
  5371. bool DelayedCreation = !!SizeVal;
  5372. SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
  5373. /*isSigned=*/false);
  5374. LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
  5375. CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
  5376. // ElemLVal.reduce_init = init;
  5377. LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
  5378. llvm::Value *InitAddr =
  5379. CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
  5380. CGF.EmitStoreOfScalar(InitAddr, InitLVal);
  5381. // ElemLVal.reduce_fini = fini;
  5382. LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
  5383. llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
  5384. llvm::Value *FiniAddr = Fini
  5385. ? CGF.EmitCastToVoidPtr(Fini)
  5386. : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
  5387. CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
  5388. // ElemLVal.reduce_comb = comb;
  5389. LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
  5390. llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
  5391. CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
  5392. RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
  5393. CGF.EmitStoreOfScalar(CombAddr, CombLVal);
  5394. // ElemLVal.flags = 0;
  5395. LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
  5396. if (DelayedCreation) {
  5397. CGF.EmitStoreOfScalar(
  5398. llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
  5399. FlagsLVal);
  5400. } else
  5401. CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
  5402. FlagsLVal.getType());
  5403. }
  5404. if (Data.IsReductionWithTaskMod) {
  5405. // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  5406. // is_ws, int num, void *data);
  5407. llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  5408. llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
  5409. CGM.IntTy, /*isSigned=*/true);
  5410. llvm::Value *Args[] = {
  5411. IdentTLoc, GTid,
  5412. llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
  5413. /*isSigned=*/true),
  5414. llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
  5415. CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  5416. TaskRedInput.getPointer(), CGM.VoidPtrTy)};
  5417. return CGF.EmitRuntimeCall(
  5418. OMPBuilder.getOrCreateRuntimeFunction(
  5419. CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
  5420. Args);
  5421. }
  5422. // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  5423. llvm::Value *Args[] = {
  5424. CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
  5425. /*isSigned=*/true),
  5426. llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
  5427. CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
  5428. CGM.VoidPtrTy)};
  5429. return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
  5430. CGM.getModule(), OMPRTL___kmpc_taskred_init),
  5431. Args);
  5432. }
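// Note (illustrative): Data.IsReductionWithTaskMod above corresponds to the
// OpenMP 5.0 'task' reduction modifier, e.g.
//   #pragma omp parallel reduction(task, + : x)
// in which case the *_modifier_init entry point is used and the matching
// finalization is emitted by emitTaskReductionFini() below.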
  5433. void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
  5434. SourceLocation Loc,
  5435. bool IsWorksharingReduction) {
5436. // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5437. // int is_ws);
  5438. llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  5439. llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
  5440. CGM.IntTy, /*isSigned=*/true);
  5441. llvm::Value *Args[] = {IdentTLoc, GTid,
  5442. llvm::ConstantInt::get(CGM.IntTy,
  5443. IsWorksharingReduction ? 1 : 0,
  5444. /*isSigned=*/true)};
  5445. (void)CGF.EmitRuntimeCall(
  5446. OMPBuilder.getOrCreateRuntimeFunction(
  5447. CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
  5448. Args);
  5449. }
  5450. void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
  5451. SourceLocation Loc,
  5452. ReductionCodeGen &RCG,
  5453. unsigned N) {
  5454. auto Sizes = RCG.getSizes(N);
5455. // Emit a threadprivate global variable if the size is non-constant
5456. // (Sizes.second != nullptr).
  5457. if (Sizes.second) {
  5458. llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
  5459. /*isSigned=*/false);
  5460. Address SizeAddr = getAddrOfArtificialThreadPrivate(
  5461. CGF, CGM.getContext().getSizeType(),
  5462. generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
  5463. CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  5464. }
  5465. }
  5466. Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
  5467. SourceLocation Loc,
  5468. llvm::Value *ReductionsPtr,
  5469. LValue SharedLVal) {
  5470. // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  5471. // *d);
  5472. llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
  5473. CGM.IntTy,
  5474. /*isSigned=*/true),
  5475. ReductionsPtr,
  5476. CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  5477. SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  5478. return Address(
  5479. CGF.EmitRuntimeCall(
  5480. OMPBuilder.getOrCreateRuntimeFunction(
  5481. CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
  5482. Args),
  5483. CGF.Int8Ty, SharedLVal.getAlignment());
  5484. }
  5485. void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
  5486. const OMPTaskDataTy &Data) {
  5487. if (!CGF.HaveInsertPoint())
  5488. return;
  5489. if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
  5490. // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
  5491. OMPBuilder.createTaskwait(CGF.Builder);
  5492. } else {
  5493. llvm::Value *ThreadID = getThreadID(CGF, Loc);
  5494. llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  5495. auto &M = CGM.getModule();
  5496. Address DependenciesArray = Address::invalid();
  5497. llvm::Value *NumOfElements;
  5498. std::tie(NumOfElements, DependenciesArray) =
  5499. emitDependClause(CGF, Data.Dependences, Loc);
  5500. if (!Data.Dependences.empty()) {
  5501. llvm::Value *DepWaitTaskArgs[7];
  5502. DepWaitTaskArgs[0] = UpLoc;
  5503. DepWaitTaskArgs[1] = ThreadID;
  5504. DepWaitTaskArgs[2] = NumOfElements;
  5505. DepWaitTaskArgs[3] = DependenciesArray.getPointer();
  5506. DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
  5507. DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  5508. DepWaitTaskArgs[6] =
  5509. llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  5510. CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5511. // If dependence info is specified, build a call to:
5512. // void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5513. //     kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
5514. //     kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait);
  5515. CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
  5516. M, OMPRTL___kmpc_omp_taskwait_deps_51),
  5517. DepWaitTaskArgs);
  5518. } else {
  5519. // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  5520. // global_tid);
  5521. llvm::Value *Args[] = {UpLoc, ThreadID};
  5522. // Ignore return result until untied tasks are supported.
  5523. CGF.EmitRuntimeCall(
  5524. OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
  5525. Args);
  5526. }
  5527. }
  5528. if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
  5529. Region->emitUntiedSwitch(CGF);
  5530. }
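// Illustrative mapping for the lowering above (legacy path, i.e. when the
// OpenMPIRBuilder is not used):
//   #pragma omp taskwait                 -> __kmpc_omp_taskwait(<loc>, <gtid>)
//   #pragma omp taskwait depend(in : a)  -> __kmpc_omp_taskwait_deps_51(...)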
  5531. void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
  5532. OpenMPDirectiveKind InnerKind,
  5533. const RegionCodeGenTy &CodeGen,
  5534. bool HasCancel) {
  5535. if (!CGF.HaveInsertPoint())
  5536. return;
  5537. InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
  5538. InnerKind != OMPD_critical &&
  5539. InnerKind != OMPD_master &&
  5540. InnerKind != OMPD_masked);
  5541. CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
  5542. }
  5543. namespace {
  5544. enum RTCancelKind {
  5545. CancelNoreq = 0,
  5546. CancelParallel = 1,
  5547. CancelLoop = 2,
  5548. CancelSections = 3,
  5549. CancelTaskgroup = 4
  5550. };
  5551. } // anonymous namespace
  5552. static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  5553. RTCancelKind CancelKind = CancelNoreq;
  5554. if (CancelRegion == OMPD_parallel)
  5555. CancelKind = CancelParallel;
  5556. else if (CancelRegion == OMPD_for)
  5557. CancelKind = CancelLoop;
  5558. else if (CancelRegion == OMPD_sections)
  5559. CancelKind = CancelSections;
  5560. else {
  5561. assert(CancelRegion == OMPD_taskgroup);
  5562. CancelKind = CancelTaskgroup;
  5563. }
  5564. return CancelKind;
  5565. }
  5566. void CGOpenMPRuntime::emitCancellationPointCall(
  5567. CodeGenFunction &CGF, SourceLocation Loc,
  5568. OpenMPDirectiveKind CancelRegion) {
  5569. if (!CGF.HaveInsertPoint())
  5570. return;
  5571. // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  5572. // global_tid, kmp_int32 cncl_kind);
  5573. if (auto *OMPRegionInfo =
  5574. dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
  5575. // For 'cancellation point taskgroup', the task region info may not have a
  5576. // cancel. This may instead happen in another adjacent task.
  5577. if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
  5578. llvm::Value *Args[] = {
  5579. emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
  5580. CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5581. // The runtime call returns nonzero if cancellation has been requested.
  5582. llvm::Value *Result = CGF.EmitRuntimeCall(
  5583. OMPBuilder.getOrCreateRuntimeFunction(
  5584. CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
  5585. Args);
  5586. // if (__kmpc_cancellationpoint()) {
  5587. // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
  5588. // exit from construct;
  5589. // }
  5590. llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
  5591. llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
  5592. llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
  5593. CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
  5594. CGF.EmitBlock(ExitBB);
  5595. if (CancelRegion == OMPD_parallel)
  5596. emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
  5597. // exit from construct;
  5598. CodeGenFunction::JumpDest CancelDest =
  5599. CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
  5600. CGF.EmitBranchThroughCleanup(CancelDest);
  5601. CGF.EmitBlock(ContBB, /*IsFinished=*/true);
  5602. }
  5603. }
  5604. }
  5605. void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
  5606. const Expr *IfCond,
  5607. OpenMPDirectiveKind CancelRegion) {
  5608. if (!CGF.HaveInsertPoint())
  5609. return;
  5610. // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  5611. // kmp_int32 cncl_kind);
  5612. auto &M = CGM.getModule();
  5613. if (auto *OMPRegionInfo =
  5614. dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
  5615. auto &&ThenGen = [this, &M, Loc, CancelRegion,
  5616. OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
  5617. CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
  5618. llvm::Value *Args[] = {
  5619. RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
  5620. CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5621. // The runtime call returns nonzero if cancellation has been requested.
  5622. llvm::Value *Result = CGF.EmitRuntimeCall(
  5623. OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
  5624. // if (__kmpc_cancel()) {
  5625. // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
  5626. // exit from construct;
  5627. // }
  5628. llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
  5629. llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
  5630. llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
  5631. CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
  5632. CGF.EmitBlock(ExitBB);
  5633. if (CancelRegion == OMPD_parallel)
  5634. RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
  5635. // exit from construct;
  5636. CodeGenFunction::JumpDest CancelDest =
  5637. CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
  5638. CGF.EmitBranchThroughCleanup(CancelDest);
  5639. CGF.EmitBlock(ContBB, /*IsFinished=*/true);
  5640. };
  5641. if (IfCond) {
  5642. emitIfClause(CGF, IfCond, ThenGen,
  5643. [](CodeGenFunction &, PrePostActionTy &) {});
  5644. } else {
  5645. RegionCodeGenTy ThenRCG(ThenGen);
  5646. ThenRCG(CGF);
  5647. }
  5648. }
  5649. }
  5650. namespace {
  5651. /// Cleanup action for uses_allocators support.
  5652. class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  5653. ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
  5654. public:
  5655. OMPUsesAllocatorsActionTy(
  5656. ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
  5657. : Allocators(Allocators) {}
  5658. void Enter(CodeGenFunction &CGF) override {
  5659. if (!CGF.HaveInsertPoint())
  5660. return;
  5661. for (const auto &AllocatorData : Allocators) {
  5662. CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
  5663. CGF, AllocatorData.first, AllocatorData.second);
  5664. }
  5665. }
  5666. void Exit(CodeGenFunction &CGF) override {
  5667. if (!CGF.HaveInsertPoint())
  5668. return;
  5669. for (const auto &AllocatorData : Allocators) {
  5670. CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
  5671. AllocatorData.first);
  5672. }
  5673. }
  5674. };
  5675. } // namespace
  5676. void CGOpenMPRuntime::emitTargetOutlinedFunction(
  5677. const OMPExecutableDirective &D, StringRef ParentName,
  5678. llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
  5679. bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  5680. assert(!ParentName.empty() && "Invalid target entry parent name!");
  5681. HasEmittedTargetRegion = true;
  5682. SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  5683. for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
  5684. for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
  5685. const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
  5686. if (!D.AllocatorTraits)
  5687. continue;
  5688. Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
  5689. }
  5690. }
  5691. OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  5692. CodeGen.setAction(UsesAllocatorAction);
  5693. emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
  5694. IsOffloadEntry, CodeGen);
  5695. }
  5696. void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
  5697. const Expr *Allocator,
  5698. const Expr *AllocatorTraits) {
  5699. llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  5700. ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  5701. // Use default memspace handle.
  5702. llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  5703. llvm::Value *NumTraits = llvm::ConstantInt::get(
  5704. CGF.IntTy, cast<ConstantArrayType>(
  5705. AllocatorTraits->getType()->getAsArrayTypeUnsafe())
  5706. ->getSize()
  5707. .getLimitedValue());
  5708. LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  5709. Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  5710. AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  5711. AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
  5712. AllocatorTraitsLVal.getBaseInfo(),
  5713. AllocatorTraitsLVal.getTBAAInfo());
  5714. llvm::Value *Traits =
  5715. CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
  5716. llvm::Value *AllocatorVal =
  5717. CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
  5718. CGM.getModule(), OMPRTL___kmpc_init_allocator),
  5719. {ThreadId, MemSpaceHandle, NumTraits, Traits});
  5720. // Store to allocator.
  5721. CGF.EmitVarDecl(*cast<VarDecl>(
  5722. cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  5723. LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  5724. AllocatorVal =
  5725. CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
  5726. Allocator->getType(), Allocator->getExprLoc());
  5727. CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
  5728. }
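// Illustration (names are hypothetical): given
//   omp_allocator_handle_t my_alloc;
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the code above initializes the handle roughly as
//   my_alloc = __kmpc_init_allocator(<gtid>, /*memspace=*/NULL,
//                                    <num traits>, &my_traits);
// and emitUsesAllocatorsFini() below emits the matching
//   __kmpc_destroy_allocator(<gtid>, my_alloc);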
  5729. void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
  5730. const Expr *Allocator) {
  5731. llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  5732. ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  5733. LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  5734. llvm::Value *AllocatorVal =
  5735. CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  5736. AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
  5737. CGF.getContext().VoidPtrTy,
  5738. Allocator->getExprLoc());
  5739. (void)CGF.EmitRuntimeCall(
  5740. OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
  5741. OMPRTL___kmpc_destroy_allocator),
  5742. {ThreadId, AllocatorVal});
  5743. }
  5744. void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
  5745. const OMPExecutableDirective &D, StringRef ParentName,
  5746. llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
  5747. bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  5748. auto EntryInfo =
  5749. getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);
  5750. CodeGenFunction CGF(CGM, true);
  5751. llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
  5752. [&CGF, &D, &CodeGen](StringRef EntryFnName) {
  5753. const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  5754. CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  5755. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  5756. return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
  5757. };
  5758. // Get NumTeams and ThreadLimit attributes
  5759. int32_t DefaultValTeams = -1;
  5760. int32_t DefaultValThreads = -1;
  5761. getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  5762. getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  5763. OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo,
  5764. GenerateOutlinedFunction, DefaultValTeams,
  5765. DefaultValThreads, IsOffloadEntry,
  5766. OutlinedFn, OutlinedFnID);
  5767. if (OutlinedFn != nullptr)
  5768. CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
  5769. }
  5770. /// Checks if the expression is constant or does not have non-trivial function
  5771. /// calls.
5772. static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  5773. // We can skip constant expressions.
  5774. // We can skip expressions with trivial calls or simple expressions.
  5775. return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
  5776. !E->hasNonTrivialCall(Ctx)) &&
  5777. !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
  5778. }
  5779. const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
  5780. const Stmt *Body) {
  5781. const Stmt *Child = Body->IgnoreContainers();
  5782. while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
  5783. Child = nullptr;
  5784. for (const Stmt *S : C->body()) {
  5785. if (const auto *E = dyn_cast<Expr>(S)) {
  5786. if (isTrivial(Ctx, E))
  5787. continue;
  5788. }
  5789. // Some of the statements can be ignored.
  5790. if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
  5791. isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
  5792. continue;
  5793. // Analyze declarations.
  5794. if (const auto *DS = dyn_cast<DeclStmt>(S)) {
  5795. if (llvm::all_of(DS->decls(), [](const Decl *D) {
  5796. if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
  5797. isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
  5798. isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
  5799. isa<UsingDirectiveDecl>(D) ||
  5800. isa<OMPDeclareReductionDecl>(D) ||
  5801. isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
  5802. return true;
  5803. const auto *VD = dyn_cast<VarDecl>(D);
  5804. if (!VD)
  5805. return false;
  5806. return VD->hasGlobalStorage() || !VD->isUsed();
  5807. }))
  5808. continue;
  5809. }
  5810. // Found multiple children - cannot get the one child only.
  5811. if (Child)
  5812. return nullptr;
  5813. Child = S;
  5814. }
  5815. if (Child)
  5816. Child = Child->IgnoreContainers();
  5817. }
  5818. return Child;
  5819. }
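// Illustrative example of the analysis above: in a captured body such as
//   { int unused; ; #pragma omp teams ... }
// the never-used local declaration and the null statement are skipped, so
// the teams directive is returned as the single child.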
  5820. const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
  5821. CodeGenFunction &CGF, const OMPExecutableDirective &D,
  5822. int32_t &DefaultVal) {
  5823. OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  5824. assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
  5825. "Expected target-based executable directive.");
  5826. switch (DirectiveKind) {
  5827. case OMPD_target: {
  5828. const auto *CS = D.getInnermostCapturedStmt();
  5829. const auto *Body =
  5830. CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  5831. const Stmt *ChildStmt =
  5832. CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
  5833. if (const auto *NestedDir =
  5834. dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
  5835. if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
  5836. if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
  5837. const Expr *NumTeams =
  5838. NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
  5839. if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
  5840. if (auto Constant =
  5841. NumTeams->getIntegerConstantExpr(CGF.getContext()))
  5842. DefaultVal = Constant->getExtValue();
  5843. return NumTeams;
  5844. }
  5845. DefaultVal = 0;
  5846. return nullptr;
  5847. }
  5848. if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
  5849. isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
  5850. DefaultVal = 1;
  5851. return nullptr;
  5852. }
  5853. DefaultVal = 1;
  5854. return nullptr;
  5855. }
5856. // A value of -1 is used to check whether we need to emit a teams region at all.
  5857. DefaultVal = -1;
  5858. return nullptr;
  5859. }
  5860. case OMPD_target_teams:
  5861. case OMPD_target_teams_distribute:
  5862. case OMPD_target_teams_distribute_simd:
  5863. case OMPD_target_teams_distribute_parallel_for:
  5864. case OMPD_target_teams_distribute_parallel_for_simd: {
  5865. if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
  5866. const Expr *NumTeams =
  5867. D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
  5868. if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
  5869. if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
  5870. DefaultVal = Constant->getExtValue();
  5871. return NumTeams;
  5872. }
  5873. DefaultVal = 0;
  5874. return nullptr;
  5875. }
  5876. case OMPD_target_parallel:
  5877. case OMPD_target_parallel_for:
  5878. case OMPD_target_parallel_for_simd:
  5879. case OMPD_target_simd:
  5880. DefaultVal = 1;
  5881. return nullptr;
  5882. case OMPD_parallel:
  5883. case OMPD_for:
  5884. case OMPD_parallel_for:
  5885. case OMPD_parallel_master:
  5886. case OMPD_parallel_sections:
  5887. case OMPD_for_simd:
  5888. case OMPD_parallel_for_simd:
  5889. case OMPD_cancel:
  5890. case OMPD_cancellation_point:
  5891. case OMPD_ordered:
  5892. case OMPD_threadprivate:
  5893. case OMPD_allocate:
  5894. case OMPD_task:
  5895. case OMPD_simd:
  5896. case OMPD_tile:
  5897. case OMPD_unroll:
  5898. case OMPD_sections:
  5899. case OMPD_section:
  5900. case OMPD_single:
  5901. case OMPD_master:
  5902. case OMPD_critical:
  5903. case OMPD_taskyield:
  5904. case OMPD_barrier:
  5905. case OMPD_taskwait:
  5906. case OMPD_taskgroup:
  5907. case OMPD_atomic:
  5908. case OMPD_flush:
  5909. case OMPD_depobj:
  5910. case OMPD_scan:
  5911. case OMPD_teams:
  5912. case OMPD_target_data:
  5913. case OMPD_target_exit_data:
  5914. case OMPD_target_enter_data:
  5915. case OMPD_distribute:
  5916. case OMPD_distribute_simd:
  5917. case OMPD_distribute_parallel_for:
  5918. case OMPD_distribute_parallel_for_simd:
  5919. case OMPD_teams_distribute:
  5920. case OMPD_teams_distribute_simd:
  5921. case OMPD_teams_distribute_parallel_for:
  5922. case OMPD_teams_distribute_parallel_for_simd:
  5923. case OMPD_target_update:
  5924. case OMPD_declare_simd:
  5925. case OMPD_declare_variant:
  5926. case OMPD_begin_declare_variant:
  5927. case OMPD_end_declare_variant:
  5928. case OMPD_declare_target:
  5929. case OMPD_end_declare_target:
  5930. case OMPD_declare_reduction:
  5931. case OMPD_declare_mapper:
  5932. case OMPD_taskloop:
  5933. case OMPD_taskloop_simd:
  5934. case OMPD_master_taskloop:
  5935. case OMPD_master_taskloop_simd:
  5936. case OMPD_parallel_master_taskloop:
  5937. case OMPD_parallel_master_taskloop_simd:
  5938. case OMPD_requires:
  5939. case OMPD_metadirective:
  5940. case OMPD_unknown:
  5941. break;
  5942. default:
  5943. break;
  5944. }
  5945. llvm_unreachable("Unexpected directive kind.");
  5946. }
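// Illustrative outcomes of the switch above:
//   #pragma omp target teams num_teams(4) -> returns the num_teams expression,
//                                            DefaultVal = 4
//   #pragma omp target parallel           -> returns nullptr, DefaultVal = 1
//   #pragma omp target (opaque body)      -> returns nullptr, DefaultVal = -1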
  5947. llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
  5948. CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  5949. assert(!CGF.getLangOpts().OpenMPIsDevice &&
  5950. "Clauses associated with the teams directive expected to be emitted "
  5951. "only for the host!");
  5952. CGBuilderTy &Bld = CGF.Builder;
  5953. int32_t DefaultNT = -1;
  5954. const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  5955. if (NumTeams != nullptr) {
  5956. OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  5957. switch (DirectiveKind) {
  5958. case OMPD_target: {
  5959. const auto *CS = D.getInnermostCapturedStmt();
  5960. CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
  5961. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  5962. llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
  5963. /*IgnoreResultAssign*/ true);
  5964. return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
  5965. /*isSigned=*/true);
  5966. }
  5967. case OMPD_target_teams:
  5968. case OMPD_target_teams_distribute:
  5969. case OMPD_target_teams_distribute_simd:
  5970. case OMPD_target_teams_distribute_parallel_for:
  5971. case OMPD_target_teams_distribute_parallel_for_simd: {
  5972. CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
  5973. llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
  5974. /*IgnoreResultAssign*/ true);
  5975. return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
  5976. /*isSigned=*/true);
  5977. }
  5978. default:
  5979. break;
  5980. }
  5981. }
  5982. return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
  5983. }
  5984. static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
  5985. llvm::Value *DefaultThreadLimitVal) {
  5986. const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
  5987. CGF.getContext(), CS->getCapturedStmt());
  5988. if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
  5989. if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
  5990. llvm::Value *NumThreads = nullptr;
  5991. llvm::Value *CondVal = nullptr;
5992. // Handle the if clause. If an if clause is present, the number of threads
5993. // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  5994. if (Dir->hasClausesOfKind<OMPIfClause>()) {
  5995. CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
  5996. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  5997. const OMPIfClause *IfClause = nullptr;
  5998. for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
  5999. if (C->getNameModifier() == OMPD_unknown ||
  6000. C->getNameModifier() == OMPD_parallel) {
  6001. IfClause = C;
  6002. break;
  6003. }
  6004. }
  6005. if (IfClause) {
  6006. const Expr *Cond = IfClause->getCondition();
  6007. bool Result;
  6008. if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
  6009. if (!Result)
  6010. return CGF.Builder.getInt32(1);
  6011. } else {
  6012. CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
  6013. if (const auto *PreInit =
  6014. cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
  6015. for (const auto *I : PreInit->decls()) {
  6016. if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
  6017. CGF.EmitVarDecl(cast<VarDecl>(*I));
  6018. } else {
  6019. CodeGenFunction::AutoVarEmission Emission =
  6020. CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
  6021. CGF.EmitAutoVarCleanups(Emission);
  6022. }
  6023. }
  6024. }
  6025. CondVal = CGF.EvaluateExprAsBool(Cond);
  6026. }
  6027. }
  6028. }
6029. // Check the value of the num_threads clause only if the if clause was not
6030. // specified or did not evaluate to false.
  6031. if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
  6032. CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
  6033. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  6034. const auto *NumThreadsClause =
  6035. Dir->getSingleClause<OMPNumThreadsClause>();
  6036. CodeGenFunction::LexicalScope Scope(
  6037. CGF, NumThreadsClause->getNumThreads()->getSourceRange());
  6038. if (const auto *PreInit =
  6039. cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
  6040. for (const auto *I : PreInit->decls()) {
  6041. if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
  6042. CGF.EmitVarDecl(cast<VarDecl>(*I));
  6043. } else {
  6044. CodeGenFunction::AutoVarEmission Emission =
  6045. CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
  6046. CGF.EmitAutoVarCleanups(Emission);
  6047. }
  6048. }
  6049. }
  6050. NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
  6051. NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
  6052. /*isSigned=*/false);
  6053. if (DefaultThreadLimitVal)
  6054. NumThreads = CGF.Builder.CreateSelect(
  6055. CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
  6056. DefaultThreadLimitVal, NumThreads);
  6057. } else {
  6058. NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
  6059. : CGF.Builder.getInt32(0);
  6060. }
  6061. // Process condition of the if clause.
  6062. if (CondVal) {
  6063. NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
  6064. CGF.Builder.getInt32(1));
  6065. }
  6066. return NumThreads;
  6067. }
  6068. if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
  6069. return CGF.Builder.getInt32(1);
  6070. }
  6071. return DefaultThreadLimitVal;
  6072. }
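// Worked example of the formula above (illustrative): for a nested
//   #pragma omp parallel if(c) num_threads(n)
// with an incoming thread-limit value TL, the helper emits roughly
//   %nt = select (icmp ult TL, n), TL, n   ; min(TL, n)
//   %nt = select c, %nt, 1                 ; honor the if clause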
  6073. const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
  6074. CodeGenFunction &CGF, const OMPExecutableDirective &D,
  6075. int32_t &DefaultVal) {
  6076. OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  6077. assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
  6078. "Expected target-based executable directive.");
  6079. switch (DirectiveKind) {
  6080. case OMPD_target:
6081. // The target directive itself carries no thread_limit clause.
  6082. return nullptr;
  6083. case OMPD_target_teams:
  6084. case OMPD_target_teams_distribute:
  6085. if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
  6086. const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
  6087. const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
  6088. if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
  6089. if (auto Constant =
  6090. ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
  6091. DefaultVal = Constant->getExtValue();
  6092. return ThreadLimit;
  6093. }
  6094. return nullptr;
  6095. case OMPD_target_parallel:
  6096. case OMPD_target_parallel_for:
  6097. case OMPD_target_parallel_for_simd:
  6098. case OMPD_target_teams_distribute_parallel_for:
  6099. case OMPD_target_teams_distribute_parallel_for_simd: {
  6100. Expr *ThreadLimit = nullptr;
  6101. Expr *NumThreads = nullptr;
  6102. if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
  6103. const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
  6104. ThreadLimit = ThreadLimitClause->getThreadLimit();
  6105. if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
  6106. if (auto Constant =
  6107. ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
  6108. DefaultVal = Constant->getExtValue();
  6109. }
  6110. if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
  6111. const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
  6112. NumThreads = NumThreadsClause->getNumThreads();
  6113. if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
  6114. if (auto Constant =
  6115. NumThreads->getIntegerConstantExpr(CGF.getContext())) {
  6116. if (Constant->getExtValue() < DefaultVal) {
  6117. DefaultVal = Constant->getExtValue();
  6118. ThreadLimit = NumThreads;
  6119. }
  6120. }
  6121. }
  6122. }
  6123. return ThreadLimit;
  6124. }
  6125. case OMPD_target_teams_distribute_simd:
  6126. case OMPD_target_simd:
  6127. DefaultVal = 1;
  6128. return nullptr;
  6129. case OMPD_parallel:
  6130. case OMPD_for:
  6131. case OMPD_parallel_for:
  6132. case OMPD_parallel_master:
  6133. case OMPD_parallel_sections:
  6134. case OMPD_for_simd:
  6135. case OMPD_parallel_for_simd:
  6136. case OMPD_cancel:
  6137. case OMPD_cancellation_point:
  6138. case OMPD_ordered:
  6139. case OMPD_threadprivate:
  6140. case OMPD_allocate:
  6141. case OMPD_task:
  6142. case OMPD_simd:
  6143. case OMPD_tile:
  6144. case OMPD_unroll:
  6145. case OMPD_sections:
  6146. case OMPD_section:
  6147. case OMPD_single:
  6148. case OMPD_master:
  6149. case OMPD_critical:
  6150. case OMPD_taskyield:
  6151. case OMPD_barrier:
  6152. case OMPD_taskwait:
  6153. case OMPD_taskgroup:
  6154. case OMPD_atomic:
  6155. case OMPD_flush:
  6156. case OMPD_depobj:
  6157. case OMPD_scan:
  6158. case OMPD_teams:
  6159. case OMPD_target_data:
  6160. case OMPD_target_exit_data:
  6161. case OMPD_target_enter_data:
  6162. case OMPD_distribute:
  6163. case OMPD_distribute_simd:
  6164. case OMPD_distribute_parallel_for:
  6165. case OMPD_distribute_parallel_for_simd:
  6166. case OMPD_teams_distribute:
  6167. case OMPD_teams_distribute_simd:
  6168. case OMPD_teams_distribute_parallel_for:
  6169. case OMPD_teams_distribute_parallel_for_simd:
  6170. case OMPD_target_update:
  6171. case OMPD_declare_simd:
  6172. case OMPD_declare_variant:
  6173. case OMPD_begin_declare_variant:
  6174. case OMPD_end_declare_variant:
  6175. case OMPD_declare_target:
  6176. case OMPD_end_declare_target:
  6177. case OMPD_declare_reduction:
  6178. case OMPD_declare_mapper:
  6179. case OMPD_taskloop:
  6180. case OMPD_taskloop_simd:
  6181. case OMPD_master_taskloop:
  6182. case OMPD_master_taskloop_simd:
  6183. case OMPD_parallel_master_taskloop:
  6184. case OMPD_parallel_master_taskloop_simd:
  6185. case OMPD_requires:
  6186. case OMPD_unknown:
  6187. break;
  6188. default:
  6189. break;
  6190. }
  6191. llvm_unreachable("Unsupported directive kind.");
  6192. }
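// Illustrative outcome of the constant folding above: for
//   #pragma omp target parallel thread_limit(8) num_threads(4)
// both clauses are integer constants and 4 < 8, so DefaultVal becomes 4 and
// the num_threads expression is returned as the effective limit.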
  6193. llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
  6194. CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  6195. assert(!CGF.getLangOpts().OpenMPIsDevice &&
  6196. "Clauses associated with the teams directive expected to be emitted "
  6197. "only for the host!");
  6198. OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  6199. assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
  6200. "Expected target-based executable directive.");
  6201. CGBuilderTy &Bld = CGF.Builder;
  6202. llvm::Value *ThreadLimitVal = nullptr;
  6203. llvm::Value *NumThreadsVal = nullptr;
  6204. switch (DirectiveKind) {
  6205. case OMPD_target: {
  6206. const CapturedStmt *CS = D.getInnermostCapturedStmt();
  6207. if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
  6208. return NumThreads;
  6209. const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
  6210. CGF.getContext(), CS->getCapturedStmt());
6211. // TODO: The standard is not clear on how to resolve two thread_limit
6212. // clauses; pick the teams one if present, otherwise the target one.
  6213. const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
  6214. if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
  6215. if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
  6216. ThreadLimitClause = TLC;
  6217. CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
  6218. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  6219. CodeGenFunction::LexicalScope Scope(
  6220. CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
  6221. if (const auto *PreInit =
  6222. cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
  6223. for (const auto *I : PreInit->decls()) {
  6224. if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
  6225. CGF.EmitVarDecl(cast<VarDecl>(*I));
  6226. } else {
  6227. CodeGenFunction::AutoVarEmission Emission =
  6228. CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
  6229. CGF.EmitAutoVarCleanups(Emission);
  6230. }
  6231. }
  6232. }
  6233. }
  6234. }
  6235. if (ThreadLimitClause) {
  6236. llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
  6237. ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
  6238. ThreadLimitVal =
  6239. Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
  6240. }
  6241. if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
  6242. if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
  6243. !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
  6244. CS = Dir->getInnermostCapturedStmt();
  6245. const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
  6246. CGF.getContext(), CS->getCapturedStmt());
  6247. Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  6248. }
  6249. if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
  6250. !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
  6251. CS = Dir->getInnermostCapturedStmt();
  6252. if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
  6253. return NumThreads;
  6254. }
  6255. if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
  6256. return Bld.getInt32(1);
  6257. }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (llvm::Value *NumThreads =
            getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
      return NumThreads;
    return Bld.getInt32(0);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
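      // When both num_threads and thread_limit are present, the effective
      // value is the smaller of the two.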
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
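    // Count the trailing zero bits of the OMP_MAP_MEMBER_OF mask; that is the
    // bit position where the MEMBER_OF value is stored.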
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
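      // Clamp the computed size to zero if the lower bound lies past the end
      // of the base array, instead of letting the subtraction wrap.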
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (**) allocate contiguous space needed to fit all mapped members even if
    //      we allocate space for members not mapped (in this example,
    //      s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //      them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
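    // Set below when the base is a global pointer variable whose load is
    // deferred until we know how the rest of the expression uses it.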
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.
      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle the base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
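          // The highest element is the last byte of the mapped type, computed
          // as LowestElem + sizeof(type) - 1.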
  7147. Address HB = CGF.Builder.CreateConstGEP(
  7148. CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  7149. LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
  7150. TypeSize.getQuantity() - 1);
  7151. PartialStruct.HighestElem = {
  7152. std::numeric_limits<decltype(
  7153. PartialStruct.HighestElem.first)>::max(),
  7154. HB};
  7155. PartialStruct.Base = BP;
  7156. PartialStruct.LB = LB;
  7157. assert(
  7158. PartialStruct.PreliminaryMapData.BasePointers.empty() &&
  7159. "Overlapped elements must be used only once for the variable.");
  7160. std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
  7161. // Emit data for non-overlapped data.
  7162. OpenMPOffloadMappingFlags Flags =
  7163. OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
  7164. getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
  7165. /*AddPtrFlag=*/false,
  7166. /*AddIsTargetParamFlag=*/false, IsNonContiguous);
  7167. llvm::Value *Size = nullptr;
  7168. // Do bitcopy of all non-overlapped structure elements.
  7169. for (OMPClauseMappableExprCommon::MappableExprComponentListRef
  7170. Component : OverlappedElements) {
  7171. Address ComponentLB = Address::invalid();
  7172. for (const OMPClauseMappableExprCommon::MappableComponent &MC :
  7173. Component) {
  7174. if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
  7175. const auto *FD = dyn_cast<FieldDecl>(VD);
  7176. if (FD && FD->getType()->isLValueReferenceType()) {
  7177. const auto *ME =
  7178. cast<MemberExpr>(MC.getAssociatedExpression());
  7179. LValue BaseLVal = EmitMemberExprBase(CGF, ME);
  7180. ComponentLB =
  7181. CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
  7182. .getAddress(CGF);
  7183. } else {
  7184. ComponentLB =
  7185. CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
  7186. .getAddress(CGF);
  7187. }
  7188. Size = CGF.Builder.CreatePtrDiff(
  7189. CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
  7190. CGF.EmitCastToVoidPtr(LB.getPointer()));
  7191. break;
  7192. }
  7193. }
  7194. assert(Size && "Failed to determine structure size");
  7195. CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
  7196. CombinedInfo.BasePointers.push_back(BP.getPointer());
  7197. CombinedInfo.Pointers.push_back(LB.getPointer());
  7198. CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
  7199. Size, CGF.Int64Ty, /*isSigned=*/true));
  7200. CombinedInfo.Types.push_back(Flags);
  7201. CombinedInfo.Mappers.push_back(nullptr);
  7202. CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
  7203. : 1);
  7204. LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
  7205. }
  7206. CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
  7207. CombinedInfo.BasePointers.push_back(BP.getPointer());
  7208. CombinedInfo.Pointers.push_back(LB.getPointer());
  7209. Size = CGF.Builder.CreatePtrDiff(
  7210. CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
  7211. CGF.EmitCastToVoidPtr(LB.getPointer()));
  7212. CombinedInfo.Sizes.push_back(
  7213. CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
  7214. CombinedInfo.Types.push_back(Flags);
  7215. CombinedInfo.Mappers.push_back(nullptr);
  7216. CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
  7217. : 1);
  7218. break;
  7219. }
  7220. llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
  7221. if (!IsMemberPointerOrAddr ||
  7222. (Next == CE && MapType != OMPC_MAP_unknown)) {
  7223. CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
  7224. CombinedInfo.BasePointers.push_back(BP.getPointer());
  7225. CombinedInfo.Pointers.push_back(LB.getPointer());
  7226. CombinedInfo.Sizes.push_back(
  7227. CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
  7228. CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
  7229. : 1);
  7230. // If Mapper is valid, the last component inherits the mapper.
  7231. bool HasMapper = Mapper && Next == CE;
  7232. CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
  7233. // We need to add a pointer flag for each map that comes from the
  7234. // same expression except for the first one. We also need to signal
  7235. // this map is the first one that relates with the current capture
  7236. // (there is a set of entries for each capture).
  7237. OpenMPOffloadMappingFlags Flags = getMapTypeBits(
  7238. MapType, MapModifiers, MotionModifiers, IsImplicit,
  7239. !IsExpressionFirstInfo || RequiresReference ||
  7240. FirstPointerInComplexData || IsMemberReference,
  7241. IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
  7242. if (!IsExpressionFirstInfo || IsMemberReference) {
  7243. // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
  7244. // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
  7245. if (IsPointer || (IsMemberReference && Next != CE))
  7246. Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
  7247. OpenMPOffloadMappingFlags::OMP_MAP_FROM |
  7248. OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
  7249. OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
  7250. OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
  7251. if (ShouldBeMemberOf) {
  7252. // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
  7253. // should be later updated with the correct value of MEMBER_OF.
  7254. Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
  7255. // From now on, all subsequent PTR_AND_OBJ entries should not be
  7256. // marked as MEMBER_OF.
  7257. ShouldBeMemberOf = false;
  7258. }
  7259. }
  7260. CombinedInfo.Types.push_back(Flags);
  7261. }
  7262. // If we have encountered a member expression so far, keep track of the
  7263. // mapped member. If the parent is "*this", then the value declaration
  7264. // is nullptr.
  7265. if (EncounteredME) {
  7266. const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
  7267. unsigned FieldIndex = FD->getFieldIndex();
  7268. // Update info about the lowest and highest elements for this struct
  7269. if (!PartialStruct.Base.isValid()) {
  7270. PartialStruct.LowestElem = {FieldIndex, LowestElem};
  7271. if (IsFinalArraySection) {
  7272. Address HB =
  7273. CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
  7274. .getAddress(CGF);
  7275. PartialStruct.HighestElem = {FieldIndex, HB};
  7276. } else {
  7277. PartialStruct.HighestElem = {FieldIndex, LowestElem};
  7278. }
  7279. PartialStruct.Base = BP;
  7280. PartialStruct.LB = BP;
  7281. } else if (FieldIndex < PartialStruct.LowestElem.first) {
  7282. PartialStruct.LowestElem = {FieldIndex, LowestElem};
  7283. } else if (FieldIndex > PartialStruct.HighestElem.first) {
  7284. PartialStruct.HighestElem = {FieldIndex, LowestElem};
  7285. }
  7286. }
  7287. // Need to emit combined struct for array sections.
  7288. if (IsFinalArraySection || IsNonContiguous)
  7289. PartialStruct.IsArraySection = true;
  7290. // If we have a final array section, we are done with this expression.
  7291. if (IsFinalArraySection)
  7292. break;
  7293. // The pointer becomes the base for the next element.
  7294. if (Next != CE)
  7295. BP = IsMemberReference ? LowestElem : LB;
  7296. IsExpressionFirstInfo = false;
  7297. IsCaptureFirstInfo = false;
  7298. FirstPointerInComplexData = false;
  7299. IsPrevMemberReference = IsMemberReference;
  7300. } else if (FirstPointerInComplexData) {
  7301. QualType Ty = Components.rbegin()
  7302. ->getAssociatedDeclaration()
  7303. ->getType()
  7304. .getNonReferenceType();
  7305. BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
  7306. FirstPointerInComplexData = false;
  7307. }
  7308. }
  7309. // If ran into the whole component - allocate the space for the whole
  7310. // record.
  7311. if (!EncounteredME)
  7312. PartialStruct.HasCompleteRecord = true;
  7313. if (!IsNonContiguous)
  7314. return;
  7315. const ASTContext &Context = CGF.getContext();
  7316. // For supporting stride in array section, we need to initialize the first
  7317. // dimension size as 1, first offset as 0, and first count as 1
  7318. MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
  7319. MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  7320. MapValuesArrayTy CurStrides;
  7321. MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
  7322. uint64_t ElementTypeSize;
  7323. // Collect Size information for each dimension and get the element size as
  7324. // the first Stride. For example, for `int arr[10][10]`, the DimSizes
  7325. // should be [10, 10] and the first stride is 4 btyes.
  7326. for (const OMPClauseMappableExprCommon::MappableComponent &Component :
  7327. Components) {
  7328. const Expr *AssocExpr = Component.getAssociatedExpression();
  7329. const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  7330. if (!OASE)
  7331. continue;
  7332. QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
  7333. auto *CAT = Context.getAsConstantArrayType(Ty);
  7334. auto *VAT = Context.getAsVariableArrayType(Ty);
  7335. // We need all the dimension size except for the last dimension.
  7336. assert((VAT || CAT || &Component == &*Components.begin()) &&
  7337. "Should be either ConstantArray or VariableArray if not the "
  7338. "first Component");
  7339. // Get element size if CurStrides is empty.
  7340. if (CurStrides.empty()) {
  7341. const Type *ElementType = nullptr;
  7342. if (CAT)
  7343. ElementType = CAT->getElementType().getTypePtr();
  7344. else if (VAT)
  7345. ElementType = VAT->getElementType().getTypePtr();
  7346. else
  7347. assert(&Component == &*Components.begin() &&
  7348. "Only expect pointer (non CAT or VAT) when this is the "
  7349. "first Component");
  7350. // If ElementType is null, then it means the base is a pointer
  7351. // (neither CAT nor VAT) and we'll attempt to get ElementType again
  7352. // for next iteration.
  7353. if (ElementType) {
  7354. // For the case that having pointer as base, we need to remove one
  7355. // level of indirection.
  7356. if (&Component != &*Components.begin())
  7357. ElementType = ElementType->getPointeeOrArrayElementType();
  7358. ElementTypeSize =
  7359. Context.getTypeSizeInChars(ElementType).getQuantity();
  7360. CurStrides.push_back(
  7361. llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
  7362. }
  7363. }
  7364. // Get dimension value except for the last dimension since we don't need
  7365. // it.
  7366. if (DimSizes.size() < Components.size() - 1) {
  7367. if (CAT)
  7368. DimSizes.push_back(llvm::ConstantInt::get(
  7369. CGF.Int64Ty, CAT->getSize().getZExtValue()));
  7370. else if (VAT)
  7371. DimSizes.push_back(CGF.Builder.CreateIntCast(
  7372. CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
  7373. /*IsSigned=*/false));
  7374. }
  7375. }
  7376. // Skip the dummy dimension since we have already have its information.
  7377. auto *DI = DimSizes.begin() + 1;
  7378. // Product of dimension.
  7379. llvm::Value *DimProd =
  7380. llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
  7381. // Collect info for non-contiguous. Notice that offset, count, and stride
  7382. // are only meaningful for array-section, so we insert a null for anything
  7383. // other than array-section.
  7384. // Also, the size of offset, count, and stride are not the same as
  7385. // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
  7386. // count, and stride are the same as the number of non-contiguous
  7387. // declaration in target update to/from clause.
  7388. for (const OMPClauseMappableExprCommon::MappableComponent &Component :
  7389. Components) {
  7390. const Expr *AssocExpr = Component.getAssociatedExpression();
  7391. if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
  7392. llvm::Value *Offset = CGF.Builder.CreateIntCast(
  7393. CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
  7394. /*isSigned=*/false);
  7395. CurOffsets.push_back(Offset);
  7396. CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
  7397. CurStrides.push_back(CurStrides.back());
  7398. continue;
  7399. }
  7400. const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  7401. if (!OASE)
  7402. continue;
  7403. // Offset
  7404. const Expr *OffsetExpr = OASE->getLowerBound();
  7405. llvm::Value *Offset = nullptr;
  7406. if (!OffsetExpr) {
  7407. // If offset is absent, then we just set it to zero.
  7408. Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
  7409. } else {
  7410. Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
  7411. CGF.Int64Ty,
  7412. /*isSigned=*/false);
  7413. }
  7414. CurOffsets.push_back(Offset);
  7415. // Count
  7416. const Expr *CountExpr = OASE->getLength();
  7417. llvm::Value *Count = nullptr;
  7418. if (!CountExpr) {
  7419. // In Clang, once a high dimension is an array section, we construct all
  7420. // the lower dimension as array section, however, for case like
  7421. // arr[0:2][2], Clang construct the inner dimension as an array section
  7422. // but it actually is not in an array section form according to spec.
  7423. if (!OASE->getColonLocFirst().isValid() &&
  7424. !OASE->getColonLocSecond().isValid()) {
  7425. Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
  7426. } else {
  7427. // OpenMP 5.0, 2.1.5 Array Sections, Description.
  7428. // When the length is absent it defaults to ⌈(size −
  7429. // lower-bound)/stride⌉, where size is the size of the array
  7430. // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset  Count  Stride
      //    D0          0       1       4    (int)  <- dummy dimension
      //    D1          0       2       8    (2 * (1) * 4)
      //    D2          1       2      20    (1 * (1 * 5) * 4)
      //    D3          0       2     200    (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift left by getFlagMemberOffset() bits.
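    // E.g. Position 0 (the first member) is encoded as MEMBER_OF == 1; the
    // all-ones pattern 0xFFFF in the MEMBER_OF field is reserved as the
    // PTR_AND_OBJ placeholder handled in setCorrectMemberOfFlag below.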
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
            OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
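    // Entries for each declaration are grouped by kind and emitted in this
    // order: 'present' maps first, then 'alloc' maps, then everything else.
    // Total is just the number of kinds.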
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
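    // For instance (illustrative only):
    //
    //   #pragma omp target data map(to : p[0:N]) use_device_ptr(p)
    //
    // marks the existing map entry for 'p' so that the runtime hands back the
    // corresponding device pointer for use inside the region.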
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information; generate a
          // zero-size array section. If the pointer is a struct member, we
          // defer this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |=
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getThisType()->getPointeeType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is TARGET_PARAM only when we are generating info for the
    // captures; otherwise the combined entry carries no map type bits.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<
              std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<
              std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
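      // Walk backwards to find the (already emitted) lambda entry whose
      // pointer matches this capture's base address.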
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda), skip here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable()
               ? OpenMPOffloadMappingFlags::OMP_MAP_TO
               : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
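    // Each MapData tuple holds: (component list, map type, map-type
    // modifiers, is-implicit flag, mapper declaration, variable reference
    // expression).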
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating the components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the lists, we reached
        // the head of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements orders before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime that this capture is passed by
        // value and is not a pointer.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  //   struct descriptor_dim {
  //     uint64_t offset;
  //     uint64_t count;
  //     uint64_t stride;
  //   };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
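  // Field indices of offset, count, and stride within descriptor_dim.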
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, whereas the number of offset, count, and stride
  // lists equals the number of non-contiguous base declarations.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1, since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
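      // The per-dimension lists were collected in the opposite order from the
      // one the descriptor expects, so index them in reverse.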
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.RTArgs.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.RTArgs.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.RTArgs.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.RTArgs.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
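    // Partition the sizes into compile-time constants (kept in ConstSizes)
    // and values that must be computed at runtime (flagged in RuntimeSizes).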
  8627. for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
  8628. if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
  8629. if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
  8630. if (IsNonContiguous &&
  8631. static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
  8632. CombinedInfo.Types[I] &
  8633. OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
  8634. ConstSizes[I] = llvm::ConstantInt::get(
  8635. CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
  8636. else
  8637. ConstSizes[I] = CI;
  8638. continue;
  8639. }
  8640. }
  8641. RuntimeSizes.set(I);
  8642. }
  8643. if (RuntimeSizes.all()) {
  8644. QualType SizeArrayType = Ctx.getConstantArrayType(
  8645. Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
  8646. /*IndexTypeQuals=*/0);
  8647. Info.RTArgs.SizesArray =
  8648. CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
  8649. } else {
  8650. auto *SizesArrayInit = llvm::ConstantArray::get(
  8651. llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
  8652. std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
  8653. auto *SizesArrayGbl = new llvm::GlobalVariable(
  8654. CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
  8655. llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
  8656. SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  8657. if (RuntimeSizes.any()) {
  8658. QualType SizeArrayType = Ctx.getConstantArrayType(
  8659. Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
  8660. /*IndexTypeQuals=*/0);
  8661. Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
  8662. llvm::Value *GblConstPtr =
  8663. CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
  8664. SizesArrayGbl, CGM.Int64Ty->getPointerTo());
  8665. CGF.Builder.CreateMemCpy(
  8666. Buffer,
  8667. Address(GblConstPtr, CGM.Int64Ty,
  8668. CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
  8669. /*DestWidth=*/64, /*Signed=*/false))),
  8670. CGF.getTypeSize(SizeArrayType));
  8671. Info.RTArgs.SizesArray = Buffer.getPointer();
  8672. } else {
  8673. Info.RTArgs.SizesArray = SizesArrayGbl;
  8674. }
  8675. }
  8676. // The map types are always constant so we don't need to generate code to
  8677. // fill arrays. Instead, we create an array constant.
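    // E.g., a single 'map(tofrom: a)' capture typically yields (illustrative
    // constant only):
    //   @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35]
    // where 35 == OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_TARGET_PARAM.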
    SmallVector<uint64_t, 4> Mapping;
    for (auto mapFlag : CombinedInfo.Types)
      Mapping.push_back(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              mapFlag));
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.RTArgs.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the
    // end of a region, so generate a separate map type array in that case.
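    // E.g. (sketch), for 'map(present, tofrom: x)' on a 'target data' region,
    // the begin-call map types carry OMP_MAP_PRESENT while the end-call map
    // types have that bit cleared.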
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type &
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
          Type &=
              ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                  OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.RTArgs.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
/// Check for inner distribute directive.
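/// For example (illustrative), given
/// \code
/// #pragma omp target
/// #pragma omp teams distribute parallel for
/// for (int i = 0; i < N; ++i)
///   ;
/// \endcode
/// the nested 'teams distribute parallel for' directive is returned.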
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);

  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);

  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop that iterates over SizeArg elements and maps all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
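    // E.g. (a sketch): if the mapper function is invoked with 'to' and the
    // member was declared 'tofrom' in the mapper, the member decays to 'to'.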
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
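/// The emitted check and runtime call look roughly like the following sketch
/// (the actual code tests the corresponding OpenMPOffloadMappingFlags bits):
/// \code
/// Cond = IsInit ? (size > 1 || (base != begin && maptype.IsPtrAndObj))
///               : (size > 1);
/// if (Cond && (IsInit ? !maptype.IsDelete : maptype.IsDelete))
///   __tgt_push_mapper_component(handle, base, begin, size * sizeof(Ty),
///                               (type & ~(TO | FROM)) | IMPLICIT, name);
/// \endcode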
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
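// The overall shape of the code emitted by emitTargetCall below is roughly
// the following sketch (assuming offloading is enabled and an if-clause may
// be present; 'host_fallback' stands for the outlined host version):
//
//   if (if_cond) {
//     fill .offload_* arrays;
//     if (__tgt_target_kernel(&loc, device_id, num_teams, num_threads,
//                             region_id, &kernel_args) != 0)
//       host_fallback(captures...); // or 'unreachable' if offload mandatory
//   } else {
//     host_fallback(captures...);
//   }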
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };

  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep it, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get the trip count for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
    if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
      CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
      llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
          DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
      DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
    }

    llvm::Value *ZeroArray =
        llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);

    llvm::Value *NumTeams3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
    llvm::Value *NumThreads3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/*Version=*/2),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations,
        Flags,
        NumTeams3D,
        NumThreads3D,
        DynCGroupMem,
    };

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads, so no additional calls to the runtime are
    // required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
        CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads,
        OutlinedFnID, KernelArgs));

    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };
  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
            OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
            OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    auto EntryInfo =
        getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point?
    // If so, just signal that we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
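  // E.g. (illustrative), a function enclosed in
  //   #pragma omp declare target device_type(nohost)
  // must only be emitted into the device image, never into the host code.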
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  // Register declare target variables.
  llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  int64_t VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
       *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize =
          CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
      assert(VarSize != 0 && "Expected non-zero size of the variable");
    } else {
      VarSize = 0;
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temporary solution to prevent optimization of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
              *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to'/'enter' with "
           "unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    else
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize().getQuantity();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
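// Record the clauses of an OpenMP 'requires' directive. For example
// (illustrative):
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// makes getDefaultMemoryOrdering() below return SequentiallyConsistent.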
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

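// For illustration (added comment, not in the original source): a TU with
//   #pragma omp requires unified_shared_memory, atomic_default_mem_order(seq_cst)
// takes both branches above: the first clause sets
// HasRequiresUnifiedSharedMemory (and the OMPBuilder config), the second maps
// to llvm::AtomicOrdering::SequentiallyConsistent, which later atomic
// directives query via getDefaultMemoryOrdering().
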
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

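// Illustrative sketch (added, not in the original source): for a host TU
// containing a target region and '#pragma omp requires unified_shared_memory',
// the emitted function is roughly equivalent to
//   void omp_offloading.requires_reg() {
//     __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);
//   }
// and is registered as a global init function so the flags reach the runtime
// before any offloading entry is used.
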
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

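// For illustration (added comment, not in the original source): a
// '#pragma omp teams' region whose body captures 'a' and 'b' is outlined into
// a microtask and launched roughly as
//   __kmpc_fork_teams(&loc, 2, (kmpc_micro)outlined_fn, &a, &b);
// mirroring the __kmpc_fork_call scheme used for 'parallel'.
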
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

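// For illustration (added comment, not in the original source):
// 'num_teams(4) thread_limit(8)' on a teams construct lowers to
//   __kmpc_push_num_teams(&loc, gtid, 4, 8);
// a 0 is passed for a missing clause, telling the runtime to use its default.
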
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

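// For illustration (added comment, not in the original source): for
//   #pragma omp target data map(tofrom: a) if(cond) device(dev)
// the code above brackets the region body with
//   __tgt_target_data_begin_mapper(&loc, dev, 1, baseptrs, ptrs, sizes,
//                                  maptypes, names, mappers);
//   ... region body ...
//   __tgt_target_data_end_mapper(/* same arrays */);
// both calls guarded by 'cond' when an if clause is present.
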
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

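// For illustration (added comment, not in the original source):
//   #pragma omp target update to(a) nowait
// selects OMPRTL___tgt_target_data_update_nowait_mapper in the switch above,
// and because of the nowait clause the call is wrapped in an outer task via
// EmitOMPTargetTaskBasedDirective rather than emitted inline.
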
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
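  // Worked example (added for illustration, not in the original source): for
  //   double foo(double x);  // no simdlen clause
  // the CDT is 'double' by rule a), so with 256-bit vector registers (AVX2)
  // VLEN = 256 / 64 = 4, and the emitted variant name carries a '4'.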
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

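// For illustration (added comment, not in the original source): parameters
// classified as (Uniform, Linear with step 2, Vector aligned to 16) mangle
// to "ul2va16": 'u' for the uniform, 'l2' for the linear step, and 'v' plus
// 'a16' for the aligned vector parameter.
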
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

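// For illustration (added comment, not in the original source):
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// produces the attributes "_ZGVbN2v_foo", "_ZGVcN4v_foo", "_ZGVdN4v_foo" and
// "_ZGVeN8v_foo": one unmasked ('N') variant per ISA, with the VLEN derived
// from the 64-bit CDT (e.g. 128 / 64 = 2 for SSE).
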
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

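// For illustration (added comment, not in the original source): for
//   double foo(float x, int8_t y);
// the lane sizes are {64, 32, 8}, so NDS = 8 and WDS = 64; NDS then drives
// the Advanced SIMD VLEN choices below, while WDS bounds the SVE simdlen
// checks.
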
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

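// For illustration (added comment, not in the original source): with NDS = 32
// (e.g. a float kernel) and an unmasked Advanced SIMD target, the helper
// above emits both a 64-bit-register and a 128-bit-register variant, e.g.
// "_ZGVnN2v_foo" and "_ZGVnN4v_foo".
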
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

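// For illustration (added comment, not in the original source): given
//   #pragma omp declare simd linear(p: 2)
//   void foo(double *p);
// 'p' is classified as Linear with step 2, and because it is a pointer the
// step is rescaled above by sizeof(double), so the mangled parameter sequence
// becomes "l16" rather than "l2".
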
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

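// For illustration (added comment, not in the original source): a doacross
// loop such as
//   #pragma omp for ordered(1)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp ordered depend(sink: i - 1)
//     ...
//     #pragma omp ordered depend(source)
//   }
// is set up with __kmpc_doacross_init(&loc, gtid, 1, dims) by the code
// further above, while each 'depend(sink: ...)' becomes __kmpc_doacross_wait
// and each 'depend(source)' becomes __kmpc_doacross_post on the stored
// iteration vector.
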
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (the
/// default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

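// For illustration (added comment, not in the original source): a local
// declared as
//   double v[16];
//   #pragma omp allocate(v) allocator(omp_high_bw_mem_alloc)
// takes the path above and is lowered to roughly
//   void *p = __kmpc_alloc(gtid, 128, omp_high_bw_mem_alloc);
// with a matching __kmpc_free(gtid, p, omp_high_bw_mem_alloc) pushed as a
// normal/EH cleanup; __kmpc_aligned_alloc is used instead when an align
// modifier supplies an explicit alignment.
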
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}


CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
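
/// Registers the dynamically allocated local variables of an untied task
/// for the current function and pushes them on the untied-local-vars stack;
/// the destructor pops the entry again.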
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
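
/// Computes the set of declarations for which lastprivate conditional
/// analysis must be disabled in inner regions: variables captured by a
/// target/task region and scalar variables privatized on the directive that
/// still have an active (non-disabled) entry on the lastprivate conditional
/// stack.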
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude scalar vars listed in private, firstprivate, lastprivate,
  // reduction and linear clauses.
  auto CollectScalarVars = [&NeedToCheckForLPCs](const auto *C) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  };
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>())
    CollectScalarVars(C);
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>())
    CollectScalarVars(C);
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>())
    CollectScalarVars(C);
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>())
    CollectScalarVars(C);
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>())
    CollectScalarVars(C);
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
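
/// If the directive carries a lastprivate(conditional:) clause (OpenMP 5.0
/// or later), pushes the listed variables together with their unique names
/// and the loop iteration variable on the lastprivate conditional stack.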
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
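
/// Constructor used by disable(): if any declarations are collected by
/// tryToDisableInnerAnalysis(), pushes an entry that marks them as disabled
/// for lastprivate conditional analysis in the inner regions.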
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
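
/// Builds an implicit record { <value>, char Fired } for the lastprivate
/// conditional variable VD, caches it per function, clears the Fired flag
/// and returns the address of the value field, which serves as the private
/// copy of the variable.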
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
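
/// Emits the update for a lastprivate conditional variable: inside a
/// critical section (skipped in simd-only mode, where no parallel region can
/// exist) the internal last_iv/last_a pair is refreshed whenever the current
/// iteration variable is greater than or equal to the last recorded one.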
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
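
/// If LHS refers to a lastprivate conditional variable, either emits the
/// iv-guarded update of the internal copy (reference in the same function)
/// or atomically sets the Fired flag of the wrapper struct (reference in an
/// inner region outlined from another function).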
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
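
/// For every lastprivate conditional variable captured by the directive,
/// checks the Fired flag of its wrapper struct and, if set, emits the
/// iv-guarded update of the internal copy from the shared variable.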
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
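
/// Copies the final value of the lastprivate conditional variable from the
/// internal global (if it was ever updated in the region) back into the
/// private copy PrivLVal.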
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
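
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: OpenMP codegen in SIMD-only mode (-fopenmp-simd).
// No real OpenMP runtime calls are emitted in this mode, so most of the
// entry points below cannot be reached and assert via llvm_unreachable.
// The exceptions are simple reductions, which forward to the base
// implementation, and emitTargetGlobal, which has nothing to register.
//===----------------------------------------------------------------------===//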
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}