CGBuiltin.cpp 822 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
92861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211942219423194241942519426194271942819429194301943119432194331943419435194361943719438194391944019441194421944319444194451944619447194481944919450194511945219453194541945519456194571945819459194601946119462194631946419465194661946719468194691947019471194721947319474194751947619477194781947919480194811948219483194841948519486194871948819489194901949119492194931949419495194961949719498194991950019501195021950319504195051950619507195081950919510195111951219513195141951519516195171951819519195201952119522195231952419525195261952719528195291953019531195321953319534195351953619537195381953919540195411954219543195441954519546195471954819549195501955119552195531955419555195561955719558195591956019561195621956319564195651956619567195681956919570195711957219573195741957519576195771957819579195801958119582195831958419585195861958719588195891959019591195921959319594195951959619597195981959919600196011960219603196041960519606196071960819609196101961119612196131961419615196161961719618196191962019621196221962319624196251962619627196281962919630196311963219633196341963519636196371963819639196401964119642196431964419645196461964719648196491965019651196521965319654196551965619657196581965919660196611966219663196641966519666196671966819669196701967119672196731967419675196761967719678196791968019681196821968319684196851968619687196881968919690196911969219693196941969519696196971969819699197001970119702197031970419705197061970719708197091971019711197121971319714197151971619717197181971919720197211972219723197241972519726197271972819729197301973119732197331973419735197361973719738197391974019741197421974319744197451974619747197481974919750197511975219753197541975519756197571975819759197601976119762197631976419765197661976719768197691977019771197721977319774197751977619777197781977919780197811978219783197841978519786197871978819789197901979119792197931979419795197961979719798197991980019801198021980319804
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/X86TargetParser.h"
#include <optional>
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}
/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 8> F128Builtins{
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
  // if it is 64-bit 'long double' mode.
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
      {Builtin::BI__builtin_frexpl, "frexp"},
      {Builtin::BI__builtin_ldexpl, "ldexp"},
      {Builtin::BI__builtin_modfl, "modf"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC,
    // once the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.find(BuiltinID) != F128Builtins.end())
      Name = F128Builtins[BuiltinID];
    else if (getTriple().isOSAIX() &&
             &getTarget().getLongDoubleFormat() ==
                 &llvm::APFloat::IEEEdouble() &&
             AIXLongDouble64Builtins.find(BuiltinID) !=
                 AIXLongDouble64Builtins.end())
      Name = AIXLongDouble64Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
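
// Illustrative sketch (not part of the upstream source): for a call to
// __builtin_fabsf, the name left after substr(10) is "fabsf", so the builtin
// becomes a plain libcall:
//
//   float f = __builtin_fabsf(x);            // source
//   %call = call float @fabsf(float %x)      ; roughly the IR produced
//
// On PPC64 with 128-bit IEEE 'long double', __builtin_printf would instead be
// redirected to "__printfieee128" via the F128Builtins table above.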
/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}
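
// Illustrative sketch (not part of the upstream source): for a GCC-style
// fetch-and-op builtin such as
//
//   int old = __sync_fetch_and_add(&x, 1);   // x is an 'int'
//
// MakeBinaryAtomicValue emits roughly
//
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//
// and returns the pre-operation value, converted back to the source type via
// EmitFromInt.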
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
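
// Illustrative sketch (not part of the upstream source): this is the
// "op-and-fetch" form. For
//
//   int cur = __sync_add_and_fetch(&x, 5);
//
// the atomicrmw yields the old value, and the CreateBinOp above re-applies
// the operation to produce the new value that the builtin returns. The
// Invert flag covers cases such as __sync_nand_and_fetch, where the result
// of the re-applied 'and' must additionally be bitwise-negated.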
/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///        cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
/// the function EmitAtomicCmpXchgForMSIntrin instead.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
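
// Illustrative sketch (not part of the upstream source): the two GCC-style
// compare-and-swap builtins map onto the two ReturnBool modes:
//
//   bool ok  = __sync_bool_compare_and_swap(&x, expected, desired);
//   int  old = __sync_val_compare_and_swap(&x, expected, desired);
//
// Both emit roughly
//
//   %pair = cmpxchg ptr %x, i32 %expected, i32 %desired seq_cst seq_cst
//
// returning either the success bit (extractvalue %pair, 1) or the old value
// (extractvalue %pair, 0) to the caller.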
/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
///   T _InterlockedCompareExchange(T volatile *Destination,
///                                 T Exchange,
///                                 T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
///   cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue, since it expects the arguments to be
/// already swapped.
static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
      Destination, Comparand, Exchange,
      SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}
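
// Illustrative sketch (not part of the upstream source): for
//
//   long prev = _InterlockedCompareExchange(&v, Exchange, Comparand);
//
// the helper above emits roughly
//
//   %pair = cmpxchg volatile ptr %v, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0
//
// Note that Comparand and Exchange appear in the opposite order from the MSVC
// signature, matching the cmpxchg operand order described above.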
// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));

  assert(Destination->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());
  assert(ComparandPtr->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
                          Int128Ty, CGF.getContext().toCharUnitsFromBits(128));

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandResult);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}
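
// Illustrative sketch (not part of the upstream source): conceptually this
// lowers
//
//   unsigned char ok =
//       _InterlockedCompareExchange128(dst, hi, lo, &expected);
//
// into an i128 cmpxchg whose exchange value is (hi << 64) | lo; the old
// 128-bit value is written back through &expected, and the returned i8 is the
// zero-extended success bit.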
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Add,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
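
// Illustrative sketch (not part of the upstream source): helpers of this shape
// match the MSVC-style _InterlockedIncrement/_InterlockedDecrement family,
// which return the *new* value. Since atomicrmw yields the old value, the
// emitted sequence for something like _InterlockedIncrement(&n) is roughly
//
//   %old = atomicrmw add ptr %n, i32 1 seq_cst
//   %new = add i32 %old, 1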
// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}
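
// Illustrative sketch (not part of the upstream source): lowerings of this
// kind are used for MSVC's __iso_volatile_* builtins, e.g.
//
//   int v = __iso_volatile_load32(p);
//
// becomes a single volatile integer load of the pointee size, with no implied
// memory ordering beyond the volatile semantics.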
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}
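
// Illustrative sketch (not part of the upstream source): in the default FP
// environment a builtin handled by this helper lowers to the plain intrinsic,
// e.g. call double @llvm.sqrt.f64(double %x), whereas under strict FP
// semantics (for example -ffp-exception-behavior=strict) the constrained
// variant @llvm.experimental.constrained.sqrt.f64 is emitted instead, carrying
// explicit rounding-mode and exception-behavior operands.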
// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E, unsigned IntrinsicID,
                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}
  520. // Emit an intrinsic that has overloaded integer result and fp operand.
  521. static Value *
  522. emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
  523. unsigned IntrinsicID,
  524. unsigned ConstrainedIntrinsicID) {
  525. llvm::Type *ResultType = CGF.ConvertType(E->getType());
  526. llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  527. if (CGF.Builder.getIsFPConstrained()) {
  528. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  529. Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
  530. {ResultType, Src0->getType()});
  531. return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  532. } else {
  533. Function *F =
  534. CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
  535. return CGF.Builder.CreateCall(F, Src0);
  536. }
  537. }
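// Illustrative sketch: a builtin along the lines of __builtin_lround(double)
// would be lowered here to something like
//   %r = call i64 @llvm.lround.i64.f64(double %x)
// or, when FP is constrained, to the corresponding
// @llvm.experimental.constrained.lround variant; the intrinsic is overloaded
// on both the integer result type and the FP operand type.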
  538. /// EmitFAbs - Emit a call to @llvm.fabs().
  539. static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  540. Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  541. llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  542. Call->setDoesNotAccessMemory();
  543. return Call;
  544. }
  545. /// Emit the computation of the sign bit for a floating point value. Returns
  546. /// the i1 sign bit value.
  547. static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  548. LLVMContext &C = CGF.CGM.getLLVMContext();
  549. llvm::Type *Ty = V->getType();
  550. int Width = Ty->getPrimitiveSizeInBits();
  551. llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  552. V = CGF.Builder.CreateBitCast(V, IntTy);
  553. if (Ty->isPPC_FP128Ty()) {
  554. // We want the sign bit of the higher-order double. The bitcast we just
  555. // did works as if the double-double was stored to memory and then
  556. // read as an i128. The "store" will put the higher-order double in the
  557. // lower address in both little- and big-Endian modes, but the "load"
  558. // will treat those bits as a different part of the i128: the low bits in
  559. // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
  560. // we need to shift the high bits down to the low before truncating.
  561. Width >>= 1;
  562. if (CGF.getTarget().isBigEndian()) {
  563. Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
  564. V = CGF.Builder.CreateLShr(V, ShiftCst);
  565. }
  566. // We are truncating value in order to extract the higher-order
  567. // double, which we will be using to extract the sign from.
  568. IntTy = llvm::IntegerType::get(C, Width);
  569. V = CGF.Builder.CreateTrunc(V, IntTy);
  570. }
  571. Value *Zero = llvm::Constant::getNullValue(IntTy);
  572. return CGF.Builder.CreateICmpSLT(V, Zero);
  573. }
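// Illustrative sketch: for a plain 'float' value this amounts to
//   %bits = bitcast float %x to i32
//   %sign = icmp slt i32 %bits, 0
// with the extra shift/truncate above only kicking in for ppc_fp128, where
// the sign of interest lives in the higher-order double.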
  574. static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
  575. const CallExpr *E, llvm::Constant *calleeValue) {
  576. CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  577. return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
  578. }
  579. /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
  580. /// depending on IntrinsicID.
  581. ///
  582. /// \arg CGF The current codegen function.
  583. /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
  584. /// \arg X The first argument to the llvm.*.with.overflow.*.
  585. /// \arg Y The second argument to the llvm.*.with.overflow.*.
  586. /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
  587. /// \returns The result (i.e. sum/product) returned by the intrinsic.
  588. static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
  589. const llvm::Intrinsic::ID IntrinsicID,
  590. llvm::Value *X, llvm::Value *Y,
  591. llvm::Value *&Carry) {
  592. // Make sure we have integers of the same width.
  593. assert(X->getType() == Y->getType() &&
  594. "Arguments must be the same type. (Did you forget to make sure both "
  595. "arguments have the same integer width?)");
  596. Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  597. llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  598. Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  599. return CGF.Builder.CreateExtractValue(Tmp, 0);
  600. }
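// Illustrative usage sketch (variable names are placeholders):
//   llvm::Value *Carry;
//   llvm::Value *Sum = EmitOverflowIntrinsic(
//       CGF, llvm::Intrinsic::uadd_with_overflow, X, Y, Carry);
// For i32 operands this emits a call returning {i32, i1} plus two
// extractvalues, leaving the sum in Sum and the overflow bit in Carry.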
  601. static Value *emitRangedBuiltin(CodeGenFunction &CGF,
  602. unsigned IntrinsicID,
  603. int low, int high) {
  604. llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  605. llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  606. Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  607. llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  608. Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  609. Call->setMetadata(llvm::LLVMContext::MD_noundef,
  610. llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  611. return Call;
  612. }
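// Illustrative sketch: emitRangedBuiltin(CGF, ID, 0, 1024) emits a call to
// the (non-overloaded) intrinsic ID and attaches metadata roughly of the form
//   !range !{i32 0, i32 1024}
//   !noundef !{}
// telling later passes the result is a defined value in the half-open
// range [0, 1024).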
  613. namespace {
  614. struct WidthAndSignedness {
  615. unsigned Width;
  616. bool Signed;
  617. };
  618. }
  619. static WidthAndSignedness
  620. getIntegerWidthAndSignedness(const clang::ASTContext &context,
  621. const clang::QualType Type) {
  622. assert(Type->isIntegerType() && "Given type is not an integer.");
  623. unsigned Width = Type->isBooleanType() ? 1
  624. : Type->isBitIntType() ? context.getIntWidth(Type)
  625. : context.getTypeInfo(Type).Width;
  626. bool Signed = Type->isSignedIntegerType();
  627. return {Width, Signed};
  628. }
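// Illustrative examples: 'bool' yields {1, false}, '_BitInt(7)' yields
// {7, true}, and (on typical targets) 'unsigned int' yields {32, false}.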
  629. // Given one or more integer types, this function produces an integer type that
  630. // encompasses them: any value in one of the given types could be expressed in
  631. // the encompassing type.
  632. static struct WidthAndSignedness
  633. EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  634. assert(Types.size() > 0 && "Empty list of types.");
  635. // If any of the given types is signed, we must return a signed type.
  636. bool Signed = false;
  637. for (const auto &Type : Types) {
  638. Signed |= Type.Signed;
  639. }
  640. // The encompassing type must have a width greater than or equal to the width
  641. // of the specified types. Additionally, if the encompassing type is signed,
  642. // its width must be strictly greater than the width of any unsigned types
  643. // given.
  644. unsigned Width = 0;
  645. for (const auto &Type : Types) {
  646. unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
  647. if (Width < MinWidth) {
  648. Width = MinWidth;
  649. }
  650. }
  651. return {Width, Signed};
  652. }
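// Illustrative example: encompassing {32, signed} and {32, unsigned} gives
// {33, true}: the result must be signed because one input is signed, and a
// signed type needs 33 bits to hold every 32-bit unsigned value.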
  653. Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  654. llvm::Type *DestType = Int8PtrTy;
  655. if (ArgValue->getType() != DestType)
  656. ArgValue =
  657. Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
  658. Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  659. return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
  660. }
  661. /// Checks if using the result of __builtin_object_size(p, @p From) in place of
  662. /// __builtin_object_size(p, @p To) is correct
  663. static bool areBOSTypesCompatible(int From, int To) {
  664. // Note: Our __builtin_object_size implementation currently treats Type=0 and
  665. // Type=2 identically. Encoding this implementation detail here may make
  666. // improving __builtin_object_size difficult in the future, so it's omitted.
  667. return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
  668. }
  669. static llvm::Value *
  670. getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  671. return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
  672. }
  673. llvm::Value *
  674. CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
  675. llvm::IntegerType *ResType,
  676. llvm::Value *EmittedE,
  677. bool IsDynamic) {
  678. uint64_t ObjectSize;
  679. if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
  680. return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  681. return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
  682. }
  683. /// Returns a Value corresponding to the size of the given expression.
  684. /// This Value may be either of the following:
  685. /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
  686. /// it)
  687. /// - A call to the @llvm.objectsize intrinsic
  688. ///
  689. /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
  690. /// and we wouldn't otherwise try to reference a pass_object_size parameter,
  691. /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
  692. llvm::Value *
  693. CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
  694. llvm::IntegerType *ResType,
  695. llvm::Value *EmittedE, bool IsDynamic) {
  696. // We need to reference an argument if the pointer is a parameter with the
  697. // pass_object_size attribute.
  698. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
  699. auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
  700. auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
  701. if (Param != nullptr && PS != nullptr &&
  702. areBOSTypesCompatible(PS->getType(), Type)) {
  703. auto Iter = SizeArguments.find(Param);
  704. assert(Iter != SizeArguments.end());
  705. const ImplicitParamDecl *D = Iter->second;
  706. auto DIter = LocalDeclMap.find(D);
  707. assert(DIter != LocalDeclMap.end());
  708. return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
  709. getContext().getSizeType(), E->getBeginLoc());
  710. }
  711. }
  712. // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  713. // evaluate E for side-effects. In either case, we shouldn't lower to
  714. // @llvm.objectsize.
  715. if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
  716. return getDefaultBuiltinObjectSizeResult(Type, ResType);
  717. Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  718. assert(Ptr->getType()->isPointerTy() &&
  719. "Non-pointer passed to __builtin_object_size?");
  720. Function *F =
  721. CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
722. // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
  723. Value *Min = Builder.getInt1((Type & 2) != 0);
724. // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  725. Value *NullIsUnknown = Builder.getTrue();
  726. Value *Dynamic = Builder.getInt1(IsDynamic);
  727. return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
  728. }
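// Illustrative sketch: for __builtin_object_size(p, 0) this typically becomes
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
// i.e. min=false (types 0/1 report the upper bound), null-is-unknown=true for
// GCC compatibility, and dynamic=false for the non-__builtin_dynamic_object_size
// case.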
  729. namespace {
  730. /// A struct to generically describe a bit test intrinsic.
  731. struct BitTest {
  732. enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  733. enum InterlockingKind : uint8_t {
  734. Unlocked,
  735. Sequential,
  736. Acquire,
  737. Release,
  738. NoFence
  739. };
  740. ActionKind Action;
  741. InterlockingKind Interlocking;
  742. bool Is64Bit;
  743. static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
  744. };
  745. } // namespace
  746. BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  747. switch (BuiltinID) {
  748. // Main portable variants.
  749. case Builtin::BI_bittest:
  750. return {TestOnly, Unlocked, false};
  751. case Builtin::BI_bittestandcomplement:
  752. return {Complement, Unlocked, false};
  753. case Builtin::BI_bittestandreset:
  754. return {Reset, Unlocked, false};
  755. case Builtin::BI_bittestandset:
  756. return {Set, Unlocked, false};
  757. case Builtin::BI_interlockedbittestandreset:
  758. return {Reset, Sequential, false};
  759. case Builtin::BI_interlockedbittestandset:
  760. return {Set, Sequential, false};
  761. // X86-specific 64-bit variants.
  762. case Builtin::BI_bittest64:
  763. return {TestOnly, Unlocked, true};
  764. case Builtin::BI_bittestandcomplement64:
  765. return {Complement, Unlocked, true};
  766. case Builtin::BI_bittestandreset64:
  767. return {Reset, Unlocked, true};
  768. case Builtin::BI_bittestandset64:
  769. return {Set, Unlocked, true};
  770. case Builtin::BI_interlockedbittestandreset64:
  771. return {Reset, Sequential, true};
  772. case Builtin::BI_interlockedbittestandset64:
  773. return {Set, Sequential, true};
  774. // ARM/AArch64-specific ordering variants.
  775. case Builtin::BI_interlockedbittestandset_acq:
  776. return {Set, Acquire, false};
  777. case Builtin::BI_interlockedbittestandset_rel:
  778. return {Set, Release, false};
  779. case Builtin::BI_interlockedbittestandset_nf:
  780. return {Set, NoFence, false};
  781. case Builtin::BI_interlockedbittestandreset_acq:
  782. return {Reset, Acquire, false};
  783. case Builtin::BI_interlockedbittestandreset_rel:
  784. return {Reset, Release, false};
  785. case Builtin::BI_interlockedbittestandreset_nf:
  786. return {Reset, NoFence, false};
  787. }
  788. llvm_unreachable("expected only bittest intrinsics");
  789. }
  790. static char bitActionToX86BTCode(BitTest::ActionKind A) {
  791. switch (A) {
  792. case BitTest::TestOnly: return '\0';
  793. case BitTest::Complement: return 'c';
  794. case BitTest::Reset: return 'r';
  795. case BitTest::Set: return 's';
  796. }
  797. llvm_unreachable("invalid action");
  798. }
  799. static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
  800. BitTest BT,
  801. const CallExpr *E, Value *BitBase,
  802. Value *BitPos) {
  803. char Action = bitActionToX86BTCode(BT.Action);
  804. char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
  805. // Build the assembly.
  806. SmallString<64> Asm;
  807. raw_svector_ostream AsmOS(Asm);
  808. if (BT.Interlocking != BitTest::Unlocked)
  809. AsmOS << "lock ";
  810. AsmOS << "bt";
  811. if (Action)
  812. AsmOS << Action;
  813. AsmOS << SizeSuffix << " $2, ($1)";
  814. // Build the constraints. FIXME: We should support immediates when possible.
  815. std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  816. std::string MachineClobbers = CGF.getTarget().getClobbers();
  817. if (!MachineClobbers.empty()) {
  818. Constraints += ',';
  819. Constraints += MachineClobbers;
  820. }
  821. llvm::IntegerType *IntType = llvm::IntegerType::get(
  822. CGF.getLLVMContext(),
  823. CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  824. llvm::Type *IntPtrType = IntType->getPointerTo();
  825. llvm::FunctionType *FTy =
  826. llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
  827. llvm::InlineAsm *IA =
  828. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  829. return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
  830. }
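// Illustrative sketch: _interlockedbittestandset64 decodes to
// {Set, Sequential, true} and turns into the inline asm "lock btsq $2, ($1)",
// while plain _bittest ({TestOnly, Unlocked, false}) becomes just
// "btl $2, ($1)", with the tested bit returned through the "={@ccc}" (carry
// flag) output constraint.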
  831. static llvm::AtomicOrdering
  832. getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  833. switch (I) {
  834. case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
  835. case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  836. case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
  837. case BitTest::Release: return llvm::AtomicOrdering::Release;
  838. case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
  839. }
  840. llvm_unreachable("invalid interlocking");
  841. }
  842. /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
  843. /// bits and a bit position and read and optionally modify the bit at that
  844. /// position. The position index can be arbitrarily large, i.e. it can be larger
  845. /// than 31 or 63, so we need an indexed load in the general case.
  846. static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
  847. unsigned BuiltinID,
  848. const CallExpr *E) {
  849. Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  850. Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
  851. BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
  852. // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  853. // indexing operation internally. Use them if possible.
  854. if (CGF.getTarget().getTriple().isX86())
  855. return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
  856. // Otherwise, use generic code to load one byte and test the bit. Use all but
  857. // the bottom three bits as the array index, and the bottom three bits to form
  858. // a mask.
  859. // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  860. Value *ByteIndex = CGF.Builder.CreateAShr(
  861. BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  862. Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  863. Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
  864. ByteIndex, "bittest.byteaddr"),
  865. CGF.Int8Ty, CharUnits::One());
  866. Value *PosLow =
  867. CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
  868. llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
  869. // The updating instructions will need a mask.
  870. Value *Mask = nullptr;
  871. if (BT.Action != BitTest::TestOnly) {
  872. Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
  873. "bittest.mask");
  874. }
  875. // Check the action and ordering of the interlocked intrinsics.
  876. llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
  877. Value *OldByte = nullptr;
  878. if (Ordering != llvm::AtomicOrdering::NotAtomic) {
  879. // Emit a combined atomicrmw load/store operation for the interlocked
  880. // intrinsics.
  881. llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
  882. if (BT.Action == BitTest::Reset) {
  883. Mask = CGF.Builder.CreateNot(Mask);
  884. RMWOp = llvm::AtomicRMWInst::And;
  885. }
  886. OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
  887. Ordering);
  888. } else {
  889. // Emit a plain load for the non-interlocked intrinsics.
  890. OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
  891. Value *NewByte = nullptr;
  892. switch (BT.Action) {
  893. case BitTest::TestOnly:
  894. // Don't store anything.
  895. break;
  896. case BitTest::Complement:
  897. NewByte = CGF.Builder.CreateXor(OldByte, Mask);
  898. break;
  899. case BitTest::Reset:
  900. NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
  901. break;
  902. case BitTest::Set:
  903. NewByte = CGF.Builder.CreateOr(OldByte, Mask);
  904. break;
  905. }
  906. if (NewByte)
  907. CGF.Builder.CreateStore(NewByte, ByteAddr);
  908. }
  909. // However we loaded the old byte, either by plain load or atomicrmw, shift
  910. // the bit into the low position and mask it to 0 or 1.
  911. Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  912. return CGF.Builder.CreateAnd(
  913. ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
  914. }
  915. static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
  916. unsigned BuiltinID,
  917. const CallExpr *E) {
  918. Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
  919. SmallString<64> Asm;
  920. raw_svector_ostream AsmOS(Asm);
  921. llvm::IntegerType *RetType = CGF.Int32Ty;
  922. switch (BuiltinID) {
  923. case clang::PPC::BI__builtin_ppc_ldarx:
  924. AsmOS << "ldarx ";
  925. RetType = CGF.Int64Ty;
  926. break;
  927. case clang::PPC::BI__builtin_ppc_lwarx:
  928. AsmOS << "lwarx ";
  929. RetType = CGF.Int32Ty;
  930. break;
  931. case clang::PPC::BI__builtin_ppc_lharx:
  932. AsmOS << "lharx ";
  933. RetType = CGF.Int16Ty;
  934. break;
  935. case clang::PPC::BI__builtin_ppc_lbarx:
  936. AsmOS << "lbarx ";
  937. RetType = CGF.Int8Ty;
  938. break;
  939. default:
  940. llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  941. }
  942. AsmOS << "$0, ${1:y}";
  943. std::string Constraints = "=r,*Z,~{memory}";
  944. std::string MachineClobbers = CGF.getTarget().getClobbers();
  945. if (!MachineClobbers.empty()) {
  946. Constraints += ',';
  947. Constraints += MachineClobbers;
  948. }
  949. llvm::Type *IntPtrType = RetType->getPointerTo();
  950. llvm::FunctionType *FTy =
  951. llvm::FunctionType::get(RetType, {IntPtrType}, false);
  952. llvm::InlineAsm *IA =
  953. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  954. llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  955. CI->addParamAttr(
  956. 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  957. return CI;
  958. }
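// Illustrative sketch: __builtin_ppc_lwarx is emitted as the inline asm
// "lwarx $0, ${1:y}" with constraints "=r,*Z,~{memory}", returning an i32 and
// carrying an elementtype(i32) attribute on the pointer operand.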
  959. namespace {
  960. enum class MSVCSetJmpKind {
  961. _setjmpex,
  962. _setjmp3,
  963. _setjmp
  964. };
  965. }
  966. /// MSVC handles setjmp a bit differently on different platforms. On every
  967. /// architecture except 32-bit x86, the frame address is passed. On x86, extra
  968. /// parameters can be passed as variadic arguments, but we always pass none.
  969. static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
  970. const CallExpr *E) {
  971. llvm::Value *Arg1 = nullptr;
  972. llvm::Type *Arg1Ty = nullptr;
  973. StringRef Name;
  974. bool IsVarArg = false;
  975. if (SJKind == MSVCSetJmpKind::_setjmp3) {
  976. Name = "_setjmp3";
  977. Arg1Ty = CGF.Int32Ty;
  978. Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
  979. IsVarArg = true;
  980. } else {
  981. Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
  982. Arg1Ty = CGF.Int8PtrTy;
  983. if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
  984. Arg1 = CGF.Builder.CreateCall(
  985. CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
  986. } else
  987. Arg1 = CGF.Builder.CreateCall(
  988. CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
  989. llvm::ConstantInt::get(CGF.Int32Ty, 0));
  990. }
  991. // Mark the call site and declaration with ReturnsTwice.
  992. llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
  993. llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
  994. CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
  995. llvm::Attribute::ReturnsTwice);
  996. llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
  997. llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
  998. ReturnsTwiceAttr, /*Local=*/true);
  999. llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
  1000. CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
  1001. llvm::Value *Args[] = {Buf, Arg1};
  1002. llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
  1003. CB->setAttributes(ReturnsTwiceAttr);
  1004. return RValue::get(CB);
  1005. }
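// Illustrative sketch: for the _setjmp3 flavor (used on 32-bit x86) this
// emits a variadic call "_setjmp3(buf, 0)"; the other flavors pass the frame
// address (or, on AArch64, the result of llvm.sponentry) as the second
// argument. Both the callee and the call site are marked returns_twice.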
1006. // Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid repeating code,
1007. // we handle them here.
  1008. enum class CodeGenFunction::MSVCIntrin {
  1009. _BitScanForward,
  1010. _BitScanReverse,
  1011. _InterlockedAnd,
  1012. _InterlockedDecrement,
  1013. _InterlockedExchange,
  1014. _InterlockedExchangeAdd,
  1015. _InterlockedExchangeSub,
  1016. _InterlockedIncrement,
  1017. _InterlockedOr,
  1018. _InterlockedXor,
  1019. _InterlockedExchangeAdd_acq,
  1020. _InterlockedExchangeAdd_rel,
  1021. _InterlockedExchangeAdd_nf,
  1022. _InterlockedExchange_acq,
  1023. _InterlockedExchange_rel,
  1024. _InterlockedExchange_nf,
  1025. _InterlockedCompareExchange_acq,
  1026. _InterlockedCompareExchange_rel,
  1027. _InterlockedCompareExchange_nf,
  1028. _InterlockedCompareExchange128,
  1029. _InterlockedCompareExchange128_acq,
  1030. _InterlockedCompareExchange128_rel,
  1031. _InterlockedCompareExchange128_nf,
  1032. _InterlockedOr_acq,
  1033. _InterlockedOr_rel,
  1034. _InterlockedOr_nf,
  1035. _InterlockedXor_acq,
  1036. _InterlockedXor_rel,
  1037. _InterlockedXor_nf,
  1038. _InterlockedAnd_acq,
  1039. _InterlockedAnd_rel,
  1040. _InterlockedAnd_nf,
  1041. _InterlockedIncrement_acq,
  1042. _InterlockedIncrement_rel,
  1043. _InterlockedIncrement_nf,
  1044. _InterlockedDecrement_acq,
  1045. _InterlockedDecrement_rel,
  1046. _InterlockedDecrement_nf,
  1047. __fastfail,
  1048. };
  1049. static std::optional<CodeGenFunction::MSVCIntrin>
  1050. translateArmToMsvcIntrin(unsigned BuiltinID) {
  1051. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1052. switch (BuiltinID) {
  1053. default:
  1054. return std::nullopt;
  1055. case clang::ARM::BI_BitScanForward:
  1056. case clang::ARM::BI_BitScanForward64:
  1057. return MSVCIntrin::_BitScanForward;
  1058. case clang::ARM::BI_BitScanReverse:
  1059. case clang::ARM::BI_BitScanReverse64:
  1060. return MSVCIntrin::_BitScanReverse;
  1061. case clang::ARM::BI_InterlockedAnd64:
  1062. return MSVCIntrin::_InterlockedAnd;
  1063. case clang::ARM::BI_InterlockedExchange64:
  1064. return MSVCIntrin::_InterlockedExchange;
  1065. case clang::ARM::BI_InterlockedExchangeAdd64:
  1066. return MSVCIntrin::_InterlockedExchangeAdd;
  1067. case clang::ARM::BI_InterlockedExchangeSub64:
  1068. return MSVCIntrin::_InterlockedExchangeSub;
  1069. case clang::ARM::BI_InterlockedOr64:
  1070. return MSVCIntrin::_InterlockedOr;
  1071. case clang::ARM::BI_InterlockedXor64:
  1072. return MSVCIntrin::_InterlockedXor;
  1073. case clang::ARM::BI_InterlockedDecrement64:
  1074. return MSVCIntrin::_InterlockedDecrement;
  1075. case clang::ARM::BI_InterlockedIncrement64:
  1076. return MSVCIntrin::_InterlockedIncrement;
  1077. case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  1078. case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  1079. case clang::ARM::BI_InterlockedExchangeAdd_acq:
  1080. case clang::ARM::BI_InterlockedExchangeAdd64_acq:
  1081. return MSVCIntrin::_InterlockedExchangeAdd_acq;
  1082. case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  1083. case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  1084. case clang::ARM::BI_InterlockedExchangeAdd_rel:
  1085. case clang::ARM::BI_InterlockedExchangeAdd64_rel:
  1086. return MSVCIntrin::_InterlockedExchangeAdd_rel;
  1087. case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  1088. case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  1089. case clang::ARM::BI_InterlockedExchangeAdd_nf:
  1090. case clang::ARM::BI_InterlockedExchangeAdd64_nf:
  1091. return MSVCIntrin::_InterlockedExchangeAdd_nf;
  1092. case clang::ARM::BI_InterlockedExchange8_acq:
  1093. case clang::ARM::BI_InterlockedExchange16_acq:
  1094. case clang::ARM::BI_InterlockedExchange_acq:
  1095. case clang::ARM::BI_InterlockedExchange64_acq:
  1096. return MSVCIntrin::_InterlockedExchange_acq;
  1097. case clang::ARM::BI_InterlockedExchange8_rel:
  1098. case clang::ARM::BI_InterlockedExchange16_rel:
  1099. case clang::ARM::BI_InterlockedExchange_rel:
  1100. case clang::ARM::BI_InterlockedExchange64_rel:
  1101. return MSVCIntrin::_InterlockedExchange_rel;
  1102. case clang::ARM::BI_InterlockedExchange8_nf:
  1103. case clang::ARM::BI_InterlockedExchange16_nf:
  1104. case clang::ARM::BI_InterlockedExchange_nf:
  1105. case clang::ARM::BI_InterlockedExchange64_nf:
  1106. return MSVCIntrin::_InterlockedExchange_nf;
  1107. case clang::ARM::BI_InterlockedCompareExchange8_acq:
  1108. case clang::ARM::BI_InterlockedCompareExchange16_acq:
  1109. case clang::ARM::BI_InterlockedCompareExchange_acq:
  1110. case clang::ARM::BI_InterlockedCompareExchange64_acq:
  1111. return MSVCIntrin::_InterlockedCompareExchange_acq;
  1112. case clang::ARM::BI_InterlockedCompareExchange8_rel:
  1113. case clang::ARM::BI_InterlockedCompareExchange16_rel:
  1114. case clang::ARM::BI_InterlockedCompareExchange_rel:
  1115. case clang::ARM::BI_InterlockedCompareExchange64_rel:
  1116. return MSVCIntrin::_InterlockedCompareExchange_rel;
  1117. case clang::ARM::BI_InterlockedCompareExchange8_nf:
  1118. case clang::ARM::BI_InterlockedCompareExchange16_nf:
  1119. case clang::ARM::BI_InterlockedCompareExchange_nf:
  1120. case clang::ARM::BI_InterlockedCompareExchange64_nf:
  1121. return MSVCIntrin::_InterlockedCompareExchange_nf;
  1122. case clang::ARM::BI_InterlockedOr8_acq:
  1123. case clang::ARM::BI_InterlockedOr16_acq:
  1124. case clang::ARM::BI_InterlockedOr_acq:
  1125. case clang::ARM::BI_InterlockedOr64_acq:
  1126. return MSVCIntrin::_InterlockedOr_acq;
  1127. case clang::ARM::BI_InterlockedOr8_rel:
  1128. case clang::ARM::BI_InterlockedOr16_rel:
  1129. case clang::ARM::BI_InterlockedOr_rel:
  1130. case clang::ARM::BI_InterlockedOr64_rel:
  1131. return MSVCIntrin::_InterlockedOr_rel;
  1132. case clang::ARM::BI_InterlockedOr8_nf:
  1133. case clang::ARM::BI_InterlockedOr16_nf:
  1134. case clang::ARM::BI_InterlockedOr_nf:
  1135. case clang::ARM::BI_InterlockedOr64_nf:
  1136. return MSVCIntrin::_InterlockedOr_nf;
  1137. case clang::ARM::BI_InterlockedXor8_acq:
  1138. case clang::ARM::BI_InterlockedXor16_acq:
  1139. case clang::ARM::BI_InterlockedXor_acq:
  1140. case clang::ARM::BI_InterlockedXor64_acq:
  1141. return MSVCIntrin::_InterlockedXor_acq;
  1142. case clang::ARM::BI_InterlockedXor8_rel:
  1143. case clang::ARM::BI_InterlockedXor16_rel:
  1144. case clang::ARM::BI_InterlockedXor_rel:
  1145. case clang::ARM::BI_InterlockedXor64_rel:
  1146. return MSVCIntrin::_InterlockedXor_rel;
  1147. case clang::ARM::BI_InterlockedXor8_nf:
  1148. case clang::ARM::BI_InterlockedXor16_nf:
  1149. case clang::ARM::BI_InterlockedXor_nf:
  1150. case clang::ARM::BI_InterlockedXor64_nf:
  1151. return MSVCIntrin::_InterlockedXor_nf;
  1152. case clang::ARM::BI_InterlockedAnd8_acq:
  1153. case clang::ARM::BI_InterlockedAnd16_acq:
  1154. case clang::ARM::BI_InterlockedAnd_acq:
  1155. case clang::ARM::BI_InterlockedAnd64_acq:
  1156. return MSVCIntrin::_InterlockedAnd_acq;
  1157. case clang::ARM::BI_InterlockedAnd8_rel:
  1158. case clang::ARM::BI_InterlockedAnd16_rel:
  1159. case clang::ARM::BI_InterlockedAnd_rel:
  1160. case clang::ARM::BI_InterlockedAnd64_rel:
  1161. return MSVCIntrin::_InterlockedAnd_rel;
  1162. case clang::ARM::BI_InterlockedAnd8_nf:
  1163. case clang::ARM::BI_InterlockedAnd16_nf:
  1164. case clang::ARM::BI_InterlockedAnd_nf:
  1165. case clang::ARM::BI_InterlockedAnd64_nf:
  1166. return MSVCIntrin::_InterlockedAnd_nf;
  1167. case clang::ARM::BI_InterlockedIncrement16_acq:
  1168. case clang::ARM::BI_InterlockedIncrement_acq:
  1169. case clang::ARM::BI_InterlockedIncrement64_acq:
  1170. return MSVCIntrin::_InterlockedIncrement_acq;
  1171. case clang::ARM::BI_InterlockedIncrement16_rel:
  1172. case clang::ARM::BI_InterlockedIncrement_rel:
  1173. case clang::ARM::BI_InterlockedIncrement64_rel:
  1174. return MSVCIntrin::_InterlockedIncrement_rel;
  1175. case clang::ARM::BI_InterlockedIncrement16_nf:
  1176. case clang::ARM::BI_InterlockedIncrement_nf:
  1177. case clang::ARM::BI_InterlockedIncrement64_nf:
  1178. return MSVCIntrin::_InterlockedIncrement_nf;
  1179. case clang::ARM::BI_InterlockedDecrement16_acq:
  1180. case clang::ARM::BI_InterlockedDecrement_acq:
  1181. case clang::ARM::BI_InterlockedDecrement64_acq:
  1182. return MSVCIntrin::_InterlockedDecrement_acq;
  1183. case clang::ARM::BI_InterlockedDecrement16_rel:
  1184. case clang::ARM::BI_InterlockedDecrement_rel:
  1185. case clang::ARM::BI_InterlockedDecrement64_rel:
  1186. return MSVCIntrin::_InterlockedDecrement_rel;
  1187. case clang::ARM::BI_InterlockedDecrement16_nf:
  1188. case clang::ARM::BI_InterlockedDecrement_nf:
  1189. case clang::ARM::BI_InterlockedDecrement64_nf:
  1190. return MSVCIntrin::_InterlockedDecrement_nf;
  1191. }
  1192. llvm_unreachable("must return from switch");
  1193. }
  1194. static std::optional<CodeGenFunction::MSVCIntrin>
  1195. translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  1196. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1197. switch (BuiltinID) {
  1198. default:
  1199. return std::nullopt;
  1200. case clang::AArch64::BI_BitScanForward:
  1201. case clang::AArch64::BI_BitScanForward64:
  1202. return MSVCIntrin::_BitScanForward;
  1203. case clang::AArch64::BI_BitScanReverse:
  1204. case clang::AArch64::BI_BitScanReverse64:
  1205. return MSVCIntrin::_BitScanReverse;
  1206. case clang::AArch64::BI_InterlockedAnd64:
  1207. return MSVCIntrin::_InterlockedAnd;
  1208. case clang::AArch64::BI_InterlockedExchange64:
  1209. return MSVCIntrin::_InterlockedExchange;
  1210. case clang::AArch64::BI_InterlockedExchangeAdd64:
  1211. return MSVCIntrin::_InterlockedExchangeAdd;
  1212. case clang::AArch64::BI_InterlockedExchangeSub64:
  1213. return MSVCIntrin::_InterlockedExchangeSub;
  1214. case clang::AArch64::BI_InterlockedOr64:
  1215. return MSVCIntrin::_InterlockedOr;
  1216. case clang::AArch64::BI_InterlockedXor64:
  1217. return MSVCIntrin::_InterlockedXor;
  1218. case clang::AArch64::BI_InterlockedDecrement64:
  1219. return MSVCIntrin::_InterlockedDecrement;
  1220. case clang::AArch64::BI_InterlockedIncrement64:
  1221. return MSVCIntrin::_InterlockedIncrement;
  1222. case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  1223. case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  1224. case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  1225. case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
  1226. return MSVCIntrin::_InterlockedExchangeAdd_acq;
  1227. case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  1228. case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  1229. case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  1230. case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
  1231. return MSVCIntrin::_InterlockedExchangeAdd_rel;
  1232. case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  1233. case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  1234. case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  1235. case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
  1236. return MSVCIntrin::_InterlockedExchangeAdd_nf;
  1237. case clang::AArch64::BI_InterlockedExchange8_acq:
  1238. case clang::AArch64::BI_InterlockedExchange16_acq:
  1239. case clang::AArch64::BI_InterlockedExchange_acq:
  1240. case clang::AArch64::BI_InterlockedExchange64_acq:
  1241. return MSVCIntrin::_InterlockedExchange_acq;
  1242. case clang::AArch64::BI_InterlockedExchange8_rel:
  1243. case clang::AArch64::BI_InterlockedExchange16_rel:
  1244. case clang::AArch64::BI_InterlockedExchange_rel:
  1245. case clang::AArch64::BI_InterlockedExchange64_rel:
  1246. return MSVCIntrin::_InterlockedExchange_rel;
  1247. case clang::AArch64::BI_InterlockedExchange8_nf:
  1248. case clang::AArch64::BI_InterlockedExchange16_nf:
  1249. case clang::AArch64::BI_InterlockedExchange_nf:
  1250. case clang::AArch64::BI_InterlockedExchange64_nf:
  1251. return MSVCIntrin::_InterlockedExchange_nf;
  1252. case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  1253. case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  1254. case clang::AArch64::BI_InterlockedCompareExchange_acq:
  1255. case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  1256. return MSVCIntrin::_InterlockedCompareExchange_acq;
  1257. case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  1258. case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  1259. case clang::AArch64::BI_InterlockedCompareExchange_rel:
  1260. case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  1261. return MSVCIntrin::_InterlockedCompareExchange_rel;
  1262. case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  1263. case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  1264. case clang::AArch64::BI_InterlockedCompareExchange_nf:
  1265. case clang::AArch64::BI_InterlockedCompareExchange64_nf:
  1266. return MSVCIntrin::_InterlockedCompareExchange_nf;
  1267. case clang::AArch64::BI_InterlockedCompareExchange128:
  1268. return MSVCIntrin::_InterlockedCompareExchange128;
  1269. case clang::AArch64::BI_InterlockedCompareExchange128_acq:
  1270. return MSVCIntrin::_InterlockedCompareExchange128_acq;
  1271. case clang::AArch64::BI_InterlockedCompareExchange128_nf:
  1272. return MSVCIntrin::_InterlockedCompareExchange128_nf;
  1273. case clang::AArch64::BI_InterlockedCompareExchange128_rel:
  1274. return MSVCIntrin::_InterlockedCompareExchange128_rel;
  1275. case clang::AArch64::BI_InterlockedOr8_acq:
  1276. case clang::AArch64::BI_InterlockedOr16_acq:
  1277. case clang::AArch64::BI_InterlockedOr_acq:
  1278. case clang::AArch64::BI_InterlockedOr64_acq:
  1279. return MSVCIntrin::_InterlockedOr_acq;
  1280. case clang::AArch64::BI_InterlockedOr8_rel:
  1281. case clang::AArch64::BI_InterlockedOr16_rel:
  1282. case clang::AArch64::BI_InterlockedOr_rel:
  1283. case clang::AArch64::BI_InterlockedOr64_rel:
  1284. return MSVCIntrin::_InterlockedOr_rel;
  1285. case clang::AArch64::BI_InterlockedOr8_nf:
  1286. case clang::AArch64::BI_InterlockedOr16_nf:
  1287. case clang::AArch64::BI_InterlockedOr_nf:
  1288. case clang::AArch64::BI_InterlockedOr64_nf:
  1289. return MSVCIntrin::_InterlockedOr_nf;
  1290. case clang::AArch64::BI_InterlockedXor8_acq:
  1291. case clang::AArch64::BI_InterlockedXor16_acq:
  1292. case clang::AArch64::BI_InterlockedXor_acq:
  1293. case clang::AArch64::BI_InterlockedXor64_acq:
  1294. return MSVCIntrin::_InterlockedXor_acq;
  1295. case clang::AArch64::BI_InterlockedXor8_rel:
  1296. case clang::AArch64::BI_InterlockedXor16_rel:
  1297. case clang::AArch64::BI_InterlockedXor_rel:
  1298. case clang::AArch64::BI_InterlockedXor64_rel:
  1299. return MSVCIntrin::_InterlockedXor_rel;
  1300. case clang::AArch64::BI_InterlockedXor8_nf:
  1301. case clang::AArch64::BI_InterlockedXor16_nf:
  1302. case clang::AArch64::BI_InterlockedXor_nf:
  1303. case clang::AArch64::BI_InterlockedXor64_nf:
  1304. return MSVCIntrin::_InterlockedXor_nf;
  1305. case clang::AArch64::BI_InterlockedAnd8_acq:
  1306. case clang::AArch64::BI_InterlockedAnd16_acq:
  1307. case clang::AArch64::BI_InterlockedAnd_acq:
  1308. case clang::AArch64::BI_InterlockedAnd64_acq:
  1309. return MSVCIntrin::_InterlockedAnd_acq;
  1310. case clang::AArch64::BI_InterlockedAnd8_rel:
  1311. case clang::AArch64::BI_InterlockedAnd16_rel:
  1312. case clang::AArch64::BI_InterlockedAnd_rel:
  1313. case clang::AArch64::BI_InterlockedAnd64_rel:
  1314. return MSVCIntrin::_InterlockedAnd_rel;
  1315. case clang::AArch64::BI_InterlockedAnd8_nf:
  1316. case clang::AArch64::BI_InterlockedAnd16_nf:
  1317. case clang::AArch64::BI_InterlockedAnd_nf:
  1318. case clang::AArch64::BI_InterlockedAnd64_nf:
  1319. return MSVCIntrin::_InterlockedAnd_nf;
  1320. case clang::AArch64::BI_InterlockedIncrement16_acq:
  1321. case clang::AArch64::BI_InterlockedIncrement_acq:
  1322. case clang::AArch64::BI_InterlockedIncrement64_acq:
  1323. return MSVCIntrin::_InterlockedIncrement_acq;
  1324. case clang::AArch64::BI_InterlockedIncrement16_rel:
  1325. case clang::AArch64::BI_InterlockedIncrement_rel:
  1326. case clang::AArch64::BI_InterlockedIncrement64_rel:
  1327. return MSVCIntrin::_InterlockedIncrement_rel;
  1328. case clang::AArch64::BI_InterlockedIncrement16_nf:
  1329. case clang::AArch64::BI_InterlockedIncrement_nf:
  1330. case clang::AArch64::BI_InterlockedIncrement64_nf:
  1331. return MSVCIntrin::_InterlockedIncrement_nf;
  1332. case clang::AArch64::BI_InterlockedDecrement16_acq:
  1333. case clang::AArch64::BI_InterlockedDecrement_acq:
  1334. case clang::AArch64::BI_InterlockedDecrement64_acq:
  1335. return MSVCIntrin::_InterlockedDecrement_acq;
  1336. case clang::AArch64::BI_InterlockedDecrement16_rel:
  1337. case clang::AArch64::BI_InterlockedDecrement_rel:
  1338. case clang::AArch64::BI_InterlockedDecrement64_rel:
  1339. return MSVCIntrin::_InterlockedDecrement_rel;
  1340. case clang::AArch64::BI_InterlockedDecrement16_nf:
  1341. case clang::AArch64::BI_InterlockedDecrement_nf:
  1342. case clang::AArch64::BI_InterlockedDecrement64_nf:
  1343. return MSVCIntrin::_InterlockedDecrement_nf;
  1344. }
  1345. llvm_unreachable("must return from switch");
  1346. }
  1347. static std::optional<CodeGenFunction::MSVCIntrin>
  1348. translateX86ToMsvcIntrin(unsigned BuiltinID) {
  1349. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1350. switch (BuiltinID) {
  1351. default:
  1352. return std::nullopt;
  1353. case clang::X86::BI_BitScanForward:
  1354. case clang::X86::BI_BitScanForward64:
  1355. return MSVCIntrin::_BitScanForward;
  1356. case clang::X86::BI_BitScanReverse:
  1357. case clang::X86::BI_BitScanReverse64:
  1358. return MSVCIntrin::_BitScanReverse;
  1359. case clang::X86::BI_InterlockedAnd64:
  1360. return MSVCIntrin::_InterlockedAnd;
  1361. case clang::X86::BI_InterlockedCompareExchange128:
  1362. return MSVCIntrin::_InterlockedCompareExchange128;
  1363. case clang::X86::BI_InterlockedExchange64:
  1364. return MSVCIntrin::_InterlockedExchange;
  1365. case clang::X86::BI_InterlockedExchangeAdd64:
  1366. return MSVCIntrin::_InterlockedExchangeAdd;
  1367. case clang::X86::BI_InterlockedExchangeSub64:
  1368. return MSVCIntrin::_InterlockedExchangeSub;
  1369. case clang::X86::BI_InterlockedOr64:
  1370. return MSVCIntrin::_InterlockedOr;
  1371. case clang::X86::BI_InterlockedXor64:
  1372. return MSVCIntrin::_InterlockedXor;
  1373. case clang::X86::BI_InterlockedDecrement64:
  1374. return MSVCIntrin::_InterlockedDecrement;
  1375. case clang::X86::BI_InterlockedIncrement64:
  1376. return MSVCIntrin::_InterlockedIncrement;
  1377. }
  1378. llvm_unreachable("must return from switch");
  1379. }
  1380. // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
  1381. Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
  1382. const CallExpr *E) {
  1383. switch (BuiltinID) {
  1384. case MSVCIntrin::_BitScanForward:
  1385. case MSVCIntrin::_BitScanReverse: {
  1386. Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
  1387. Value *ArgValue = EmitScalarExpr(E->getArg(1));
  1388. llvm::Type *ArgType = ArgValue->getType();
  1389. llvm::Type *IndexType = IndexAddress.getElementType();
  1390. llvm::Type *ResultType = ConvertType(E->getType());
  1391. Value *ArgZero = llvm::Constant::getNullValue(ArgType);
  1392. Value *ResZero = llvm::Constant::getNullValue(ResultType);
  1393. Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
  1394. BasicBlock *Begin = Builder.GetInsertBlock();
  1395. BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
  1396. Builder.SetInsertPoint(End);
  1397. PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
  1398. Builder.SetInsertPoint(Begin);
  1399. Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
  1400. BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
  1401. Builder.CreateCondBr(IsZero, End, NotZero);
  1402. Result->addIncoming(ResZero, Begin);
  1403. Builder.SetInsertPoint(NotZero);
  1404. if (BuiltinID == MSVCIntrin::_BitScanForward) {
  1405. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  1406. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  1407. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  1408. Builder.CreateStore(ZeroCount, IndexAddress, false);
  1409. } else {
  1410. unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
  1411. Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
  1412. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  1413. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  1414. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  1415. Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
  1416. Builder.CreateStore(Index, IndexAddress, false);
  1417. }
  1418. Builder.CreateBr(End);
  1419. Result->addIncoming(ResOne, NotZero);
  1420. Builder.SetInsertPoint(End);
  1421. return Result;
  1422. }
  1423. case MSVCIntrin::_InterlockedAnd:
  1424. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  1425. case MSVCIntrin::_InterlockedExchange:
  1426. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  1427. case MSVCIntrin::_InterlockedExchangeAdd:
  1428. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  1429. case MSVCIntrin::_InterlockedExchangeSub:
  1430. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  1431. case MSVCIntrin::_InterlockedOr:
  1432. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  1433. case MSVCIntrin::_InterlockedXor:
  1434. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
  1435. case MSVCIntrin::_InterlockedExchangeAdd_acq:
  1436. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1437. AtomicOrdering::Acquire);
  1438. case MSVCIntrin::_InterlockedExchangeAdd_rel:
  1439. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1440. AtomicOrdering::Release);
  1441. case MSVCIntrin::_InterlockedExchangeAdd_nf:
  1442. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1443. AtomicOrdering::Monotonic);
  1444. case MSVCIntrin::_InterlockedExchange_acq:
  1445. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1446. AtomicOrdering::Acquire);
  1447. case MSVCIntrin::_InterlockedExchange_rel:
  1448. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1449. AtomicOrdering::Release);
  1450. case MSVCIntrin::_InterlockedExchange_nf:
  1451. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1452. AtomicOrdering::Monotonic);
  1453. case MSVCIntrin::_InterlockedCompareExchange_acq:
  1454. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
  1455. case MSVCIntrin::_InterlockedCompareExchange_rel:
  1456. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
  1457. case MSVCIntrin::_InterlockedCompareExchange_nf:
  1458. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  1459. case MSVCIntrin::_InterlockedCompareExchange128:
  1460. return EmitAtomicCmpXchg128ForMSIntrin(
  1461. *this, E, AtomicOrdering::SequentiallyConsistent);
  1462. case MSVCIntrin::_InterlockedCompareExchange128_acq:
  1463. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
  1464. case MSVCIntrin::_InterlockedCompareExchange128_rel:
  1465. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
  1466. case MSVCIntrin::_InterlockedCompareExchange128_nf:
  1467. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  1468. case MSVCIntrin::_InterlockedOr_acq:
  1469. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1470. AtomicOrdering::Acquire);
  1471. case MSVCIntrin::_InterlockedOr_rel:
  1472. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1473. AtomicOrdering::Release);
  1474. case MSVCIntrin::_InterlockedOr_nf:
  1475. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1476. AtomicOrdering::Monotonic);
  1477. case MSVCIntrin::_InterlockedXor_acq:
  1478. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1479. AtomicOrdering::Acquire);
  1480. case MSVCIntrin::_InterlockedXor_rel:
  1481. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1482. AtomicOrdering::Release);
  1483. case MSVCIntrin::_InterlockedXor_nf:
  1484. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1485. AtomicOrdering::Monotonic);
  1486. case MSVCIntrin::_InterlockedAnd_acq:
  1487. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1488. AtomicOrdering::Acquire);
  1489. case MSVCIntrin::_InterlockedAnd_rel:
  1490. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1491. AtomicOrdering::Release);
  1492. case MSVCIntrin::_InterlockedAnd_nf:
  1493. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1494. AtomicOrdering::Monotonic);
  1495. case MSVCIntrin::_InterlockedIncrement_acq:
  1496. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
  1497. case MSVCIntrin::_InterlockedIncrement_rel:
  1498. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
  1499. case MSVCIntrin::_InterlockedIncrement_nf:
  1500. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
  1501. case MSVCIntrin::_InterlockedDecrement_acq:
  1502. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
  1503. case MSVCIntrin::_InterlockedDecrement_rel:
  1504. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
  1505. case MSVCIntrin::_InterlockedDecrement_nf:
  1506. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
  1507. case MSVCIntrin::_InterlockedDecrement:
  1508. return EmitAtomicDecrementValue(*this, E);
  1509. case MSVCIntrin::_InterlockedIncrement:
  1510. return EmitAtomicIncrementValue(*this, E);
  1511. case MSVCIntrin::__fastfail: {
  1512. // Request immediate process termination from the kernel. The instruction
  1513. // sequences to do this are documented on MSDN:
  1514. // https://msdn.microsoft.com/en-us/library/dn774154.aspx
  1515. llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
  1516. StringRef Asm, Constraints;
  1517. switch (ISA) {
  1518. default:
  1519. ErrorUnsupported(E, "__fastfail call for this architecture");
  1520. break;
  1521. case llvm::Triple::x86:
  1522. case llvm::Triple::x86_64:
  1523. Asm = "int $$0x29";
  1524. Constraints = "{cx}";
  1525. break;
  1526. case llvm::Triple::thumb:
  1527. Asm = "udf #251";
  1528. Constraints = "{r0}";
  1529. break;
  1530. case llvm::Triple::aarch64:
  1531. Asm = "brk #0xF003";
  1532. Constraints = "{w0}";
  1533. }
  1534. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
  1535. llvm::InlineAsm *IA =
  1536. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  1537. llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
  1538. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  1539. llvm::Attribute::NoReturn);
  1540. llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
  1541. CI->setAttributes(NoReturnAttr);
  1542. return CI;
  1543. }
  1544. }
  1545. llvm_unreachable("Incorrect MSVC intrinsic!");
  1546. }
  1547. namespace {
  1548. // ARC cleanup for __builtin_os_log_format
  1549. struct CallObjCArcUse final : EHScopeStack::Cleanup {
  1550. CallObjCArcUse(llvm::Value *object) : object(object) {}
  1551. llvm::Value *object;
  1552. void Emit(CodeGenFunction &CGF, Flags flags) override {
  1553. CGF.EmitARCIntrinsicUse(object);
  1554. }
  1555. };
  1556. }
  1557. Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
  1558. BuiltinCheckKind Kind) {
  1559. assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
  1560. && "Unsupported builtin check kind");
  1561. Value *ArgValue = EmitScalarExpr(E);
  1562. if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
  1563. return ArgValue;
  1564. SanitizerScope SanScope(this);
  1565. Value *Cond = Builder.CreateICmpNE(
  1566. ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  1567. EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
  1568. SanitizerHandler::InvalidBuiltin,
  1569. {EmitCheckSourceLocation(E->getExprLoc()),
  1570. llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
  1571. std::nullopt);
  1572. return ArgValue;
  1573. }
  1574. /// Get the argument type for arguments to os_log_helper.
  1575. static CanQualType getOSLogArgType(ASTContext &C, int Size) {
  1576. QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
  1577. return C.getCanonicalType(UnsignedTy);
  1578. }
  1579. llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
  1580. const analyze_os_log::OSLogBufferLayout &Layout,
  1581. CharUnits BufferAlignment) {
  1582. ASTContext &Ctx = getContext();
  1583. llvm::SmallString<64> Name;
  1584. {
  1585. raw_svector_ostream OS(Name);
  1586. OS << "__os_log_helper";
  1587. OS << "_" << BufferAlignment.getQuantity();
  1588. OS << "_" << int(Layout.getSummaryByte());
  1589. OS << "_" << int(Layout.getNumArgsByte());
  1590. for (const auto &Item : Layout.Items)
  1591. OS << "_" << int(Item.getSizeByte()) << "_"
  1592. << int(Item.getDescriptorByte());
  1593. }
  1594. if (llvm::Function *F = CGM.getModule().getFunction(Name))
  1595. return F;
  1596. llvm::SmallVector<QualType, 4> ArgTys;
  1597. FunctionArgList Args;
  1598. Args.push_back(ImplicitParamDecl::Create(
  1599. Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
  1600. ImplicitParamDecl::Other));
  1601. ArgTys.emplace_back(Ctx.VoidPtrTy);
  1602. for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
  1603. char Size = Layout.Items[I].getSizeByte();
  1604. if (!Size)
  1605. continue;
  1606. QualType ArgTy = getOSLogArgType(Ctx, Size);
  1607. Args.push_back(ImplicitParamDecl::Create(
  1608. Ctx, nullptr, SourceLocation(),
  1609. &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
  1610. ImplicitParamDecl::Other));
  1611. ArgTys.emplace_back(ArgTy);
  1612. }
  1613. QualType ReturnTy = Ctx.VoidTy;
  1614. // The helper function has linkonce_odr linkage to enable the linker to merge
  1615. // identical functions. To ensure the merging always happens, 'noinline' is
  1616. // attached to the function when compiling with -Oz.
  1617. const CGFunctionInfo &FI =
  1618. CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
  1619. llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  1620. llvm::Function *Fn = llvm::Function::Create(
  1621. FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  1622. Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  1623. CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
  1624. CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
  1625. Fn->setDoesNotThrow();
  1626. // Attach 'noinline' at -Oz.
  1627. if (CGM.getCodeGenOpts().OptimizeSize == 2)
  1628. Fn->addFnAttr(llvm::Attribute::NoInline);
  1629. auto NL = ApplyDebugLocation::CreateEmpty(*this);
  1630. StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
  1631. // Create a scope with an artificial location for the body of this function.
  1632. auto AL = ApplyDebugLocation::CreateArtificial(*this);
  1633. CharUnits Offset;
  1634. Address BufAddr =
  1635. Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
  1636. BufferAlignment);
  1637. Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
  1638. Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  1639. Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
  1640. Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
  1641. unsigned I = 1;
  1642. for (const auto &Item : Layout.Items) {
  1643. Builder.CreateStore(
  1644. Builder.getInt8(Item.getDescriptorByte()),
  1645. Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
  1646. Builder.CreateStore(
  1647. Builder.getInt8(Item.getSizeByte()),
  1648. Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
  1649. CharUnits Size = Item.size();
  1650. if (!Size.getQuantity())
  1651. continue;
  1652. Address Arg = GetAddrOfLocalVar(Args[I]);
  1653. Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
  1654. Addr =
  1655. Builder.CreateElementBitCast(Addr, Arg.getElementType(), "argDataCast");
  1656. Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
  1657. Offset += Size;
  1658. ++I;
  1659. }
  1660. FinishFunction();
  1661. return Fn;
  1662. }
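// Illustrative sketch: the helper's name encodes the buffer layout. For
// example, a 4-byte-aligned buffer with summary byte 0, one argument described
// by an 8-byte item with descriptor byte 34 would (hypothetically) be named
// "__os_log_helper_4_0_1_8_34", so identical layouts share a single
// linkonce_odr definition.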
  1663. RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
  1664. assert(E.getNumArgs() >= 2 &&
  1665. "__builtin_os_log_format takes at least 2 arguments");
  1666. ASTContext &Ctx = getContext();
  1667. analyze_os_log::OSLogBufferLayout Layout;
  1668. analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
  1669. Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
  1670. llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
  1671. // Ignore argument 1, the format string. It is not currently used.
  1672. CallArgList Args;
  1673. Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
  1674. for (const auto &Item : Layout.Items) {
  1675. int Size = Item.getSizeByte();
  1676. if (!Size)
  1677. continue;
  1678. llvm::Value *ArgVal;
  1679. if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
  1680. uint64_t Val = 0;
  1681. for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
  1682. Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
  1683. ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
  1684. } else if (const Expr *TheExpr = Item.getExpr()) {
  1685. ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
  1686. // If a temporary object that requires destruction after the full
  1687. // expression is passed, push a lifetime-extended cleanup to extend its
  1688. // lifetime to the end of the enclosing block scope.
  1689. auto LifetimeExtendObject = [&](const Expr *E) {
  1690. E = E->IgnoreParenCasts();
  1691. // Extend lifetimes of objects returned by function calls and message
  1692. // sends.
  1693. // FIXME: We should do this in other cases in which temporaries are
  1694. // created including arguments of non-ARC types (e.g., C++
  1695. // temporaries).
  1696. if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
  1697. return true;
  1698. return false;
  1699. };
  1700. if (TheExpr->getType()->isObjCRetainableType() &&
  1701. getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
  1702. assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
  1703. "Only scalar can be a ObjC retainable type");
  1704. if (!isa<Constant>(ArgVal)) {
  1705. CleanupKind Cleanup = getARCCleanupKind();
  1706. QualType Ty = TheExpr->getType();
  1707. Address Alloca = Address::invalid();
  1708. Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
  1709. ArgVal = EmitARCRetain(Ty, ArgVal);
  1710. Builder.CreateStore(ArgVal, Addr);
  1711. pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
  1712. CodeGenFunction::destroyARCStrongPrecise,
  1713. Cleanup & EHCleanup);
  1714. // Push a clang.arc.use call to ensure ARC optimizer knows that the
  1715. // argument has to be alive.
  1716. if (CGM.getCodeGenOpts().OptimizationLevel != 0)
  1717. pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
  1718. }
  1719. }
  1720. } else {
  1721. ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
  1722. }
  1723. unsigned ArgValSize =
  1724. CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
  1725. llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
  1726. ArgValSize);
  1727. ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
  1728. CanQualType ArgTy = getOSLogArgType(Ctx, Size);
  1729. // If ArgVal has type x86_fp80, zero-extend ArgVal.
  1730. ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
  1731. Args.add(RValue::get(ArgVal), ArgTy);
  1732. }
  1733. const CGFunctionInfo &FI =
  1734. CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
  1735. llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
  1736. Layout, BufAddr.getAlignment());
  1737. EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
  1738. return RValue::get(BufAddr.getPointer());
  1739. }
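/// Determine if this is an unsigned __builtin_mul_overflow whose operands and
/// (signed) result all have the same width; such calls can be lowered with a
/// single unsigned multiply-with-overflow plus an INT_MAX comparison.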
  1740. static bool isSpecialUnsignedMultiplySignedResult(
  1741. unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
  1742. WidthAndSignedness ResultInfo) {
  1743. return BuiltinID == Builtin::BI__builtin_mul_overflow &&
  1744. Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
  1745. !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
  1746. }
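/// Emit the special case recognized above: do the unsigned
/// multiply-with-overflow and additionally report overflow when the product
/// exceeds the signed maximum of the result type.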
  1747. static RValue EmitCheckedUnsignedMultiplySignedResult(
  1748. CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
  1749. const clang::Expr *Op2, WidthAndSignedness Op2Info,
  1750. const clang::Expr *ResultArg, QualType ResultQTy,
  1751. WidthAndSignedness ResultInfo) {
  1752. assert(isSpecialUnsignedMultiplySignedResult(
  1753. Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
  1754. "Cannot specialize this multiply");
  1755. llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
  1756. llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
  1757. llvm::Value *HasOverflow;
  1758. llvm::Value *Result = EmitOverflowIntrinsic(
  1759. CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
1760. // The intrinsic call will detect overflow when the value is > UINT_MAX;
  1761. // however, since the original builtin had a signed result, we need to report
  1762. // an overflow when the result is greater than INT_MAX.
  1763. auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
  1764. llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
  1765. llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
  1766. HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
  1767. bool isVolatile =
  1768. ResultArg->getType()->getPointeeType().isVolatileQualified();
  1769. Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  1770. CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
  1771. isVolatile);
  1772. return RValue::get(HasOverflow);
  1773. }
  1774. /// Determine if a binop is a checked mixed-sign multiply we can specialize.
  1775. static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
  1776. WidthAndSignedness Op1Info,
  1777. WidthAndSignedness Op2Info,
  1778. WidthAndSignedness ResultInfo) {
  1779. return BuiltinID == Builtin::BI__builtin_mul_overflow &&
  1780. std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
  1781. Op1Info.Signed != Op2Info.Signed;
  1782. }
  1783. /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
  1784. /// the generic checked-binop irgen.
  1785. static RValue
  1786. EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
  1787. WidthAndSignedness Op1Info, const clang::Expr *Op2,
  1788. WidthAndSignedness Op2Info,
  1789. const clang::Expr *ResultArg, QualType ResultQTy,
  1790. WidthAndSignedness ResultInfo) {
  1791. assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
  1792. Op2Info, ResultInfo) &&
  1793. "Not a mixed-sign multipliction we can specialize");
  1794. // Emit the signed and unsigned operands.
  1795. const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
  1796. const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
  1797. llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
  1798. llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
  1799. unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
  1800. unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
  1801. // One of the operands may be smaller than the other. If so, [s|z]ext it.
  1802. if (SignedOpWidth < UnsignedOpWidth)
  1803. Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
  1804. if (UnsignedOpWidth < SignedOpWidth)
  1805. Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
  1806. llvm::Type *OpTy = Signed->getType();
  1807. llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
  1808. Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  1809. llvm::Type *ResTy = ResultPtr.getElementType();
  1810. unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
  1811. // Take the absolute value of the signed operand.
  1812. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
  1813. llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
  1814. llvm::Value *AbsSigned =
  1815. CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
  1816. // Perform a checked unsigned multiplication.
  1817. llvm::Value *UnsignedOverflow;
  1818. llvm::Value *UnsignedResult =
  1819. EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
  1820. Unsigned, UnsignedOverflow);
  1821. llvm::Value *Overflow, *Result;
  1822. if (ResultInfo.Signed) {
1823. // Signed overflow occurs if the result is greater than INT_MAX or less
1824. // than INT_MIN, i.e., when |Result| > (INT_MAX + IsNegative).
  1825. auto IntMax =
  1826. llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
  1827. llvm::Value *MaxResult =
  1828. CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
  1829. CGF.Builder.CreateZExt(IsNegative, OpTy));
  1830. llvm::Value *SignedOverflow =
  1831. CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
  1832. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
  1833. // Prepare the signed result (possibly by negating it).
  1834. llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
  1835. llvm::Value *SignedResult =
  1836. CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
  1837. Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
  1838. } else {
  1839. // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
  1840. llvm::Value *Underflow = CGF.Builder.CreateAnd(
  1841. IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
  1842. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
  1843. if (ResultInfo.Width < OpWidth) {
  1844. auto IntMax =
  1845. llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
  1846. llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
  1847. UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
  1848. Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
  1849. }
  1850. // Negate the product if it would be negative in infinite precision.
  1851. Result = CGF.Builder.CreateSelect(
  1852. IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
  1853. Result = CGF.Builder.CreateTrunc(Result, ResTy);
  1854. }
  1855. assert(Overflow && Result && "Missing overflow or result");
  1856. bool isVolatile =
  1857. ResultArg->getType()->getPointeeType().isVolatileQualified();
  1858. CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
  1859. isVolatile);
  1860. return RValue::get(Overflow);
  1861. }
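/// Recursive worker for TypeRequiresBuiltinLaunder: look through arrays and
/// walk field types searching for a dynamic class, using 'Seen' to avoid
/// revisiting a record (and to terminate on recursive types).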
  1862. static bool
  1863. TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
  1864. llvm::SmallPtrSetImpl<const Decl *> &Seen) {
  1865. if (const auto *Arr = Ctx.getAsArrayType(Ty))
  1866. Ty = Ctx.getBaseElementType(Arr);
  1867. const auto *Record = Ty->getAsCXXRecordDecl();
  1868. if (!Record)
  1869. return false;
  1870. // We've already checked this type, or are in the process of checking it.
  1871. if (!Seen.insert(Record).second)
  1872. return false;
  1873. assert(Record->hasDefinition() &&
  1874. "Incomplete types should already be diagnosed");
  1875. if (Record->isDynamicClass())
  1876. return true;
  1877. for (FieldDecl *F : Record->fields()) {
  1878. if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
  1879. return true;
  1880. }
  1881. return false;
  1882. }
  1883. /// Determine if the specified type requires laundering by checking if it is a
  1884. /// dynamic class type or contains a subobject which is a dynamic class type.
  1885. static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
  1886. if (!CGM.getCodeGenOpts().StrictVTablePointers)
  1887. return false;
  1888. llvm::SmallPtrSet<const Decl *, 16> Seen;
  1889. return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
  1890. }
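/// Emit __builtin_rotateleft/__builtin_rotateright (and the Microsoft
/// _rotl/_rotr family) as an LLVM funnel-shift intrinsic whose first two
/// operands are the same value.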
  1891. RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
  1892. llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  1893. llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
  1894. // The builtin's shift arg may have a different type than the source arg and
  1895. // result, but the LLVM intrinsic uses the same type for all values.
  1896. llvm::Type *Ty = Src->getType();
  1897. ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
  1898. // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
  1899. unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  1900. Function *F = CGM.getIntrinsic(IID, Ty);
  1901. return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
  1902. }
  1903. // Map math builtins for long-double to f128 version.
  1904. static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
  1905. switch (BuiltinID) {
  1906. #define MUTATE_LDBL(func) \
  1907. case Builtin::BI__builtin_##func##l: \
  1908. return Builtin::BI__builtin_##func##f128;
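// For example, MUTATE_LDBL(sqrt) maps Builtin::BI__builtin_sqrtl to
// Builtin::BI__builtin_sqrtf128.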
  1909. MUTATE_LDBL(sqrt)
  1910. MUTATE_LDBL(cbrt)
  1911. MUTATE_LDBL(fabs)
  1912. MUTATE_LDBL(log)
  1913. MUTATE_LDBL(log2)
  1914. MUTATE_LDBL(log10)
  1915. MUTATE_LDBL(log1p)
  1916. MUTATE_LDBL(logb)
  1917. MUTATE_LDBL(exp)
  1918. MUTATE_LDBL(exp2)
  1919. MUTATE_LDBL(expm1)
  1920. MUTATE_LDBL(fdim)
  1921. MUTATE_LDBL(hypot)
  1922. MUTATE_LDBL(ilogb)
  1923. MUTATE_LDBL(pow)
  1924. MUTATE_LDBL(fmin)
  1925. MUTATE_LDBL(fmax)
  1926. MUTATE_LDBL(ceil)
  1927. MUTATE_LDBL(trunc)
  1928. MUTATE_LDBL(rint)
  1929. MUTATE_LDBL(nearbyint)
  1930. MUTATE_LDBL(round)
  1931. MUTATE_LDBL(floor)
  1932. MUTATE_LDBL(lround)
  1933. MUTATE_LDBL(llround)
  1934. MUTATE_LDBL(lrint)
  1935. MUTATE_LDBL(llrint)
  1936. MUTATE_LDBL(fmod)
  1937. MUTATE_LDBL(modf)
  1938. MUTATE_LDBL(nan)
  1939. MUTATE_LDBL(nans)
  1940. MUTATE_LDBL(inf)
  1941. MUTATE_LDBL(fma)
  1942. MUTATE_LDBL(sin)
  1943. MUTATE_LDBL(cos)
  1944. MUTATE_LDBL(tan)
  1945. MUTATE_LDBL(sinh)
  1946. MUTATE_LDBL(cosh)
  1947. MUTATE_LDBL(tanh)
  1948. MUTATE_LDBL(asin)
  1949. MUTATE_LDBL(acos)
  1950. MUTATE_LDBL(atan)
  1951. MUTATE_LDBL(asinh)
  1952. MUTATE_LDBL(acosh)
  1953. MUTATE_LDBL(atanh)
  1954. MUTATE_LDBL(atan2)
  1955. MUTATE_LDBL(erf)
  1956. MUTATE_LDBL(erfc)
  1957. MUTATE_LDBL(ldexp)
  1958. MUTATE_LDBL(frexp)
  1959. MUTATE_LDBL(huge_val)
  1960. MUTATE_LDBL(copysign)
  1961. MUTATE_LDBL(nextafter)
  1962. MUTATE_LDBL(nexttoward)
  1963. MUTATE_LDBL(remainder)
  1964. MUTATE_LDBL(remquo)
  1965. MUTATE_LDBL(scalbln)
  1966. MUTATE_LDBL(scalbn)
  1967. MUTATE_LDBL(tgamma)
  1968. MUTATE_LDBL(lgamma)
  1969. #undef MUTATE_LDBL
  1970. default:
  1971. return BuiltinID;
  1972. }
  1973. }
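/// Emit a call to a builtin. Calls that can be constant-folded are folded up
/// front; math builtins are lowered to LLVM intrinsics when errno and FP
/// exceptions allow it; everything else is handled case by case below.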
  1974. RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
  1975. const CallExpr *E,
  1976. ReturnValueSlot ReturnValue) {
  1977. const FunctionDecl *FD = GD.getDecl()->getAsFunction();
  1978. // See if we can constant fold this builtin. If so, don't emit it at all.
  1979. // TODO: Extend this handling to all builtin calls that we can constant-fold.
  1980. Expr::EvalResult Result;
  1981. if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
  1982. !Result.hasSideEffects()) {
  1983. if (Result.Val.isInt())
  1984. return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
  1985. Result.Val.getInt()));
  1986. if (Result.Val.isFloat())
  1987. return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
  1988. Result.Val.getFloat()));
  1989. }
1990. // If the current long-double semantics is IEEE 128-bit, replace long-double
1991. // math builtins with their f128 equivalents.
1992. // TODO: This mutation should also be applied to targets other than PPC, once
1993. // the backend supports IEEE 128-bit style libcalls.
  1994. if (getTarget().getTriple().isPPC64() &&
  1995. &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
  1996. BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
  1997. // If the builtin has been declared explicitly with an assembler label,
  1998. // disable the specialized emitting below. Ideally we should communicate the
  1999. // rename in IR, or at least avoid generating the intrinsic calls that are
  2000. // likely to get lowered to the renamed library functions.
  2001. const unsigned BuiltinIDIfNoAsmLabel =
  2002. FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2003. // There are LLVM math intrinsics/instructions corresponding to math library
2004. // functions, except that the LLVM op will never set errno while the math
2005. // library might. Also, math builtins have the same semantics as their math
2006. // library twins. Thus, we can transform math library and builtin calls to their
2007. // LLVM counterparts if the call is marked 'const' (known to never set errno).
2008. // If FP exceptions are enabled, the experimental versions of the intrinsics
2009. // model them.
  2010. bool ConstWithoutErrnoAndExceptions =
  2011. getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
  2012. bool ConstWithoutExceptions =
  2013. getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
  2014. if (FD->hasAttr<ConstAttr>() ||
  2015. ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
  2016. (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
  2017. switch (BuiltinIDIfNoAsmLabel) {
  2018. case Builtin::BIceil:
  2019. case Builtin::BIceilf:
  2020. case Builtin::BIceill:
  2021. case Builtin::BI__builtin_ceil:
  2022. case Builtin::BI__builtin_ceilf:
  2023. case Builtin::BI__builtin_ceilf16:
  2024. case Builtin::BI__builtin_ceill:
  2025. case Builtin::BI__builtin_ceilf128:
  2026. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2027. Intrinsic::ceil,
  2028. Intrinsic::experimental_constrained_ceil));
  2029. case Builtin::BIcopysign:
  2030. case Builtin::BIcopysignf:
  2031. case Builtin::BIcopysignl:
  2032. case Builtin::BI__builtin_copysign:
  2033. case Builtin::BI__builtin_copysignf:
  2034. case Builtin::BI__builtin_copysignf16:
  2035. case Builtin::BI__builtin_copysignl:
  2036. case Builtin::BI__builtin_copysignf128:
  2037. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  2038. case Builtin::BIcos:
  2039. case Builtin::BIcosf:
  2040. case Builtin::BIcosl:
  2041. case Builtin::BI__builtin_cos:
  2042. case Builtin::BI__builtin_cosf:
  2043. case Builtin::BI__builtin_cosf16:
  2044. case Builtin::BI__builtin_cosl:
  2045. case Builtin::BI__builtin_cosf128:
  2046. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2047. Intrinsic::cos,
  2048. Intrinsic::experimental_constrained_cos));
  2049. case Builtin::BIexp:
  2050. case Builtin::BIexpf:
  2051. case Builtin::BIexpl:
  2052. case Builtin::BI__builtin_exp:
  2053. case Builtin::BI__builtin_expf:
  2054. case Builtin::BI__builtin_expf16:
  2055. case Builtin::BI__builtin_expl:
  2056. case Builtin::BI__builtin_expf128:
  2057. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2058. Intrinsic::exp,
  2059. Intrinsic::experimental_constrained_exp));
  2060. case Builtin::BIexp2:
  2061. case Builtin::BIexp2f:
  2062. case Builtin::BIexp2l:
  2063. case Builtin::BI__builtin_exp2:
  2064. case Builtin::BI__builtin_exp2f:
  2065. case Builtin::BI__builtin_exp2f16:
  2066. case Builtin::BI__builtin_exp2l:
  2067. case Builtin::BI__builtin_exp2f128:
  2068. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2069. Intrinsic::exp2,
  2070. Intrinsic::experimental_constrained_exp2));
  2071. case Builtin::BIfabs:
  2072. case Builtin::BIfabsf:
  2073. case Builtin::BIfabsl:
  2074. case Builtin::BI__builtin_fabs:
  2075. case Builtin::BI__builtin_fabsf:
  2076. case Builtin::BI__builtin_fabsf16:
  2077. case Builtin::BI__builtin_fabsl:
  2078. case Builtin::BI__builtin_fabsf128:
  2079. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  2080. case Builtin::BIfloor:
  2081. case Builtin::BIfloorf:
  2082. case Builtin::BIfloorl:
  2083. case Builtin::BI__builtin_floor:
  2084. case Builtin::BI__builtin_floorf:
  2085. case Builtin::BI__builtin_floorf16:
  2086. case Builtin::BI__builtin_floorl:
  2087. case Builtin::BI__builtin_floorf128:
  2088. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2089. Intrinsic::floor,
  2090. Intrinsic::experimental_constrained_floor));
  2091. case Builtin::BIfma:
  2092. case Builtin::BIfmaf:
  2093. case Builtin::BIfmal:
  2094. case Builtin::BI__builtin_fma:
  2095. case Builtin::BI__builtin_fmaf:
  2096. case Builtin::BI__builtin_fmaf16:
  2097. case Builtin::BI__builtin_fmal:
  2098. case Builtin::BI__builtin_fmaf128:
  2099. return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
  2100. Intrinsic::fma,
  2101. Intrinsic::experimental_constrained_fma));
  2102. case Builtin::BIfmax:
  2103. case Builtin::BIfmaxf:
  2104. case Builtin::BIfmaxl:
  2105. case Builtin::BI__builtin_fmax:
  2106. case Builtin::BI__builtin_fmaxf:
  2107. case Builtin::BI__builtin_fmaxf16:
  2108. case Builtin::BI__builtin_fmaxl:
  2109. case Builtin::BI__builtin_fmaxf128:
  2110. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2111. Intrinsic::maxnum,
  2112. Intrinsic::experimental_constrained_maxnum));
  2113. case Builtin::BIfmin:
  2114. case Builtin::BIfminf:
  2115. case Builtin::BIfminl:
  2116. case Builtin::BI__builtin_fmin:
  2117. case Builtin::BI__builtin_fminf:
  2118. case Builtin::BI__builtin_fminf16:
  2119. case Builtin::BI__builtin_fminl:
  2120. case Builtin::BI__builtin_fminf128:
  2121. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2122. Intrinsic::minnum,
  2123. Intrinsic::experimental_constrained_minnum));
2124. // fmod() is a special case. It maps to the frem instruction rather than an
  2125. // LLVM intrinsic.
  2126. case Builtin::BIfmod:
  2127. case Builtin::BIfmodf:
  2128. case Builtin::BIfmodl:
  2129. case Builtin::BI__builtin_fmod:
  2130. case Builtin::BI__builtin_fmodf:
  2131. case Builtin::BI__builtin_fmodf16:
  2132. case Builtin::BI__builtin_fmodl:
  2133. case Builtin::BI__builtin_fmodf128: {
  2134. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2135. Value *Arg1 = EmitScalarExpr(E->getArg(0));
  2136. Value *Arg2 = EmitScalarExpr(E->getArg(1));
  2137. return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
  2138. }
  2139. case Builtin::BIlog:
  2140. case Builtin::BIlogf:
  2141. case Builtin::BIlogl:
  2142. case Builtin::BI__builtin_log:
  2143. case Builtin::BI__builtin_logf:
  2144. case Builtin::BI__builtin_logf16:
  2145. case Builtin::BI__builtin_logl:
  2146. case Builtin::BI__builtin_logf128:
  2147. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2148. Intrinsic::log,
  2149. Intrinsic::experimental_constrained_log));
  2150. case Builtin::BIlog10:
  2151. case Builtin::BIlog10f:
  2152. case Builtin::BIlog10l:
  2153. case Builtin::BI__builtin_log10:
  2154. case Builtin::BI__builtin_log10f:
  2155. case Builtin::BI__builtin_log10f16:
  2156. case Builtin::BI__builtin_log10l:
  2157. case Builtin::BI__builtin_log10f128:
  2158. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2159. Intrinsic::log10,
  2160. Intrinsic::experimental_constrained_log10));
  2161. case Builtin::BIlog2:
  2162. case Builtin::BIlog2f:
  2163. case Builtin::BIlog2l:
  2164. case Builtin::BI__builtin_log2:
  2165. case Builtin::BI__builtin_log2f:
  2166. case Builtin::BI__builtin_log2f16:
  2167. case Builtin::BI__builtin_log2l:
  2168. case Builtin::BI__builtin_log2f128:
  2169. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2170. Intrinsic::log2,
  2171. Intrinsic::experimental_constrained_log2));
  2172. case Builtin::BInearbyint:
  2173. case Builtin::BInearbyintf:
  2174. case Builtin::BInearbyintl:
  2175. case Builtin::BI__builtin_nearbyint:
  2176. case Builtin::BI__builtin_nearbyintf:
  2177. case Builtin::BI__builtin_nearbyintl:
  2178. case Builtin::BI__builtin_nearbyintf128:
  2179. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2180. Intrinsic::nearbyint,
  2181. Intrinsic::experimental_constrained_nearbyint));
  2182. case Builtin::BIpow:
  2183. case Builtin::BIpowf:
  2184. case Builtin::BIpowl:
  2185. case Builtin::BI__builtin_pow:
  2186. case Builtin::BI__builtin_powf:
  2187. case Builtin::BI__builtin_powf16:
  2188. case Builtin::BI__builtin_powl:
  2189. case Builtin::BI__builtin_powf128:
  2190. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2191. Intrinsic::pow,
  2192. Intrinsic::experimental_constrained_pow));
  2193. case Builtin::BIrint:
  2194. case Builtin::BIrintf:
  2195. case Builtin::BIrintl:
  2196. case Builtin::BI__builtin_rint:
  2197. case Builtin::BI__builtin_rintf:
  2198. case Builtin::BI__builtin_rintf16:
  2199. case Builtin::BI__builtin_rintl:
  2200. case Builtin::BI__builtin_rintf128:
  2201. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2202. Intrinsic::rint,
  2203. Intrinsic::experimental_constrained_rint));
  2204. case Builtin::BIround:
  2205. case Builtin::BIroundf:
  2206. case Builtin::BIroundl:
  2207. case Builtin::BI__builtin_round:
  2208. case Builtin::BI__builtin_roundf:
  2209. case Builtin::BI__builtin_roundf16:
  2210. case Builtin::BI__builtin_roundl:
  2211. case Builtin::BI__builtin_roundf128:
  2212. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2213. Intrinsic::round,
  2214. Intrinsic::experimental_constrained_round));
  2215. case Builtin::BIsin:
  2216. case Builtin::BIsinf:
  2217. case Builtin::BIsinl:
  2218. case Builtin::BI__builtin_sin:
  2219. case Builtin::BI__builtin_sinf:
  2220. case Builtin::BI__builtin_sinf16:
  2221. case Builtin::BI__builtin_sinl:
  2222. case Builtin::BI__builtin_sinf128:
  2223. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2224. Intrinsic::sin,
  2225. Intrinsic::experimental_constrained_sin));
  2226. case Builtin::BIsqrt:
  2227. case Builtin::BIsqrtf:
  2228. case Builtin::BIsqrtl:
  2229. case Builtin::BI__builtin_sqrt:
  2230. case Builtin::BI__builtin_sqrtf:
  2231. case Builtin::BI__builtin_sqrtf16:
  2232. case Builtin::BI__builtin_sqrtl:
  2233. case Builtin::BI__builtin_sqrtf128:
  2234. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2235. Intrinsic::sqrt,
  2236. Intrinsic::experimental_constrained_sqrt));
  2237. case Builtin::BItrunc:
  2238. case Builtin::BItruncf:
  2239. case Builtin::BItruncl:
  2240. case Builtin::BI__builtin_trunc:
  2241. case Builtin::BI__builtin_truncf:
  2242. case Builtin::BI__builtin_truncf16:
  2243. case Builtin::BI__builtin_truncl:
  2244. case Builtin::BI__builtin_truncf128:
  2245. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2246. Intrinsic::trunc,
  2247. Intrinsic::experimental_constrained_trunc));
  2248. case Builtin::BIlround:
  2249. case Builtin::BIlroundf:
  2250. case Builtin::BIlroundl:
  2251. case Builtin::BI__builtin_lround:
  2252. case Builtin::BI__builtin_lroundf:
  2253. case Builtin::BI__builtin_lroundl:
  2254. case Builtin::BI__builtin_lroundf128:
  2255. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2256. *this, E, Intrinsic::lround,
  2257. Intrinsic::experimental_constrained_lround));
  2258. case Builtin::BIllround:
  2259. case Builtin::BIllroundf:
  2260. case Builtin::BIllroundl:
  2261. case Builtin::BI__builtin_llround:
  2262. case Builtin::BI__builtin_llroundf:
  2263. case Builtin::BI__builtin_llroundl:
  2264. case Builtin::BI__builtin_llroundf128:
  2265. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2266. *this, E, Intrinsic::llround,
  2267. Intrinsic::experimental_constrained_llround));
  2268. case Builtin::BIlrint:
  2269. case Builtin::BIlrintf:
  2270. case Builtin::BIlrintl:
  2271. case Builtin::BI__builtin_lrint:
  2272. case Builtin::BI__builtin_lrintf:
  2273. case Builtin::BI__builtin_lrintl:
  2274. case Builtin::BI__builtin_lrintf128:
  2275. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2276. *this, E, Intrinsic::lrint,
  2277. Intrinsic::experimental_constrained_lrint));
  2278. case Builtin::BIllrint:
  2279. case Builtin::BIllrintf:
  2280. case Builtin::BIllrintl:
  2281. case Builtin::BI__builtin_llrint:
  2282. case Builtin::BI__builtin_llrintf:
  2283. case Builtin::BI__builtin_llrintl:
  2284. case Builtin::BI__builtin_llrintf128:
  2285. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2286. *this, E, Intrinsic::llrint,
  2287. Intrinsic::experimental_constrained_llrint));
  2288. default:
  2289. break;
  2290. }
  2291. }
  2292. switch (BuiltinIDIfNoAsmLabel) {
  2293. default: break;
  2294. case Builtin::BI__builtin___CFStringMakeConstantString:
  2295. case Builtin::BI__builtin___NSStringMakeConstantString:
  2296. return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
  2297. case Builtin::BI__builtin_stdarg_start:
  2298. case Builtin::BI__builtin_va_start:
  2299. case Builtin::BI__va_start:
  2300. case Builtin::BI__builtin_va_end:
  2301. EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
  2302. ? EmitScalarExpr(E->getArg(0))
  2303. : EmitVAListRef(E->getArg(0)).getPointer(),
  2304. BuiltinID != Builtin::BI__builtin_va_end);
  2305. return RValue::get(nullptr);
  2306. case Builtin::BI__builtin_va_copy: {
  2307. Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
  2308. Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
  2309. llvm::Type *Type = Int8PtrTy;
  2310. DstPtr = Builder.CreateBitCast(DstPtr, Type);
  2311. SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
  2312. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
  2313. return RValue::get(nullptr);
  2314. }
  2315. case Builtin::BI__builtin_abs:
  2316. case Builtin::BI__builtin_labs:
  2317. case Builtin::BI__builtin_llabs: {
  2318. // X < 0 ? -X : X
  2319. // The negation has 'nsw' because abs of INT_MIN is undefined.
  2320. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2321. Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
  2322. Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
  2323. Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
  2324. Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
  2325. return RValue::get(Result);
  2326. }
  2327. case Builtin::BI__builtin_complex: {
  2328. Value *Real = EmitScalarExpr(E->getArg(0));
  2329. Value *Imag = EmitScalarExpr(E->getArg(1));
  2330. return RValue::getComplex({Real, Imag});
  2331. }
  2332. case Builtin::BI__builtin_conj:
  2333. case Builtin::BI__builtin_conjf:
  2334. case Builtin::BI__builtin_conjl:
  2335. case Builtin::BIconj:
  2336. case Builtin::BIconjf:
  2337. case Builtin::BIconjl: {
  2338. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2339. Value *Real = ComplexVal.first;
  2340. Value *Imag = ComplexVal.second;
  2341. Imag = Builder.CreateFNeg(Imag, "neg");
  2342. return RValue::getComplex(std::make_pair(Real, Imag));
  2343. }
  2344. case Builtin::BI__builtin_creal:
  2345. case Builtin::BI__builtin_crealf:
  2346. case Builtin::BI__builtin_creall:
  2347. case Builtin::BIcreal:
  2348. case Builtin::BIcrealf:
  2349. case Builtin::BIcreall: {
  2350. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2351. return RValue::get(ComplexVal.first);
  2352. }
  2353. case Builtin::BI__builtin_preserve_access_index: {
2354. // Only enable the preserved access index region when debuginfo
2355. // is available, as debuginfo is needed to preserve the user-level
2356. // access pattern.
  2357. if (!getDebugInfo()) {
  2358. CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
  2359. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2360. }
  2361. // Nested builtin_preserve_access_index() not supported
  2362. if (IsInPreservedAIRegion) {
  2363. CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
  2364. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2365. }
  2366. IsInPreservedAIRegion = true;
  2367. Value *Res = EmitScalarExpr(E->getArg(0));
  2368. IsInPreservedAIRegion = false;
  2369. return RValue::get(Res);
  2370. }
  2371. case Builtin::BI__builtin_cimag:
  2372. case Builtin::BI__builtin_cimagf:
  2373. case Builtin::BI__builtin_cimagl:
  2374. case Builtin::BIcimag:
  2375. case Builtin::BIcimagf:
  2376. case Builtin::BIcimagl: {
  2377. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2378. return RValue::get(ComplexVal.second);
  2379. }
  2380. case Builtin::BI__builtin_clrsb:
  2381. case Builtin::BI__builtin_clrsbl:
  2382. case Builtin::BI__builtin_clrsbll: {
2383. // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
  2384. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2385. llvm::Type *ArgType = ArgValue->getType();
  2386. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2387. llvm::Type *ResultType = ConvertType(E->getType());
  2388. Value *Zero = llvm::Constant::getNullValue(ArgType);
  2389. Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
  2390. Value *Inverse = Builder.CreateNot(ArgValue, "not");
  2391. Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
  2392. Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
  2393. Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
  2394. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2395. "cast");
  2396. return RValue::get(Result);
  2397. }
  2398. case Builtin::BI__builtin_ctzs:
  2399. case Builtin::BI__builtin_ctz:
  2400. case Builtin::BI__builtin_ctzl:
  2401. case Builtin::BI__builtin_ctzll: {
  2402. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
  2403. llvm::Type *ArgType = ArgValue->getType();
  2404. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  2405. llvm::Type *ResultType = ConvertType(E->getType());
  2406. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  2407. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  2408. if (Result->getType() != ResultType)
  2409. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2410. "cast");
  2411. return RValue::get(Result);
  2412. }
  2413. case Builtin::BI__builtin_clzs:
  2414. case Builtin::BI__builtin_clz:
  2415. case Builtin::BI__builtin_clzl:
  2416. case Builtin::BI__builtin_clzll: {
  2417. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
  2418. llvm::Type *ArgType = ArgValue->getType();
  2419. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2420. llvm::Type *ResultType = ConvertType(E->getType());
  2421. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  2422. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  2423. if (Result->getType() != ResultType)
  2424. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2425. "cast");
  2426. return RValue::get(Result);
  2427. }
  2428. case Builtin::BI__builtin_ffs:
  2429. case Builtin::BI__builtin_ffsl:
  2430. case Builtin::BI__builtin_ffsll: {
  2431. // ffs(x) -> x ? cttz(x) + 1 : 0
  2432. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2433. llvm::Type *ArgType = ArgValue->getType();
  2434. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  2435. llvm::Type *ResultType = ConvertType(E->getType());
  2436. Value *Tmp =
  2437. Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
  2438. llvm::ConstantInt::get(ArgType, 1));
  2439. Value *Zero = llvm::Constant::getNullValue(ArgType);
  2440. Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
  2441. Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
  2442. if (Result->getType() != ResultType)
  2443. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2444. "cast");
  2445. return RValue::get(Result);
  2446. }
  2447. case Builtin::BI__builtin_parity:
  2448. case Builtin::BI__builtin_parityl:
  2449. case Builtin::BI__builtin_parityll: {
  2450. // parity(x) -> ctpop(x) & 1
  2451. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2452. llvm::Type *ArgType = ArgValue->getType();
  2453. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  2454. llvm::Type *ResultType = ConvertType(E->getType());
  2455. Value *Tmp = Builder.CreateCall(F, ArgValue);
  2456. Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
  2457. if (Result->getType() != ResultType)
  2458. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2459. "cast");
  2460. return RValue::get(Result);
  2461. }
  2462. case Builtin::BI__lzcnt16:
  2463. case Builtin::BI__lzcnt:
  2464. case Builtin::BI__lzcnt64: {
  2465. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2466. llvm::Type *ArgType = ArgValue->getType();
  2467. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2468. llvm::Type *ResultType = ConvertType(E->getType());
  2469. Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
  2470. if (Result->getType() != ResultType)
  2471. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2472. "cast");
  2473. return RValue::get(Result);
  2474. }
  2475. case Builtin::BI__popcnt16:
  2476. case Builtin::BI__popcnt:
  2477. case Builtin::BI__popcnt64:
  2478. case Builtin::BI__builtin_popcount:
  2479. case Builtin::BI__builtin_popcountl:
  2480. case Builtin::BI__builtin_popcountll: {
  2481. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2482. llvm::Type *ArgType = ArgValue->getType();
  2483. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  2484. llvm::Type *ResultType = ConvertType(E->getType());
  2485. Value *Result = Builder.CreateCall(F, ArgValue);
  2486. if (Result->getType() != ResultType)
  2487. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2488. "cast");
  2489. return RValue::get(Result);
  2490. }
  2491. case Builtin::BI__builtin_unpredictable: {
  2492. // Always return the argument of __builtin_unpredictable. LLVM does not
  2493. // handle this builtin. Metadata for this builtin should be added directly
  2494. // to instructions such as branches or switches that use it.
  2495. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2496. }
  2497. case Builtin::BI__builtin_expect: {
  2498. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2499. llvm::Type *ArgType = ArgValue->getType();
  2500. Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
  2501. // Don't generate llvm.expect on -O0 as the backend won't use it for
  2502. // anything.
  2503. // Note, we still IRGen ExpectedValue because it could have side-effects.
  2504. if (CGM.getCodeGenOpts().OptimizationLevel == 0)
  2505. return RValue::get(ArgValue);
  2506. Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
  2507. Value *Result =
  2508. Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
  2509. return RValue::get(Result);
  2510. }
  2511. case Builtin::BI__builtin_expect_with_probability: {
  2512. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2513. llvm::Type *ArgType = ArgValue->getType();
  2514. Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
  2515. llvm::APFloat Probability(0.0);
  2516. const Expr *ProbArg = E->getArg(2);
  2517. bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
  2518. assert(EvalSucceed && "probability should be able to evaluate as float");
  2519. (void)EvalSucceed;
  2520. bool LoseInfo = false;
  2521. Probability.convert(llvm::APFloat::IEEEdouble(),
  2522. llvm::RoundingMode::Dynamic, &LoseInfo);
  2523. llvm::Type *Ty = ConvertType(ProbArg->getType());
  2524. Constant *Confidence = ConstantFP::get(Ty, Probability);
  2525. // Don't generate llvm.expect.with.probability on -O0 as the backend
  2526. // won't use it for anything.
  2527. // Note, we still IRGen ExpectedValue because it could have side-effects.
  2528. if (CGM.getCodeGenOpts().OptimizationLevel == 0)
  2529. return RValue::get(ArgValue);
  2530. Function *FnExpect =
  2531. CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
  2532. Value *Result = Builder.CreateCall(
  2533. FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
  2534. return RValue::get(Result);
  2535. }
  2536. case Builtin::BI__builtin_assume_aligned: {
  2537. const Expr *Ptr = E->getArg(0);
  2538. Value *PtrValue = EmitScalarExpr(Ptr);
  2539. if (PtrValue->getType() != VoidPtrTy)
  2540. PtrValue = EmitCastToVoidPtr(PtrValue);
  2541. Value *OffsetValue =
  2542. (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
  2543. Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
  2544. ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
  2545. if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
  2546. AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
  2547. llvm::Value::MaximumAlignment);
  2548. emitAlignmentAssumption(PtrValue, Ptr,
  2549. /*The expr loc is sufficient.*/ SourceLocation(),
  2550. AlignmentCI, OffsetValue);
  2551. return RValue::get(PtrValue);
  2552. }
  2553. case Builtin::BI__assume:
  2554. case Builtin::BI__builtin_assume: {
  2555. if (E->getArg(0)->HasSideEffects(getContext()))
  2556. return RValue::get(nullptr);
  2557. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2558. Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
  2559. Builder.CreateCall(FnAssume, ArgValue);
  2560. return RValue::get(nullptr);
  2561. }
  2562. case Builtin::BI__arithmetic_fence: {
2563. // Create the builtin call if FastMath is selected and the target
2564. // supports the builtin; otherwise just return the argument.
  2565. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2566. llvm::FastMathFlags FMF = Builder.getFastMathFlags();
  2567. bool isArithmeticFenceEnabled =
  2568. FMF.allowReassoc() &&
  2569. getContext().getTargetInfo().checkArithmeticFenceSupported();
  2570. QualType ArgType = E->getArg(0)->getType();
  2571. if (ArgType->isComplexType()) {
  2572. if (isArithmeticFenceEnabled) {
  2573. QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
  2574. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2575. Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
  2576. ConvertType(ElementType));
  2577. Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
  2578. ConvertType(ElementType));
  2579. return RValue::getComplex(std::make_pair(Real, Imag));
  2580. }
  2581. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2582. Value *Real = ComplexVal.first;
  2583. Value *Imag = ComplexVal.second;
  2584. return RValue::getComplex(std::make_pair(Real, Imag));
  2585. }
  2586. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2587. if (isArithmeticFenceEnabled)
  2588. return RValue::get(
  2589. Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
  2590. return RValue::get(ArgValue);
  2591. }
  2592. case Builtin::BI__builtin_bswap16:
  2593. case Builtin::BI__builtin_bswap32:
  2594. case Builtin::BI__builtin_bswap64:
  2595. case Builtin::BI_byteswap_ushort:
  2596. case Builtin::BI_byteswap_ulong:
  2597. case Builtin::BI_byteswap_uint64: {
  2598. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  2599. }
  2600. case Builtin::BI__builtin_bitreverse8:
  2601. case Builtin::BI__builtin_bitreverse16:
  2602. case Builtin::BI__builtin_bitreverse32:
  2603. case Builtin::BI__builtin_bitreverse64: {
  2604. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  2605. }
  2606. case Builtin::BI__builtin_rotateleft8:
  2607. case Builtin::BI__builtin_rotateleft16:
  2608. case Builtin::BI__builtin_rotateleft32:
  2609. case Builtin::BI__builtin_rotateleft64:
  2610. case Builtin::BI_rotl8: // Microsoft variants of rotate left
  2611. case Builtin::BI_rotl16:
  2612. case Builtin::BI_rotl:
  2613. case Builtin::BI_lrotl:
  2614. case Builtin::BI_rotl64:
  2615. return emitRotate(E, false);
  2616. case Builtin::BI__builtin_rotateright8:
  2617. case Builtin::BI__builtin_rotateright16:
  2618. case Builtin::BI__builtin_rotateright32:
  2619. case Builtin::BI__builtin_rotateright64:
  2620. case Builtin::BI_rotr8: // Microsoft variants of rotate right
  2621. case Builtin::BI_rotr16:
  2622. case Builtin::BI_rotr:
  2623. case Builtin::BI_lrotr:
  2624. case Builtin::BI_rotr64:
  2625. return emitRotate(E, true);
  2626. case Builtin::BI__builtin_constant_p: {
  2627. llvm::Type *ResultType = ConvertType(E->getType());
  2628. const Expr *Arg = E->getArg(0);
  2629. QualType ArgType = Arg->getType();
  2630. // FIXME: The allowance for Obj-C pointers and block pointers is historical
  2631. // and likely a mistake.
  2632. if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
  2633. !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
  2634. // Per the GCC documentation, only numeric constants are recognized after
  2635. // inlining.
  2636. return RValue::get(ConstantInt::get(ResultType, 0));
  2637. if (Arg->HasSideEffects(getContext()))
  2638. // The argument is unevaluated, so be conservative if it might have
  2639. // side-effects.
  2640. return RValue::get(ConstantInt::get(ResultType, 0));
  2641. Value *ArgValue = EmitScalarExpr(Arg);
  2642. if (ArgType->isObjCObjectPointerType()) {
  2643. // Convert Objective-C objects to id because we cannot distinguish between
  2644. // LLVM types for Obj-C classes as they are opaque.
  2645. ArgType = CGM.getContext().getObjCIdType();
  2646. ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
  2647. }
  2648. Function *F =
  2649. CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
  2650. Value *Result = Builder.CreateCall(F, ArgValue);
  2651. if (Result->getType() != ResultType)
  2652. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
  2653. return RValue::get(Result);
  2654. }
  2655. case Builtin::BI__builtin_dynamic_object_size:
  2656. case Builtin::BI__builtin_object_size: {
  2657. unsigned Type =
  2658. E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
  2659. auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
  2660. // We pass this builtin onto the optimizer so that it can figure out the
  2661. // object size in more complex cases.
  2662. bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
  2663. return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
  2664. /*EmittedE=*/nullptr, IsDynamic));
  2665. }
  2666. case Builtin::BI__builtin_prefetch: {
  2667. Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
2668. // FIXME: Technically these constants should be of type 'int', yes?
  2669. RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
  2670. llvm::ConstantInt::get(Int32Ty, 0);
  2671. Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
  2672. llvm::ConstantInt::get(Int32Ty, 3);
  2673. Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
  2674. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  2675. Builder.CreateCall(F, {Address, RW, Locality, Data});
  2676. return RValue::get(nullptr);
  2677. }
  2678. case Builtin::BI__builtin_readcyclecounter: {
  2679. Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
  2680. return RValue::get(Builder.CreateCall(F));
  2681. }
  2682. case Builtin::BI__builtin___clear_cache: {
  2683. Value *Begin = EmitScalarExpr(E->getArg(0));
  2684. Value *End = EmitScalarExpr(E->getArg(1));
  2685. Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
  2686. return RValue::get(Builder.CreateCall(F, {Begin, End}));
  2687. }
  2688. case Builtin::BI__builtin_trap:
  2689. EmitTrapCall(Intrinsic::trap);
  2690. return RValue::get(nullptr);
  2691. case Builtin::BI__debugbreak:
  2692. EmitTrapCall(Intrinsic::debugtrap);
  2693. return RValue::get(nullptr);
  2694. case Builtin::BI__builtin_unreachable: {
  2695. EmitUnreachable(E->getExprLoc());
  2696. // We do need to preserve an insertion point.
  2697. EmitBlock(createBasicBlock("unreachable.cont"));
  2698. return RValue::get(nullptr);
  2699. }
  2700. case Builtin::BI__builtin_powi:
  2701. case Builtin::BI__builtin_powif:
  2702. case Builtin::BI__builtin_powil: {
  2703. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  2704. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  2705. if (Builder.getIsFPConstrained()) {
  2706. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2707. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
  2708. Src0->getType());
  2709. return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
  2710. }
  2711. Function *F = CGM.getIntrinsic(Intrinsic::powi,
  2712. { Src0->getType(), Src1->getType() });
  2713. return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
  2714. }
  2715. case Builtin::BI__builtin_isgreater:
  2716. case Builtin::BI__builtin_isgreaterequal:
  2717. case Builtin::BI__builtin_isless:
  2718. case Builtin::BI__builtin_islessequal:
  2719. case Builtin::BI__builtin_islessgreater:
  2720. case Builtin::BI__builtin_isunordered: {
  2721. // Ordered comparisons: we know the arguments to these are matching scalar
  2722. // floating point values.
  2723. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2724. Value *LHS = EmitScalarExpr(E->getArg(0));
  2725. Value *RHS = EmitScalarExpr(E->getArg(1));
  2726. switch (BuiltinID) {
  2727. default: llvm_unreachable("Unknown ordered comparison");
  2728. case Builtin::BI__builtin_isgreater:
  2729. LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
  2730. break;
  2731. case Builtin::BI__builtin_isgreaterequal:
  2732. LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
  2733. break;
  2734. case Builtin::BI__builtin_isless:
  2735. LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
  2736. break;
  2737. case Builtin::BI__builtin_islessequal:
  2738. LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
  2739. break;
  2740. case Builtin::BI__builtin_islessgreater:
  2741. LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
  2742. break;
  2743. case Builtin::BI__builtin_isunordered:
  2744. LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
  2745. break;
  2746. }
  2747. // ZExt bool to int type.
  2748. return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  2749. }
  2750. case Builtin::BI__builtin_isnan: {
  2751. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2752. Value *V = EmitScalarExpr(E->getArg(0));
  2753. llvm::Type *Ty = V->getType();
  2754. const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
  2755. if (!Builder.getIsFPConstrained() ||
  2756. Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
  2757. !Ty->isIEEE()) {
  2758. V = Builder.CreateFCmpUNO(V, V, "cmp");
  2759. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  2760. }
  2761. if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
  2762. return RValue::get(Result);
2763. // NaN has all exp bits set and a non-zero significand. Therefore:
  2764. // isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
  2765. unsigned bitsize = Ty->getScalarSizeInBits();
  2766. llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
  2767. Value *IntV = Builder.CreateBitCast(V, IntTy);
  2768. APInt AndMask = APInt::getSignedMaxValue(bitsize);
  2769. Value *AbsV =
  2770. Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
  2771. APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
  2772. Value *Sub =
  2773. Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
  2774. // V = sign bit (Sub) <=> V = (Sub < 0)
  2775. V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
  2776. if (bitsize > 32)
  2777. V = Builder.CreateTrunc(V, ConvertType(E->getType()));
  2778. return RValue::get(V);
  2779. }
  2780. case Builtin::BI__builtin_elementwise_abs: {
  2781. Value *Result;
  2782. QualType QT = E->getArg(0)->getType();
  2783. if (auto *VecTy = QT->getAs<VectorType>())
  2784. QT = VecTy->getElementType();
  2785. if (QT->isIntegerType())
  2786. Result = Builder.CreateBinaryIntrinsic(
  2787. llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
  2788. Builder.getFalse(), nullptr, "elt.abs");
  2789. else
  2790. Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
  2791. return RValue::get(Result);
  2792. }
  2793. case Builtin::BI__builtin_elementwise_ceil:
  2794. return RValue::get(
  2795. emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
  2796. case Builtin::BI__builtin_elementwise_cos:
  2797. return RValue::get(
  2798. emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
  2799. case Builtin::BI__builtin_elementwise_floor:
  2800. return RValue::get(
  2801. emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
  2802. case Builtin::BI__builtin_elementwise_roundeven:
  2803. return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
  2804. "elt.roundeven"));
  2805. case Builtin::BI__builtin_elementwise_sin:
  2806. return RValue::get(
  2807. emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
  2808. case Builtin::BI__builtin_elementwise_trunc:
  2809. return RValue::get(
  2810. emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
  2811. case Builtin::BI__builtin_elementwise_canonicalize:
  2812. return RValue::get(
2813. emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
  2814. case Builtin::BI__builtin_elementwise_copysign:
  2815. return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
  2816. case Builtin::BI__builtin_elementwise_add_sat:
  2817. case Builtin::BI__builtin_elementwise_sub_sat: {
  2818. Value *Op0 = EmitScalarExpr(E->getArg(0));
  2819. Value *Op1 = EmitScalarExpr(E->getArg(1));
  2820. Value *Result;
  2821. assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
  2822. QualType Ty = E->getArg(0)->getType();
  2823. if (auto *VecTy = Ty->getAs<VectorType>())
  2824. Ty = VecTy->getElementType();
  2825. bool IsSigned = Ty->isSignedIntegerType();
  2826. unsigned Opc;
  2827. if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
  2828. Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
  2829. else
  2830. Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
  2831. Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
  2832. return RValue::get(Result);
  2833. }
  2834. case Builtin::BI__builtin_elementwise_max: {
  2835. Value *Op0 = EmitScalarExpr(E->getArg(0));
  2836. Value *Op1 = EmitScalarExpr(E->getArg(1));
  2837. Value *Result;
  2838. if (Op0->getType()->isIntOrIntVectorTy()) {
  2839. QualType Ty = E->getArg(0)->getType();
  2840. if (auto *VecTy = Ty->getAs<VectorType>())
  2841. Ty = VecTy->getElementType();
  2842. Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
  2843. ? llvm::Intrinsic::smax
  2844. : llvm::Intrinsic::umax,
  2845. Op0, Op1, nullptr, "elt.max");
  2846. } else
  2847. Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
  2848. return RValue::get(Result);
  2849. }
  2850. case Builtin::BI__builtin_elementwise_min: {
  2851. Value *Op0 = EmitScalarExpr(E->getArg(0));
  2852. Value *Op1 = EmitScalarExpr(E->getArg(1));
  2853. Value *Result;
  2854. if (Op0->getType()->isIntOrIntVectorTy()) {
  2855. QualType Ty = E->getArg(0)->getType();
  2856. if (auto *VecTy = Ty->getAs<VectorType>())
  2857. Ty = VecTy->getElementType();
  2858. Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
  2859. ? llvm::Intrinsic::smin
  2860. : llvm::Intrinsic::umin,
  2861. Op0, Op1, nullptr, "elt.min");
  2862. } else
  2863. Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
  2864. return RValue::get(Result);
  2865. }
  2866. case Builtin::BI__builtin_reduce_max: {
  2867. auto GetIntrinsicID = [](QualType QT) {
  2868. if (auto *VecTy = QT->getAs<VectorType>())
  2869. QT = VecTy->getElementType();
  2870. if (QT->isSignedIntegerType())
  2871. return llvm::Intrinsic::vector_reduce_smax;
  2872. if (QT->isUnsignedIntegerType())
  2873. return llvm::Intrinsic::vector_reduce_umax;
  2874. assert(QT->isFloatingType() && "must have a float here");
  2875. return llvm::Intrinsic::vector_reduce_fmax;
  2876. };
  2877. return RValue::get(emitUnaryBuiltin(
2878. *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
  2879. }
  2880. case Builtin::BI__builtin_reduce_min: {
  2881. auto GetIntrinsicID = [](QualType QT) {
  2882. if (auto *VecTy = QT->getAs<VectorType>())
  2883. QT = VecTy->getElementType();
  2884. if (QT->isSignedIntegerType())
  2885. return llvm::Intrinsic::vector_reduce_smin;
  2886. if (QT->isUnsignedIntegerType())
  2887. return llvm::Intrinsic::vector_reduce_umin;
  2888. assert(QT->isFloatingType() && "must have a float here");
  2889. return llvm::Intrinsic::vector_reduce_fmin;
  2890. };
  2891. return RValue::get(emitUnaryBuiltin(
  2892. *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
  2893. }
  2894. case Builtin::BI__builtin_reduce_add:
  2895. return RValue::get(emitUnaryBuiltin(
  2896. *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
  2897. case Builtin::BI__builtin_reduce_mul:
  2898. return RValue::get(emitUnaryBuiltin(
  2899. *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
  2900. case Builtin::BI__builtin_reduce_xor:
  2901. return RValue::get(emitUnaryBuiltin(
  2902. *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
  2903. case Builtin::BI__builtin_reduce_or:
  2904. return RValue::get(emitUnaryBuiltin(
  2905. *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
  2906. case Builtin::BI__builtin_reduce_and:
  2907. return RValue::get(emitUnaryBuiltin(
  2908. *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
  2909. case Builtin::BI__builtin_matrix_transpose: {
  2910. auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
  2911. Value *MatValue = EmitScalarExpr(E->getArg(0));
  2912. MatrixBuilder MB(Builder);
  2913. Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
  2914. MatrixTy->getNumColumns());
  2915. return RValue::get(Result);
  2916. }
  2917. case Builtin::BI__builtin_matrix_column_major_load: {
  2918. MatrixBuilder MB(Builder);
  2919. // Emit everything that isn't dependent on the first parameter type
  2920. Value *Stride = EmitScalarExpr(E->getArg(3));
  2921. const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
  2922. auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
  2923. assert(PtrTy && "arg0 must be of pointer type");
  2924. bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
  2925. Address Src = EmitPointerWithAlignment(E->getArg(0));
  2926. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
  2927. E->getArg(0)->getExprLoc(), FD, 0);
  2928. Value *Result = MB.CreateColumnMajorLoad(
  2929. Src.getElementType(), Src.getPointer(),
  2930. Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
  2931. ResultTy->getNumRows(), ResultTy->getNumColumns(),
  2932. "matrix");
  2933. return RValue::get(Result);
  2934. }
  2935. case Builtin::BI__builtin_matrix_column_major_store: {
  2936. MatrixBuilder MB(Builder);
  2937. Value *Matrix = EmitScalarExpr(E->getArg(0));
  2938. Address Dst = EmitPointerWithAlignment(E->getArg(1));
  2939. Value *Stride = EmitScalarExpr(E->getArg(2));
  2940. const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
  2941. auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
  2942. assert(PtrTy && "arg1 must be of pointer type");
  2943. bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
  2944. EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
  2945. E->getArg(1)->getExprLoc(), FD, 0);
  2946. Value *Result = MB.CreateColumnMajorStore(
  2947. Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
  2948. Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
  2949. return RValue::get(Result);
  2950. }
  2951. case Builtin::BIfinite:
  2952. case Builtin::BI__finite:
  2953. case Builtin::BIfinitef:
  2954. case Builtin::BI__finitef:
  2955. case Builtin::BIfinitel:
  2956. case Builtin::BI__finitel:
  2957. case Builtin::BI__builtin_isinf:
  2958. case Builtin::BI__builtin_isfinite: {
  2959. // isinf(x) --> fabs(x) == infinity
  2960. // isfinite(x) --> fabs(x) != infinity
  2961. // x != NaN via the ordered compare in either case.
  2962. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2963. Value *V = EmitScalarExpr(E->getArg(0));
  2964. llvm::Type *Ty = V->getType();
  2965. if (!Builder.getIsFPConstrained() ||
  2966. Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
  2967. !Ty->isIEEE()) {
  2968. Value *Fabs = EmitFAbs(*this, V);
  2969. Constant *Infinity = ConstantFP::getInfinity(V->getType());
  2970. CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
  2971. ? CmpInst::FCMP_OEQ
  2972. : CmpInst::FCMP_ONE;
  2973. Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
  2974. return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  2975. }
  2976. if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
  2977. return RValue::get(Result);
  2978. // Inf values have all exp bits set and a zero significand. Therefore:
  2979. // isinf(V) == ((V << 1) == ((exp mask) << 1))
  2980. // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
  2981. unsigned bitsize = Ty->getScalarSizeInBits();
  2982. llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
  2983. Value *IntV = Builder.CreateBitCast(V, IntTy);
  2984. Value *Shl1 = Builder.CreateShl(IntV, 1);
  2985. const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
  2986. APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
  2987. Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
  2988. if (BuiltinID == Builtin::BI__builtin_isinf)
  2989. V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
  2990. else
  2991. V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
  2992. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  2993. }
  2994. case Builtin::BI__builtin_isinf_sign: {
  2995. // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
  2996. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2997. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  2998. Value *Arg = EmitScalarExpr(E->getArg(0));
  2999. Value *AbsArg = EmitFAbs(*this, Arg);
  3000. Value *IsInf = Builder.CreateFCmpOEQ(
  3001. AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
  3002. Value *IsNeg = EmitSignBit(*this, Arg);
  3003. llvm::Type *IntTy = ConvertType(E->getType());
  3004. Value *Zero = Constant::getNullValue(IntTy);
  3005. Value *One = ConstantInt::get(IntTy, 1);
  3006. Value *NegativeOne = ConstantInt::get(IntTy, -1);
  3007. Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
  3008. Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
  3009. return RValue::get(Result);
  3010. }
  3011. case Builtin::BI__builtin_isnormal: {
  3012. // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
  3013. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  3014. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  3015. Value *V = EmitScalarExpr(E->getArg(0));
  3016. Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
  3017. Value *Abs = EmitFAbs(*this, V);
  3018. Value *IsLessThanInf =
  3019. Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
  3020. APFloat Smallest = APFloat::getSmallestNormalized(
  3021. getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
  3022. Value *IsNormal =
  3023. Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
  3024. "isnormal");
  3025. V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
  3026. V = Builder.CreateAnd(V, IsNormal, "and");
  3027. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  3028. }
  3029. case Builtin::BI__builtin_flt_rounds: {
  3030. Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
  3031. llvm::Type *ResultType = ConvertType(E->getType());
  3032. Value *Result = Builder.CreateCall(F);
  3033. if (Result->getType() != ResultType)
  3034. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  3035. "cast");
  3036. return RValue::get(Result);
  3037. }
  3038. case Builtin::BI__builtin_fpclassify: {
  3039. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  3040. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  3041. Value *V = EmitScalarExpr(E->getArg(5));
  3042. llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
  3043. // Create Result
  3044. BasicBlock *Begin = Builder.GetInsertBlock();
  3045. BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
  3046. Builder.SetInsertPoint(End);
  3047. PHINode *Result =
  3048. Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
  3049. "fpclassify_result");
  3050. // if (V==0) return FP_ZERO
  3051. Builder.SetInsertPoint(Begin);
  3052. Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
  3053. "iszero");
  3054. Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
  3055. BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
  3056. Builder.CreateCondBr(IsZero, End, NotZero);
  3057. Result->addIncoming(ZeroLiteral, Begin);
  3058. // if (V != V) return FP_NAN
  3059. Builder.SetInsertPoint(NotZero);
  3060. Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
  3061. Value *NanLiteral = EmitScalarExpr(E->getArg(0));
  3062. BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
  3063. Builder.CreateCondBr(IsNan, End, NotNan);
  3064. Result->addIncoming(NanLiteral, NotZero);
  3065. // if (fabs(V) == infinity) return FP_INFINITY
  3066. Builder.SetInsertPoint(NotNan);
  3067. Value *VAbs = EmitFAbs(*this, V);
  3068. Value *IsInf =
  3069. Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
  3070. "isinf");
  3071. Value *InfLiteral = EmitScalarExpr(E->getArg(1));
  3072. BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
  3073. Builder.CreateCondBr(IsInf, End, NotInf);
  3074. Result->addIncoming(InfLiteral, NotNan);
  3075. // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
  3076. Builder.SetInsertPoint(NotInf);
  3077. APFloat Smallest = APFloat::getSmallestNormalized(
  3078. getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
  3079. Value *IsNormal =
  3080. Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
  3081. "isnormal");
  3082. Value *NormalResult =
  3083. Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
  3084. EmitScalarExpr(E->getArg(3)));
  3085. Builder.CreateBr(End);
  3086. Result->addIncoming(NormalResult, NotInf);
  3087. // return Result
  3088. Builder.SetInsertPoint(End);
  3089. return RValue::get(Result);
  3090. }
  3091. case Builtin::BIalloca:
  3092. case Builtin::BI_alloca:
  3093. case Builtin::BI__builtin_alloca_uninitialized:
  3094. case Builtin::BI__builtin_alloca: {
  3095. Value *Size = EmitScalarExpr(E->getArg(0));
  3096. const TargetInfo &TI = getContext().getTargetInfo();
  3097. // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
  3098. const Align SuitableAlignmentInBytes =
  3099. CGM.getContext()
  3100. .toCharUnitsFromBits(TI.getSuitableAlign())
  3101. .getAsAlign();
  3102. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  3103. AI->setAlignment(SuitableAlignmentInBytes);
  3104. if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
  3105. initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
  3106. return RValue::get(AI);
  3107. }
  3108. case Builtin::BI__builtin_alloca_with_align_uninitialized:
  3109. case Builtin::BI__builtin_alloca_with_align: {
  3110. Value *Size = EmitScalarExpr(E->getArg(0));
  3111. Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
  3112. auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
  3113. unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
  3114. const Align AlignmentInBytes =
  3115. CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
  3116. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  3117. AI->setAlignment(AlignmentInBytes);
  3118. if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
  3119. initializeAlloca(*this, AI, Size, AlignmentInBytes);
  3120. return RValue::get(AI);
  3121. }
  3122. case Builtin::BIbzero:
  3123. case Builtin::BI__builtin_bzero: {
  3124. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3125. Value *SizeVal = EmitScalarExpr(E->getArg(1));
  3126. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3127. E->getArg(0)->getExprLoc(), FD, 0);
  3128. Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
  3129. return RValue::get(nullptr);
  3130. }
  3131. case Builtin::BImemcpy:
  3132. case Builtin::BI__builtin_memcpy:
  3133. case Builtin::BImempcpy:
  3134. case Builtin::BI__builtin_mempcpy: {
  3135. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3136. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3137. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3138. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3139. E->getArg(0)->getExprLoc(), FD, 0);
  3140. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3141. E->getArg(1)->getExprLoc(), FD, 1);
  3142. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  3143. if (BuiltinID == Builtin::BImempcpy ||
  3144. BuiltinID == Builtin::BI__builtin_mempcpy)
  3145. return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
  3146. Dest.getPointer(), SizeVal));
  3147. else
  3148. return RValue::get(Dest.getPointer());
  3149. }
  3150. case Builtin::BI__builtin_memcpy_inline: {
  3151. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3152. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3153. uint64_t Size =
  3154. E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
  3155. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3156. E->getArg(0)->getExprLoc(), FD, 0);
  3157. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3158. E->getArg(1)->getExprLoc(), FD, 1);
  3159. Builder.CreateMemCpyInline(Dest, Src, Size);
  3160. return RValue::get(nullptr);
  3161. }
  3162. case Builtin::BI__builtin_char_memchr:
  3163. BuiltinID = Builtin::BI__builtin_memchr;
  3164. break;
  3165. case Builtin::BI__builtin___memcpy_chk: {
  3166. // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
  3167. Expr::EvalResult SizeResult, DstSizeResult;
  3168. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3169. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3170. break;
  3171. llvm::APSInt Size = SizeResult.Val.getInt();
  3172. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3173. if (Size.ugt(DstSize))
  3174. break;
  3175. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3176. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3177. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3178. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  3179. return RValue::get(Dest.getPointer());
  3180. }
  3181. case Builtin::BI__builtin_objc_memmove_collectable: {
  3182. Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
  3183. Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
  3184. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3185. CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
  3186. DestAddr, SrcAddr, SizeVal);
  3187. return RValue::get(DestAddr.getPointer());
  3188. }
  3189. case Builtin::BI__builtin___memmove_chk: {
  3190. // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
  3191. Expr::EvalResult SizeResult, DstSizeResult;
  3192. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3193. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3194. break;
  3195. llvm::APSInt Size = SizeResult.Val.getInt();
  3196. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3197. if (Size.ugt(DstSize))
  3198. break;
  3199. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3200. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3201. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3202. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  3203. return RValue::get(Dest.getPointer());
  3204. }
  3205. case Builtin::BImemmove:
  3206. case Builtin::BI__builtin_memmove: {
  3207. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3208. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3209. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3210. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3211. E->getArg(0)->getExprLoc(), FD, 0);
  3212. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3213. E->getArg(1)->getExprLoc(), FD, 1);
  3214. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  3215. return RValue::get(Dest.getPointer());
  3216. }
  3217. case Builtin::BImemset:
  3218. case Builtin::BI__builtin_memset: {
  3219. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3220. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  3221. Builder.getInt8Ty());
  3222. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3223. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3224. E->getArg(0)->getExprLoc(), FD, 0);
  3225. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  3226. return RValue::get(Dest.getPointer());
  3227. }
  3228. case Builtin::BI__builtin_memset_inline: {
  3229. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3230. Value *ByteVal =
  3231. Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
  3232. uint64_t Size =
  3233. E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
  3234. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3235. E->getArg(0)->getExprLoc(), FD, 0);
  3236. Builder.CreateMemSetInline(Dest, ByteVal, Size);
  3237. return RValue::get(nullptr);
  3238. }
  3239. case Builtin::BI__builtin___memset_chk: {
  3240. // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
  3241. Expr::EvalResult SizeResult, DstSizeResult;
  3242. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3243. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3244. break;
  3245. llvm::APSInt Size = SizeResult.Val.getInt();
  3246. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3247. if (Size.ugt(DstSize))
  3248. break;
  3249. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3250. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  3251. Builder.getInt8Ty());
  3252. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3253. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  3254. return RValue::get(Dest.getPointer());
  3255. }
  3256. case Builtin::BI__builtin_wmemchr: {
  3257. // The MSVC runtime library does not provide a definition of wmemchr, so we
  3258. // need an inline implementation.
  3259. if (!getTarget().getTriple().isOSMSVCRT())
  3260. break;
  3261. llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
  3262. Value *Str = EmitScalarExpr(E->getArg(0));
  3263. Value *Chr = EmitScalarExpr(E->getArg(1));
  3264. Value *Size = EmitScalarExpr(E->getArg(2));
  3265. BasicBlock *Entry = Builder.GetInsertBlock();
  3266. BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
  3267. BasicBlock *Next = createBasicBlock("wmemchr.next");
  3268. BasicBlock *Exit = createBasicBlock("wmemchr.exit");
  3269. Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
  3270. Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
  3271. EmitBlock(CmpEq);
  3272. PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
  3273. StrPhi->addIncoming(Str, Entry);
  3274. PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
  3275. SizePhi->addIncoming(Size, Entry);
  3276. CharUnits WCharAlign =
  3277. getContext().getTypeAlignInChars(getContext().WCharTy);
  3278. Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
  3279. Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
  3280. Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
  3281. Builder.CreateCondBr(StrEqChr, Exit, Next);
  3282. EmitBlock(Next);
  3283. Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
  3284. Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
  3285. Value *NextSizeEq0 =
  3286. Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
  3287. Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
  3288. StrPhi->addIncoming(NextStr, Next);
  3289. SizePhi->addIncoming(NextSize, Next);
  3290. EmitBlock(Exit);
  3291. PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
  3292. Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
  3293. Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
  3294. Ret->addIncoming(FoundChr, CmpEq);
  3295. return RValue::get(Ret);
  3296. }
  3297. case Builtin::BI__builtin_wmemcmp: {
  3298. // The MSVC runtime library does not provide a definition of wmemcmp, so we
  3299. // need an inline implementation.
  3300. if (!getTarget().getTriple().isOSMSVCRT())
  3301. break;
  3302. llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
  3303. Value *Dst = EmitScalarExpr(E->getArg(0));
  3304. Value *Src = EmitScalarExpr(E->getArg(1));
  3305. Value *Size = EmitScalarExpr(E->getArg(2));
  3306. BasicBlock *Entry = Builder.GetInsertBlock();
  3307. BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
  3308. BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
  3309. BasicBlock *Next = createBasicBlock("wmemcmp.next");
  3310. BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
  3311. Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
  3312. Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
  3313. EmitBlock(CmpGT);
  3314. PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
  3315. DstPhi->addIncoming(Dst, Entry);
  3316. PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
  3317. SrcPhi->addIncoming(Src, Entry);
  3318. PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
  3319. SizePhi->addIncoming(Size, Entry);
  3320. CharUnits WCharAlign =
  3321. getContext().getTypeAlignInChars(getContext().WCharTy);
  3322. Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
  3323. Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
  3324. Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
  3325. Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
  3326. EmitBlock(CmpLT);
  3327. Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
  3328. Builder.CreateCondBr(DstLtSrc, Exit, Next);
  3329. EmitBlock(Next);
  3330. Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
  3331. Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
  3332. Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
  3333. Value *NextSizeEq0 =
  3334. Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
  3335. Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
  3336. DstPhi->addIncoming(NextDst, Next);
  3337. SrcPhi->addIncoming(NextSrc, Next);
  3338. SizePhi->addIncoming(NextSize, Next);
  3339. EmitBlock(Exit);
  3340. PHINode *Ret = Builder.CreatePHI(IntTy, 4);
  3341. Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
  3342. Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
  3343. Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
  3344. Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
  3345. return RValue::get(Ret);
  3346. }
  3347. case Builtin::BI__builtin_dwarf_cfa: {
  3348. // The offset in bytes from the first argument to the CFA.
  3349. //
  3350. // Why on earth is this in the frontend? Is there any reason at
  3351. // all that the backend can't reasonably determine this while
  3352. // lowering llvm.eh.dwarf.cfa()?
  3353. //
  3354. // TODO: If there's a satisfactory reason, add a target hook for
  3355. // this instead of hard-coding 0, which is correct for most targets.
  3356. int32_t Offset = 0;
  3357. Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
  3358. return RValue::get(Builder.CreateCall(F,
  3359. llvm::ConstantInt::get(Int32Ty, Offset)));
  3360. }
  3361. case Builtin::BI__builtin_return_address: {
  3362. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  3363. getContext().UnsignedIntTy);
  3364. Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  3365. return RValue::get(Builder.CreateCall(F, Depth));
  3366. }
  3367. case Builtin::BI_ReturnAddress: {
  3368. Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  3369. return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
  3370. }
  3371. case Builtin::BI__builtin_frame_address: {
  3372. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  3373. getContext().UnsignedIntTy);
  3374. Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
  3375. return RValue::get(Builder.CreateCall(F, Depth));
  3376. }
  3377. case Builtin::BI__builtin_extract_return_addr: {
  3378. Value *Address = EmitScalarExpr(E->getArg(0));
  3379. Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
  3380. return RValue::get(Result);
  3381. }
  3382. case Builtin::BI__builtin_frob_return_addr: {
  3383. Value *Address = EmitScalarExpr(E->getArg(0));
  3384. Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
  3385. return RValue::get(Result);
  3386. }
  3387. case Builtin::BI__builtin_dwarf_sp_column: {
  3388. llvm::IntegerType *Ty
  3389. = cast<llvm::IntegerType>(ConvertType(E->getType()));
  3390. int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
  3391. if (Column == -1) {
  3392. CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
  3393. return RValue::get(llvm::UndefValue::get(Ty));
  3394. }
  3395. return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  3396. }
  3397. case Builtin::BI__builtin_init_dwarf_reg_size_table: {
  3398. Value *Address = EmitScalarExpr(E->getArg(0));
  3399. if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
  3400. CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
  3401. return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  3402. }
  3403. case Builtin::BI__builtin_eh_return: {
  3404. Value *Int = EmitScalarExpr(E->getArg(0));
  3405. Value *Ptr = EmitScalarExpr(E->getArg(1));
  3406. llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
  3407. assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
  3408. "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
  3409. Function *F =
  3410. CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
  3411. : Intrinsic::eh_return_i64);
  3412. Builder.CreateCall(F, {Int, Ptr});
  3413. Builder.CreateUnreachable();
  3414. // We do need to preserve an insertion point.
  3415. EmitBlock(createBasicBlock("builtin_eh_return.cont"));
  3416. return RValue::get(nullptr);
  3417. }
  3418. case Builtin::BI__builtin_unwind_init: {
  3419. Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
  3420. Builder.CreateCall(F);
  3421. return RValue::get(nullptr);
  3422. }
  3423. case Builtin::BI__builtin_extend_pointer: {
  3424. // Extends a pointer to the size of an _Unwind_Word, which is
  3425. // uint64_t on all platforms. Generally this gets poked into a
  3426. // register and eventually used as an address, so if the
  3427. // addressing registers are wider than pointers and the platform
  3428. // doesn't implicitly ignore high-order bits when doing
  3429. // addressing, we need to make sure we zext / sext based on
  3430. // the platform's expectations.
  3431. //
  3432. // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
  3433. // Cast the pointer to intptr_t.
  3434. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3435. Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
  3436. // If that's 64 bits, we're done.
  3437. if (IntPtrTy->getBitWidth() == 64)
  3438. return RValue::get(Result);
  3439. // Otherwise, ask the codegen data what to do.
  3440. if (getTargetHooks().extendPointerWithSExt())
  3441. return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
  3442. else
  3443. return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  3444. }
  3445. case Builtin::BI__builtin_setjmp: {
  3446. // Buffer is a void**.
  3447. Address Buf = EmitPointerWithAlignment(E->getArg(0));
  3448. // Store the frame pointer to the setjmp buffer.
  3449. Value *FrameAddr = Builder.CreateCall(
  3450. CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
  3451. ConstantInt::get(Int32Ty, 0));
  3452. Builder.CreateStore(FrameAddr, Buf);
  3453. // Store the stack pointer to the setjmp buffer.
  3454. Value *StackAddr =
  3455. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
  3456. Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
  3457. Builder.CreateStore(StackAddr, StackSaveSlot);
  3458. // Call LLVM's EH setjmp, which is lightweight.
  3459. Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
  3460. Buf = Builder.CreateElementBitCast(Buf, Int8Ty);
  3461. return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  3462. }
  3463. case Builtin::BI__builtin_longjmp: {
  3464. Value *Buf = EmitScalarExpr(E->getArg(0));
  3465. Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
  3466. // Call LLVM's EH longjmp, which is lightweight.
  3467. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
  3468. // longjmp doesn't return; mark this as unreachable.
  3469. Builder.CreateUnreachable();
  3470. // We do need to preserve an insertion point.
  3471. EmitBlock(createBasicBlock("longjmp.cont"));
  3472. return RValue::get(nullptr);
  3473. }
  3474. case Builtin::BI__builtin_launder: {
  3475. const Expr *Arg = E->getArg(0);
  3476. QualType ArgTy = Arg->getType()->getPointeeType();
  3477. Value *Ptr = EmitScalarExpr(Arg);
  3478. if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
  3479. Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
  3480. return RValue::get(Ptr);
  3481. }
  3482. case Builtin::BI__sync_fetch_and_add:
  3483. case Builtin::BI__sync_fetch_and_sub:
  3484. case Builtin::BI__sync_fetch_and_or:
  3485. case Builtin::BI__sync_fetch_and_and:
  3486. case Builtin::BI__sync_fetch_and_xor:
  3487. case Builtin::BI__sync_fetch_and_nand:
  3488. case Builtin::BI__sync_add_and_fetch:
  3489. case Builtin::BI__sync_sub_and_fetch:
  3490. case Builtin::BI__sync_and_and_fetch:
  3491. case Builtin::BI__sync_or_and_fetch:
  3492. case Builtin::BI__sync_xor_and_fetch:
  3493. case Builtin::BI__sync_nand_and_fetch:
  3494. case Builtin::BI__sync_val_compare_and_swap:
  3495. case Builtin::BI__sync_bool_compare_and_swap:
  3496. case Builtin::BI__sync_lock_test_and_set:
  3497. case Builtin::BI__sync_lock_release:
  3498. case Builtin::BI__sync_swap:
  3499. llvm_unreachable("Shouldn't make it through sema");
  3500. case Builtin::BI__sync_fetch_and_add_1:
  3501. case Builtin::BI__sync_fetch_and_add_2:
  3502. case Builtin::BI__sync_fetch_and_add_4:
  3503. case Builtin::BI__sync_fetch_and_add_8:
  3504. case Builtin::BI__sync_fetch_and_add_16:
  3505. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  3506. case Builtin::BI__sync_fetch_and_sub_1:
  3507. case Builtin::BI__sync_fetch_and_sub_2:
  3508. case Builtin::BI__sync_fetch_and_sub_4:
  3509. case Builtin::BI__sync_fetch_and_sub_8:
  3510. case Builtin::BI__sync_fetch_and_sub_16:
  3511. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  3512. case Builtin::BI__sync_fetch_and_or_1:
  3513. case Builtin::BI__sync_fetch_and_or_2:
  3514. case Builtin::BI__sync_fetch_and_or_4:
  3515. case Builtin::BI__sync_fetch_and_or_8:
  3516. case Builtin::BI__sync_fetch_and_or_16:
  3517. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  3518. case Builtin::BI__sync_fetch_and_and_1:
  3519. case Builtin::BI__sync_fetch_and_and_2:
  3520. case Builtin::BI__sync_fetch_and_and_4:
  3521. case Builtin::BI__sync_fetch_and_and_8:
  3522. case Builtin::BI__sync_fetch_and_and_16:
  3523. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  3524. case Builtin::BI__sync_fetch_and_xor_1:
  3525. case Builtin::BI__sync_fetch_and_xor_2:
  3526. case Builtin::BI__sync_fetch_and_xor_4:
  3527. case Builtin::BI__sync_fetch_and_xor_8:
  3528. case Builtin::BI__sync_fetch_and_xor_16:
  3529. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  3530. case Builtin::BI__sync_fetch_and_nand_1:
  3531. case Builtin::BI__sync_fetch_and_nand_2:
  3532. case Builtin::BI__sync_fetch_and_nand_4:
  3533. case Builtin::BI__sync_fetch_and_nand_8:
  3534. case Builtin::BI__sync_fetch_and_nand_16:
  3535. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
  3536. // Clang extensions: not overloaded yet.
  3537. case Builtin::BI__sync_fetch_and_min:
  3538. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  3539. case Builtin::BI__sync_fetch_and_max:
  3540. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  3541. case Builtin::BI__sync_fetch_and_umin:
  3542. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  3543. case Builtin::BI__sync_fetch_and_umax:
  3544. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  3545. case Builtin::BI__sync_add_and_fetch_1:
  3546. case Builtin::BI__sync_add_and_fetch_2:
  3547. case Builtin::BI__sync_add_and_fetch_4:
  3548. case Builtin::BI__sync_add_and_fetch_8:
  3549. case Builtin::BI__sync_add_and_fetch_16:
  3550. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
  3551. llvm::Instruction::Add);
  3552. case Builtin::BI__sync_sub_and_fetch_1:
  3553. case Builtin::BI__sync_sub_and_fetch_2:
  3554. case Builtin::BI__sync_sub_and_fetch_4:
  3555. case Builtin::BI__sync_sub_and_fetch_8:
  3556. case Builtin::BI__sync_sub_and_fetch_16:
  3557. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
  3558. llvm::Instruction::Sub);
  3559. case Builtin::BI__sync_and_and_fetch_1:
  3560. case Builtin::BI__sync_and_and_fetch_2:
  3561. case Builtin::BI__sync_and_and_fetch_4:
  3562. case Builtin::BI__sync_and_and_fetch_8:
  3563. case Builtin::BI__sync_and_and_fetch_16:
  3564. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
  3565. llvm::Instruction::And);
  3566. case Builtin::BI__sync_or_and_fetch_1:
  3567. case Builtin::BI__sync_or_and_fetch_2:
  3568. case Builtin::BI__sync_or_and_fetch_4:
  3569. case Builtin::BI__sync_or_and_fetch_8:
  3570. case Builtin::BI__sync_or_and_fetch_16:
  3571. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
  3572. llvm::Instruction::Or);
  3573. case Builtin::BI__sync_xor_and_fetch_1:
  3574. case Builtin::BI__sync_xor_and_fetch_2:
  3575. case Builtin::BI__sync_xor_and_fetch_4:
  3576. case Builtin::BI__sync_xor_and_fetch_8:
  3577. case Builtin::BI__sync_xor_and_fetch_16:
  3578. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
  3579. llvm::Instruction::Xor);
  3580. case Builtin::BI__sync_nand_and_fetch_1:
  3581. case Builtin::BI__sync_nand_and_fetch_2:
  3582. case Builtin::BI__sync_nand_and_fetch_4:
  3583. case Builtin::BI__sync_nand_and_fetch_8:
  3584. case Builtin::BI__sync_nand_and_fetch_16:
  3585. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
  3586. llvm::Instruction::And, true);
  3587. case Builtin::BI__sync_val_compare_and_swap_1:
  3588. case Builtin::BI__sync_val_compare_and_swap_2:
  3589. case Builtin::BI__sync_val_compare_and_swap_4:
  3590. case Builtin::BI__sync_val_compare_and_swap_8:
  3591. case Builtin::BI__sync_val_compare_and_swap_16:
  3592. return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
  3593. case Builtin::BI__sync_bool_compare_and_swap_1:
  3594. case Builtin::BI__sync_bool_compare_and_swap_2:
  3595. case Builtin::BI__sync_bool_compare_and_swap_4:
  3596. case Builtin::BI__sync_bool_compare_and_swap_8:
  3597. case Builtin::BI__sync_bool_compare_and_swap_16:
  3598. return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
  3599. case Builtin::BI__sync_swap_1:
  3600. case Builtin::BI__sync_swap_2:
  3601. case Builtin::BI__sync_swap_4:
  3602. case Builtin::BI__sync_swap_8:
  3603. case Builtin::BI__sync_swap_16:
  3604. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  3605. case Builtin::BI__sync_lock_test_and_set_1:
  3606. case Builtin::BI__sync_lock_test_and_set_2:
  3607. case Builtin::BI__sync_lock_test_and_set_4:
  3608. case Builtin::BI__sync_lock_test_and_set_8:
  3609. case Builtin::BI__sync_lock_test_and_set_16:
  3610. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  3611. case Builtin::BI__sync_lock_release_1:
  3612. case Builtin::BI__sync_lock_release_2:
  3613. case Builtin::BI__sync_lock_release_4:
  3614. case Builtin::BI__sync_lock_release_8:
  3615. case Builtin::BI__sync_lock_release_16: {
  3616. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3617. QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  3618. CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
  3619. llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
  3620. StoreSize.getQuantity() * 8);
  3621. Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  3622. llvm::StoreInst *Store =
  3623. Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
  3624. StoreSize);
  3625. Store->setAtomic(llvm::AtomicOrdering::Release);
  3626. return RValue::get(nullptr);
  3627. }
  3628. case Builtin::BI__sync_synchronize: {
  3629. // We assume this is supposed to correspond to a C++0x-style
  3630. // sequentially-consistent fence (i.e. this is only usable for
  3631. // synchronization, not device I/O or anything like that). This intrinsic
  3632. // is really badly designed in the sense that in theory, there isn't
  3633. // any way to safely use it... but in practice, it mostly works
  3634. // to use it with non-atomic loads and stores to get acquire/release
  3635. // semantics.
  3636. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
  3637. return RValue::get(nullptr);
  3638. }
  3639. case Builtin::BI__builtin_nontemporal_load:
  3640. return RValue::get(EmitNontemporalLoad(*this, E));
  3641. case Builtin::BI__builtin_nontemporal_store:
  3642. return RValue::get(EmitNontemporalStore(*this, E));
  3643. case Builtin::BI__c11_atomic_is_lock_free:
  3644. case Builtin::BI__atomic_is_lock_free: {
  3645. // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
  3646. // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
  3647. // _Atomic(T) is always properly-aligned.
  3648. const char *LibCallName = "__atomic_is_lock_free";
  3649. CallArgList Args;
  3650. Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
  3651. getContext().getSizeType());
  3652. if (BuiltinID == Builtin::BI__atomic_is_lock_free)
  3653. Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
  3654. getContext().VoidPtrTy);
  3655. else
  3656. Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
  3657. getContext().VoidPtrTy);
  3658. const CGFunctionInfo &FuncInfo =
  3659. CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
  3660. llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
  3661. llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
  3662. return EmitCall(FuncInfo, CGCallee::forDirect(Func),
  3663. ReturnValueSlot(), Args);
  3664. }
  3665. case Builtin::BI__atomic_test_and_set: {
  3666. // Look at the argument type to determine whether this is a volatile
  3667. // operation. The parameter type is always volatile.
  3668. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  3669. bool Volatile =
  3670. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  3671. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3672. unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  3673. Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
  3674. Value *NewVal = Builder.getInt8(1);
  3675. Value *Order = EmitScalarExpr(E->getArg(1));
  3676. if (isa<llvm::ConstantInt>(Order)) {
  3677. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3678. AtomicRMWInst *Result = nullptr;
  3679. switch (ord) {
  3680. case 0: // memory_order_relaxed
  3681. default: // invalid order
  3682. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3683. llvm::AtomicOrdering::Monotonic);
  3684. break;
  3685. case 1: // memory_order_consume
  3686. case 2: // memory_order_acquire
  3687. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3688. llvm::AtomicOrdering::Acquire);
  3689. break;
  3690. case 3: // memory_order_release
  3691. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3692. llvm::AtomicOrdering::Release);
  3693. break;
  3694. case 4: // memory_order_acq_rel
  3695. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3696. llvm::AtomicOrdering::AcquireRelease);
  3697. break;
  3698. case 5: // memory_order_seq_cst
  3699. Result = Builder.CreateAtomicRMW(
  3700. llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3701. llvm::AtomicOrdering::SequentiallyConsistent);
  3702. break;
  3703. }
  3704. Result->setVolatile(Volatile);
  3705. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  3706. }
  3707. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3708. llvm::BasicBlock *BBs[5] = {
  3709. createBasicBlock("monotonic", CurFn),
  3710. createBasicBlock("acquire", CurFn),
  3711. createBasicBlock("release", CurFn),
  3712. createBasicBlock("acqrel", CurFn),
  3713. createBasicBlock("seqcst", CurFn)
  3714. };
  3715. llvm::AtomicOrdering Orders[5] = {
  3716. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
  3717. llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
  3718. llvm::AtomicOrdering::SequentiallyConsistent};
  3719. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3720. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  3721. Builder.SetInsertPoint(ContBB);
  3722. PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
  3723. for (unsigned i = 0; i < 5; ++i) {
  3724. Builder.SetInsertPoint(BBs[i]);
  3725. AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
  3726. Ptr, NewVal, Orders[i]);
  3727. RMW->setVolatile(Volatile);
  3728. Result->addIncoming(RMW, BBs[i]);
  3729. Builder.CreateBr(ContBB);
  3730. }
  3731. SI->addCase(Builder.getInt32(0), BBs[0]);
  3732. SI->addCase(Builder.getInt32(1), BBs[1]);
  3733. SI->addCase(Builder.getInt32(2), BBs[1]);
  3734. SI->addCase(Builder.getInt32(3), BBs[2]);
  3735. SI->addCase(Builder.getInt32(4), BBs[3]);
  3736. SI->addCase(Builder.getInt32(5), BBs[4]);
  3737. Builder.SetInsertPoint(ContBB);
  3738. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  3739. }
  3740. case Builtin::BI__atomic_clear: {
  3741. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  3742. bool Volatile =
  3743. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  3744. Address Ptr = EmitPointerWithAlignment(E->getArg(0));
  3745. Ptr = Builder.CreateElementBitCast(Ptr, Int8Ty);
  3746. Value *NewVal = Builder.getInt8(0);
  3747. Value *Order = EmitScalarExpr(E->getArg(1));
  3748. if (isa<llvm::ConstantInt>(Order)) {
  3749. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3750. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  3751. switch (ord) {
  3752. case 0: // memory_order_relaxed
  3753. default: // invalid order
  3754. Store->setOrdering(llvm::AtomicOrdering::Monotonic);
  3755. break;
  3756. case 3: // memory_order_release
  3757. Store->setOrdering(llvm::AtomicOrdering::Release);
  3758. break;
  3759. case 5: // memory_order_seq_cst
  3760. Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
  3761. break;
  3762. }
  3763. return RValue::get(nullptr);
  3764. }
  3765. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3766. llvm::BasicBlock *BBs[3] = {
  3767. createBasicBlock("monotonic", CurFn),
  3768. createBasicBlock("release", CurFn),
  3769. createBasicBlock("seqcst", CurFn)
  3770. };
  3771. llvm::AtomicOrdering Orders[3] = {
  3772. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
  3773. llvm::AtomicOrdering::SequentiallyConsistent};
  3774. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3775. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  3776. for (unsigned i = 0; i < 3; ++i) {
  3777. Builder.SetInsertPoint(BBs[i]);
  3778. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  3779. Store->setOrdering(Orders[i]);
  3780. Builder.CreateBr(ContBB);
  3781. }
  3782. SI->addCase(Builder.getInt32(0), BBs[0]);
  3783. SI->addCase(Builder.getInt32(3), BBs[1]);
  3784. SI->addCase(Builder.getInt32(5), BBs[2]);
  3785. Builder.SetInsertPoint(ContBB);
  3786. return RValue::get(nullptr);
  3787. }
  3788. case Builtin::BI__atomic_thread_fence:
  3789. case Builtin::BI__atomic_signal_fence:
  3790. case Builtin::BI__c11_atomic_thread_fence:
  3791. case Builtin::BI__c11_atomic_signal_fence: {
  3792. llvm::SyncScope::ID SSID;
  3793. if (BuiltinID == Builtin::BI__atomic_signal_fence ||
  3794. BuiltinID == Builtin::BI__c11_atomic_signal_fence)
  3795. SSID = llvm::SyncScope::SingleThread;
  3796. else
  3797. SSID = llvm::SyncScope::System;
  3798. Value *Order = EmitScalarExpr(E->getArg(0));
  3799. if (isa<llvm::ConstantInt>(Order)) {
  3800. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3801. switch (ord) {
  3802. case 0: // memory_order_relaxed
  3803. default: // invalid order
  3804. break;
  3805. case 1: // memory_order_consume
  3806. case 2: // memory_order_acquire
  3807. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  3808. break;
  3809. case 3: // memory_order_release
  3810. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  3811. break;
  3812. case 4: // memory_order_acq_rel
  3813. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  3814. break;
  3815. case 5: // memory_order_seq_cst
  3816. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  3817. break;
  3818. }
  3819. return RValue::get(nullptr);
  3820. }
  3821. llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
  3822. AcquireBB = createBasicBlock("acquire", CurFn);
  3823. ReleaseBB = createBasicBlock("release", CurFn);
  3824. AcqRelBB = createBasicBlock("acqrel", CurFn);
  3825. SeqCstBB = createBasicBlock("seqcst", CurFn);
  3826. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3827. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3828. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
  3829. Builder.SetInsertPoint(AcquireBB);
  3830. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  3831. Builder.CreateBr(ContBB);
  3832. SI->addCase(Builder.getInt32(1), AcquireBB);
  3833. SI->addCase(Builder.getInt32(2), AcquireBB);
  3834. Builder.SetInsertPoint(ReleaseBB);
  3835. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  3836. Builder.CreateBr(ContBB);
  3837. SI->addCase(Builder.getInt32(3), ReleaseBB);
  3838. Builder.SetInsertPoint(AcqRelBB);
  3839. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  3840. Builder.CreateBr(ContBB);
  3841. SI->addCase(Builder.getInt32(4), AcqRelBB);
  3842. Builder.SetInsertPoint(SeqCstBB);
  3843. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  3844. Builder.CreateBr(ContBB);
  3845. SI->addCase(Builder.getInt32(5), SeqCstBB);
  3846. Builder.SetInsertPoint(ContBB);
  3847. return RValue::get(nullptr);
  3848. }
  3849. case Builtin::BI__builtin_signbit:
  3850. case Builtin::BI__builtin_signbitf:
  3851. case Builtin::BI__builtin_signbitl: {
  3852. return RValue::get(
  3853. Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
  3854. ConvertType(E->getType())));
  3855. }
  3856. case Builtin::BI__warn_memset_zero_len:
  3857. return RValue::getIgnored();
  3858. case Builtin::BI__annotation: {
  3859. // Re-encode each wide string to UTF8 and make an MDString.
  3860. SmallVector<Metadata *, 1> Strings;
  3861. for (const Expr *Arg : E->arguments()) {
  3862. const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
  3863. assert(Str->getCharByteWidth() == 2);
  3864. StringRef WideBytes = Str->getBytes();
  3865. std::string StrUtf8;
  3866. if (!convertUTF16ToUTF8String(
  3867. ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
  3868. CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
  3869. continue;
  3870. }
  3871. Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
  3872. }
  3873. // Build and MDTuple of MDStrings and emit the intrinsic call.
  3874. llvm::Function *F =
  3875. CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
  3876. MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
  3877. Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
  3878. return RValue::getIgnored();
  3879. }
  3880. case Builtin::BI__builtin_annotation: {
  3881. llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
  3882. llvm::Function *F =
  3883. CGM.getIntrinsic(llvm::Intrinsic::annotation,
  3884. {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
  3885. // Get the annotation string, go through casts. Sema requires this to be a
  3886. // non-wide string literal, potentially casted, so the cast<> is safe.
  3887. const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
  3888. StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
  3889. return RValue::get(
  3890. EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
  3891. }
  3892. case Builtin::BI__builtin_addcb:
  3893. case Builtin::BI__builtin_addcs:
  3894. case Builtin::BI__builtin_addc:
  3895. case Builtin::BI__builtin_addcl:
  3896. case Builtin::BI__builtin_addcll:
  3897. case Builtin::BI__builtin_subcb:
  3898. case Builtin::BI__builtin_subcs:
  3899. case Builtin::BI__builtin_subc:
  3900. case Builtin::BI__builtin_subcl:
  3901. case Builtin::BI__builtin_subcll: {
  3902. // We translate all of these builtins from expressions of the form:
  3903. // int x = ..., y = ..., carryin = ..., carryout, result;
  3904. // result = __builtin_addc(x, y, carryin, &carryout);
  3905. //
  3906. // to LLVM IR of the form:
  3907. //
  3908. // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  3909. // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
  3910. // %carry1 = extractvalue {i32, i1} %tmp1, 1
  3911. // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
  3912. // i32 %carryin)
  3913. // %result = extractvalue {i32, i1} %tmp2, 0
  3914. // %carry2 = extractvalue {i32, i1} %tmp2, 1
  3915. // %tmp3 = or i1 %carry1, %carry2
  3916. // %tmp4 = zext i1 %tmp3 to i32
  3917. // store i32 %tmp4, i32* %carryout
  3918. // Scalarize our inputs.
  3919. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  3920. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  3921. llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
  3922. Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
  3923. // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
  3924. llvm::Intrinsic::ID IntrinsicId;
  3925. switch (BuiltinID) {
  3926. default: llvm_unreachable("Unknown multiprecision builtin id.");
  3927. case Builtin::BI__builtin_addcb:
  3928. case Builtin::BI__builtin_addcs:
  3929. case Builtin::BI__builtin_addc:
  3930. case Builtin::BI__builtin_addcl:
  3931. case Builtin::BI__builtin_addcll:
  3932. IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
  3933. break;
  3934. case Builtin::BI__builtin_subcb:
  3935. case Builtin::BI__builtin_subcs:
  3936. case Builtin::BI__builtin_subc:
  3937. case Builtin::BI__builtin_subcl:
  3938. case Builtin::BI__builtin_subcll:
  3939. IntrinsicId = llvm::Intrinsic::usub_with_overflow;
  3940. break;
  3941. }
  3942. // Construct our resulting LLVM IR expression.
  3943. llvm::Value *Carry1;
  3944. llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
  3945. X, Y, Carry1);
  3946. llvm::Value *Carry2;
  3947. llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
  3948. Sum1, Carryin, Carry2);
  3949. llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
  3950. X->getType());
  3951. Builder.CreateStore(CarryOut, CarryOutPtr);
  3952. return RValue::get(Sum2);
  3953. }
  3954. case Builtin::BI__builtin_add_overflow:
  3955. case Builtin::BI__builtin_sub_overflow:
  3956. case Builtin::BI__builtin_mul_overflow: {
  3957. const clang::Expr *LeftArg = E->getArg(0);
  3958. const clang::Expr *RightArg = E->getArg(1);
  3959. const clang::Expr *ResultArg = E->getArg(2);
  3960. clang::QualType ResultQTy =
  3961. ResultArg->getType()->castAs<PointerType>()->getPointeeType();
  3962. WidthAndSignedness LeftInfo =
  3963. getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
  3964. WidthAndSignedness RightInfo =
  3965. getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
  3966. WidthAndSignedness ResultInfo =
  3967. getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
  3968. // Handle mixed-sign multiplication as a special case, because adding
  3969. // runtime or backend support for our generic irgen would be too expensive.
  3970. if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
  3971. return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
  3972. RightInfo, ResultArg, ResultQTy,
  3973. ResultInfo);
  3974. if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
  3975. ResultInfo))
  3976. return EmitCheckedUnsignedMultiplySignedResult(
  3977. *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
  3978. ResultInfo);
  3979. WidthAndSignedness EncompassingInfo =
  3980. EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
  3981. llvm::Type *EncompassingLLVMTy =
  3982. llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
  3983. llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
  3984. llvm::Intrinsic::ID IntrinsicId;
  3985. switch (BuiltinID) {
  3986. default:
  3987. llvm_unreachable("Unknown overflow builtin id.");
  3988. case Builtin::BI__builtin_add_overflow:
  3989. IntrinsicId = EncompassingInfo.Signed
  3990. ? llvm::Intrinsic::sadd_with_overflow
  3991. : llvm::Intrinsic::uadd_with_overflow;
  3992. break;
  3993. case Builtin::BI__builtin_sub_overflow:
  3994. IntrinsicId = EncompassingInfo.Signed
  3995. ? llvm::Intrinsic::ssub_with_overflow
  3996. : llvm::Intrinsic::usub_with_overflow;
  3997. break;
  3998. case Builtin::BI__builtin_mul_overflow:
  3999. IntrinsicId = EncompassingInfo.Signed
  4000. ? llvm::Intrinsic::smul_with_overflow
  4001. : llvm::Intrinsic::umul_with_overflow;
  4002. break;
  4003. }
  4004. llvm::Value *Left = EmitScalarExpr(LeftArg);
  4005. llvm::Value *Right = EmitScalarExpr(RightArg);
  4006. Address ResultPtr = EmitPointerWithAlignment(ResultArg);
  4007. // Extend each operand to the encompassing type.
  4008. Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
  4009. Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
  4010. // Perform the operation on the extended values.
  4011. llvm::Value *Overflow, *Result;
  4012. Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
  4013. if (EncompassingInfo.Width > ResultInfo.Width) {
  4014. // The encompassing type is wider than the result type, so we need to
  4015. // truncate it.
  4016. llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
  4017. // To see if the truncation caused an overflow, we will extend
  4018. // the result and then compare it to the original result.
  4019. llvm::Value *ResultTruncExt = Builder.CreateIntCast(
  4020. ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
  4021. llvm::Value *TruncationOverflow =
  4022. Builder.CreateICmpNE(Result, ResultTruncExt);
  4023. Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
  4024. Result = ResultTrunc;
  4025. }
  4026. // Finally, store the result using the pointer.
  4027. bool isVolatile =
  4028. ResultArg->getType()->getPointeeType().isVolatileQualified();
  4029. Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
  4030. return RValue::get(Overflow);
  4031. }
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow: {
    // We translate all of these builtins directly to the relevant llvm IR node.
    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
    // Decide which of the overflow intrinsics we are lowering to:
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default: llvm_unreachable("Unknown overflow builtin id.");
    case Builtin::BI__builtin_uadd_overflow:
    case Builtin::BI__builtin_uaddl_overflow:
    case Builtin::BI__builtin_uaddll_overflow:
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_usub_overflow:
    case Builtin::BI__builtin_usubl_overflow:
    case Builtin::BI__builtin_usubll_overflow:
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
      break;
    case Builtin::BI__builtin_umul_overflow:
    case Builtin::BI__builtin_umull_overflow:
    case Builtin::BI__builtin_umulll_overflow:
      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
      break;
    case Builtin::BI__builtin_sadd_overflow:
    case Builtin::BI__builtin_saddl_overflow:
    case Builtin::BI__builtin_saddll_overflow:
      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
      break;
    case Builtin::BI__builtin_ssub_overflow:
    case Builtin::BI__builtin_ssubl_overflow:
    case Builtin::BI__builtin_ssubll_overflow:
      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
      break;
    case Builtin::BI__builtin_smul_overflow:
    case Builtin::BI__builtin_smull_overflow:
    case Builtin::BI__builtin_smulll_overflow:
      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
      break;
    }
    llvm::Value *Carry;
    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
    Builder.CreateStore(Sum, SumOutPtr);
    return RValue::get(Carry);
  }
  case Builtin::BIaddressof:
  case Builtin::BI__addressof:
  case Builtin::BI__builtin_addressof:
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
  case Builtin::BI__builtin_function_start:
    return RValue::get(CGM.GetFunctionStart(
        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
  case Builtin::BI__builtin_operator_new:
    return EmitBuiltinNewDeleteCall(
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
  case Builtin::BI__builtin_operator_delete:
    EmitBuiltinNewDeleteCall(
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_is_aligned:
    return EmitBuiltinIsAligned(E);
  case Builtin::BI__builtin_align_up:
    return EmitBuiltinAlignTo(E, true);
  case Builtin::BI__builtin_align_down:
    return EmitBuiltinAlignTo(E, false);
  case Builtin::BI__noop:
    // __noop always evaluates to an integer literal zero.
    return RValue::get(ConstantInt::get(IntTy, 0));
  case Builtin::BI__builtin_call_with_static_chain: {
    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
    const Expr *Chain = E->getArg(1);
    return EmitCall(Call->getCallee()->getType(),
                    EmitCallee(Call->getCallee()), Call, ReturnValue,
                    EmitScalarExpr(Chain));
  }
  case Builtin::BI_InterlockedExchange8:
  case Builtin::BI_InterlockedExchange16:
  case Builtin::BI_InterlockedExchange:
  case Builtin::BI_InterlockedExchangePointer:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
  case Builtin::BI_InterlockedCompareExchangePointer:
  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
    llvm::Type *RTy;
    llvm::IntegerType *IntType =
        IntegerType::get(getLLVMContext(),
                         getContext().getTypeSize(E->getType()));
    llvm::Type *IntPtrType = IntType->getPointerTo();
    llvm::Value *Destination =
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
    RTy = Exchange->getType();
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);
    llvm::Value *Comparand =
        Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
    auto Ordering =
        BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
        AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
    auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              Ordering, Ordering);
    Result->setVolatile(true);
    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
                                                                         0),
                                              RTy));
  }
  case Builtin::BI_InterlockedCompareExchange8:
  case Builtin::BI_InterlockedCompareExchange16:
  case Builtin::BI_InterlockedCompareExchange:
  case Builtin::BI_InterlockedCompareExchange64:
    return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
  case Builtin::BI_InterlockedIncrement16:
  case Builtin::BI_InterlockedIncrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
  case Builtin::BI_InterlockedDecrement16:
  case Builtin::BI_InterlockedDecrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
  case Builtin::BI_InterlockedAnd8:
  case Builtin::BI_InterlockedAnd16:
  case Builtin::BI_InterlockedAnd:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
  case Builtin::BI_InterlockedExchangeAdd8:
  case Builtin::BI_InterlockedExchangeAdd16:
  case Builtin::BI_InterlockedExchangeAdd:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
  case Builtin::BI_InterlockedExchangeSub8:
  case Builtin::BI_InterlockedExchangeSub16:
  case Builtin::BI_InterlockedExchangeSub:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
  case Builtin::BI_InterlockedOr8:
  case Builtin::BI_InterlockedOr16:
  case Builtin::BI_InterlockedOr:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
  case Builtin::BI_InterlockedXor8:
  case Builtin::BI_InterlockedXor16:
  case Builtin::BI_InterlockedXor:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
  case Builtin::BI_bittest64:
  case Builtin::BI_bittest:
  case Builtin::BI_bittestandcomplement64:
  case Builtin::BI_bittestandcomplement:
  case Builtin::BI_bittestandreset64:
  case Builtin::BI_bittestandreset:
  case Builtin::BI_bittestandset64:
  case Builtin::BI_bittestandset:
  case Builtin::BI_interlockedbittestandreset:
  case Builtin::BI_interlockedbittestandreset64:
  case Builtin::BI_interlockedbittestandset64:
  case Builtin::BI_interlockedbittestandset:
  case Builtin::BI_interlockedbittestandset_acq:
  case Builtin::BI_interlockedbittestandset_rel:
  case Builtin::BI_interlockedbittestandset_nf:
  case Builtin::BI_interlockedbittestandreset_acq:
  case Builtin::BI_interlockedbittestandreset_rel:
  case Builtin::BI_interlockedbittestandreset_nf:
    return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
  // These builtins exist to emit regular volatile loads and stores not
  // affected by the -fms-volatile setting.
  case Builtin::BI__iso_volatile_load8:
  case Builtin::BI__iso_volatile_load16:
  case Builtin::BI__iso_volatile_load32:
  case Builtin::BI__iso_volatile_load64:
    return RValue::get(EmitISOVolatileLoad(*this, E));
  case Builtin::BI__iso_volatile_store8:
  case Builtin::BI__iso_volatile_store16:
  case Builtin::BI__iso_volatile_store32:
  case Builtin::BI__iso_volatile_store64:
    return RValue::get(EmitISOVolatileStore(*this, E));
  case Builtin::BI__exception_code:
  case Builtin::BI_exception_code:
    return RValue::get(EmitSEHExceptionCode());
  case Builtin::BI__exception_info:
  case Builtin::BI_exception_info:
    return RValue::get(EmitSEHExceptionInfo());
  case Builtin::BI__abnormal_termination:
  case Builtin::BI_abnormal_termination:
    return RValue::get(EmitSEHAbnormalTermination());
  case Builtin::BI_setjmpex:
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
        E->getArg(0)->getType()->isPointerType())
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
    break;
  case Builtin::BI_setjmp:
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
        E->getArg(0)->getType()->isPointerType()) {
      if (getTarget().getTriple().getArch() == llvm::Triple::x86)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
      else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
    }
    break;
  // C++ std:: builtins.
  case Builtin::BImove:
  case Builtin::BImove_if_noexcept:
  case Builtin::BIforward:
  case Builtin::BIas_const:
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
  case Builtin::BI__GetExceptionInfo: {
    if (llvm::GlobalVariable *GV =
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
    break;
  }
  case Builtin::BI__fastfail:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
  case Builtin::BI__builtin_coro_id:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  case Builtin::BI__builtin_coro_promise:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  case Builtin::BI__builtin_coro_resume:
    EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_coro_frame:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  case Builtin::BI__builtin_coro_noop:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
  case Builtin::BI__builtin_coro_free:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  case Builtin::BI__builtin_coro_destroy:
    EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_coro_done:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  case Builtin::BI__builtin_coro_alloc:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  case Builtin::BI__builtin_coro_begin:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  case Builtin::BI__builtin_coro_end:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  case Builtin::BI__builtin_coro_suspend:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  case Builtin::BI__builtin_coro_size:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
  case Builtin::BI__builtin_coro_align:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
  case Builtin::BIread_pipe:
  case Builtin::BIwrite_pipe: {
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
          *Arg1 = EmitScalarExpr(E->getArg(1));
    CGOpenCLRuntime OpenCLRT(CGM);
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
    // Type of the generic packet parameter.
    unsigned GenericAS =
        getContext().getTargetAddressSpace(LangAS::opencl_generic);
    llvm::Type *I8PTy = llvm::PointerType::get(
        llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
    // Testing which overloaded version we should generate the call for.
    if (2U == E->getNumArgs()) {
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
                                                             : "__write_pipe_2";
      // Creating a generic function type to be able to call with any builtin or
      // user defined type.
      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
      return RValue::get(
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                          {Arg0, BCast, PacketSize, PacketAlign}));
    } else {
      assert(4 == E->getNumArgs() &&
             "Illegal number of parameters to pipe function");
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
                                                             : "__write_pipe_4";
      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
                              Int32Ty, Int32Ty};
      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
            *Arg3 = EmitScalarExpr(E->getArg(3));
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
      // We know the third argument is an integer type, but we may need to cast
      // it to i32.
      if (Arg2->getType() != Int32Ty)
        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
      return RValue::get(
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
    }
  }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
  // functions
  case Builtin::BIreserve_read_pipe:
  case Builtin::BIreserve_write_pipe:
  case Builtin::BIwork_group_reserve_read_pipe:
  case Builtin::BIwork_group_reserve_write_pipe:
  case Builtin::BIsub_group_reserve_read_pipe:
  case Builtin::BIsub_group_reserve_write_pipe: {
    // Composing the mangled name for the function.
    const char *Name;
    if (BuiltinID == Builtin::BIreserve_read_pipe)
      Name = "__reserve_read_pipe";
    else if (BuiltinID == Builtin::BIreserve_write_pipe)
      Name = "__reserve_write_pipe";
    else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
      Name = "__work_group_reserve_read_pipe";
    else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
      Name = "__work_group_reserve_write_pipe";
    else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
      Name = "__sub_group_reserve_read_pipe";
    else
      Name = "__sub_group_reserve_write_pipe";
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
          *Arg1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
    CGOpenCLRuntime OpenCLRT(CGM);
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
    // Building the generic function prototype.
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
    llvm::FunctionType *FTy = llvm::FunctionType::get(
        ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
    // We know the second argument is an integer type, but we may need to cast
    // it to i32.
    if (Arg1->getType() != Int32Ty)
      Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
  }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
  // functions
  case Builtin::BIcommit_read_pipe:
  case Builtin::BIcommit_write_pipe:
  case Builtin::BIwork_group_commit_read_pipe:
  case Builtin::BIwork_group_commit_write_pipe:
  case Builtin::BIsub_group_commit_read_pipe:
  case Builtin::BIsub_group_commit_write_pipe: {
    const char *Name;
    if (BuiltinID == Builtin::BIcommit_read_pipe)
      Name = "__commit_read_pipe";
    else if (BuiltinID == Builtin::BIcommit_write_pipe)
      Name = "__commit_write_pipe";
    else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
      Name = "__work_group_commit_read_pipe";
    else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
      Name = "__work_group_commit_write_pipe";
    else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
      Name = "__sub_group_commit_read_pipe";
    else
      Name = "__sub_group_commit_write_pipe";
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
          *Arg1 = EmitScalarExpr(E->getArg(1));
    CGOpenCLRuntime OpenCLRT(CGM);
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
    // Building the generic function prototype.
    llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
                                llvm::ArrayRef<llvm::Type *>(ArgTys), false);
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
  }
  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
  case Builtin::BIget_pipe_num_packets:
  case Builtin::BIget_pipe_max_packets: {
    const char *BaseName;
    const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
    if (BuiltinID == Builtin::BIget_pipe_num_packets)
      BaseName = "__get_pipe_num_packets";
    else
      BaseName = "__get_pipe_max_packets";
    std::string Name = std::string(BaseName) +
                       std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
    // Building the generic function prototype.
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    CGOpenCLRuntime OpenCLRT(CGM);
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
    llvm::FunctionType *FTy = llvm::FunctionType::get(
        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                       {Arg0, PacketSize, PacketAlign}));
  }
  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
  case Builtin::BIto_global:
  case Builtin::BIto_local:
  case Builtin::BIto_private: {
    auto Arg0 = EmitScalarExpr(E->getArg(0));
    auto NewArgT = llvm::PointerType::get(Int8Ty,
        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
    auto NewRetT = llvm::PointerType::get(Int8Ty,
        CGM.getContext().getTargetAddressSpace(
            E->getType()->getPointeeType().getAddressSpace()));
    auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
    llvm::Value *NewArg;
    if (Arg0->getType()->getPointerAddressSpace() !=
        NewArgT->getPointerAddressSpace())
      NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
    else
      NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
    auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
    auto NewCall =
        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
    return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
        ConvertType(E->getType())));
  }
  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
  // It contains four different overload formats specified in Table 6.13.17.1.
  case Builtin::BIenqueue_kernel: {
    StringRef Name; // Generated function call name
    unsigned NumArgs = E->getNumArgs();
    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
    llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
    llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
    if (NumArgs == 4) {
      // The most basic form of the call with parameters:
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
      Name = "__enqueue_kernel_basic";
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
                              GenericVoidPtrTy};
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      auto Info =
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
      llvm::Value *Kernel =
          Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
      llvm::Value *Block =
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
      AttrBuilder B(Builder.getContext());
      B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
      llvm::AttributeList ByValAttrSet =
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
      auto RTCall =
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
                          {Queue, Flags, Range, Kernel, Block});
      RTCall->setAttributes(ByValAttrSet);
      return RValue::get(RTCall);
    }
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
    // Create a temporary array to hold the sizes of local pointer arguments
    // for the block. \p First is the position of the first size argument.
    auto CreateArrayForSizeVar = [=](unsigned First)
        -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
      llvm::APInt ArraySize(32, NumArgs - First);
      QualType SizeArrayTy = getContext().getConstantArrayType(
          getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
      llvm::Value *TmpPtr = Tmp.getPointer();
      llvm::Value *TmpSize = EmitLifetimeStart(
          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
      llvm::Value *ElemPtr;
      // Each of the following arguments specifies the size of the corresponding
      // argument passed to the enqueued block.
      auto *Zero = llvm::ConstantInt::get(IntTy, 0);
      for (unsigned I = First; I < NumArgs; ++I) {
        auto *Index = llvm::ConstantInt::get(IntTy, I - First);
        auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
                                      {Zero, Index});
        if (I == First)
          ElemPtr = GEP;
        auto *V =
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
        Builder.CreateAlignedStore(
            V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
      }
      return std::tie(ElemPtr, TmpSize, TmpPtr);
    };
    // Could have events and/or varargs.
    if (E->getArg(3)->getType()->isBlockPointerType()) {
      // No events passed, but has variadic arguments.
      Name = "__enqueue_kernel_varargs";
      auto Info =
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
      llvm::Value *Kernel =
          Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
      auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
      // Create a vector of the arguments, as well as a constant value to
      // express to the runtime the number of variadic arguments.
      llvm::Value *const Args[] = {Queue, Flags,
                                   Range, Kernel,
                                   Block, ConstantInt::get(IntTy, NumArgs - 4),
                                   ElemPtr};
      llvm::Type *const ArgTys[] = {
          QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
          GenericVoidPtrTy, IntTy, ElemPtr->getType()};
      llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
      auto Call = RValue::get(
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
      if (TmpSize)
        EmitLifetimeEnd(TmpSize, TmpPtr);
      return Call;
    }
    // Any calls now have event arguments passed.
    if (NumArgs >= 7) {
      llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
      llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
      llvm::Value *NumEvents =
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
      // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
      // to be a null pointer constant (including `0` literal), we can take it
      // into account and emit null pointer directly.
      llvm::Value *EventWaitList = nullptr;
      if (E->getArg(4)->isNullPointerConstant(
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
        EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
      } else {
        EventWaitList = E->getArg(4)->getType()->isArrayType()
                        ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
                        : EmitScalarExpr(E->getArg(4));
        // Convert to generic address space.
        EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
      }
      llvm::Value *EventRet = nullptr;
      if (E->getArg(5)->isNullPointerConstant(
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
        EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
      } else {
        EventRet =
            Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
      }
      auto Info =
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
      llvm::Value *Kernel =
          Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
      llvm::Value *Block =
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
      std::vector<llvm::Type *> ArgTys = {
          QueueTy, Int32Ty, RangeTy, Int32Ty,
          EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
      std::vector<llvm::Value *> Args = {Queue, Flags, Range,
                                         NumEvents, EventWaitList, EventRet,
                                         Kernel, Block};
      if (NumArgs == 7) {
        // Has events but no variadics.
        Name = "__enqueue_kernel_basic_events";
        llvm::FunctionType *FTy = llvm::FunctionType::get(
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
        return RValue::get(
            EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                            llvm::ArrayRef<llvm::Value *>(Args)));
      }
      // Has event info and variadics
      // Pass the number of variadics to the runtime function too.
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
      ArgTys.push_back(Int32Ty);
      Name = "__enqueue_kernel_events_varargs";
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
      Args.push_back(ElemPtr);
      ArgTys.push_back(ElemPtr->getType());
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      auto Call =
          RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                      llvm::ArrayRef<llvm::Value *>(Args)));
      if (TmpSize)
        EmitLifetimeEnd(TmpSize, TmpPtr);
      return Call;
    }
    [[fallthrough]];
  }
  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
  // parameter.
  case Builtin::BIget_kernel_work_group_size: {
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
    auto Info =
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
    Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
    return RValue::get(EmitRuntimeCall(
        CGM.CreateRuntimeFunction(
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                    false),
            "__get_kernel_work_group_size_impl"),
        {Kernel, Arg}));
  }
  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
    auto Info =
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
    Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
    return RValue::get(EmitRuntimeCall(
        CGM.CreateRuntimeFunction(
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                    false),
            "__get_kernel_preferred_work_group_size_multiple_impl"),
        {Kernel, Arg}));
  }
  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
    llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
    auto Info =
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
    Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
    Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
    const char *Name =
        BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
            ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
            : "__get_kernel_sub_group_count_for_ndrange_impl";
    return RValue::get(EmitRuntimeCall(
        CGM.CreateRuntimeFunction(
            llvm::FunctionType::get(
                IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
                false),
            Name),
        {NDRange, Kernel, Block}));
  }
  case Builtin::BI__builtin_store_half:
  case Builtin::BI__builtin_store_halff: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Address Address = EmitPointerWithAlignment(E->getArg(1));
    Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
    Builder.CreateStore(HalfVal, Address);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_load_half: {
    Address Address = EmitPointerWithAlignment(E->getArg(0));
    Value *HalfVal = Builder.CreateLoad(Address);
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
  }
  case Builtin::BI__builtin_load_halff: {
    Address Address = EmitPointerWithAlignment(E->getArg(0));
    Value *HalfVal = Builder.CreateLoad(Address);
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
  }
  case Builtin::BIprintf:
    if (getTarget().getTriple().isNVPTX() ||
        getTarget().getTriple().isAMDGCN()) {
      if (getLangOpts().OpenMPIsDevice)
        return EmitOpenMPDevicePrintfCallExpr(E);
      if (getTarget().getTriple().isNVPTX())
        return EmitNVPTXDevicePrintfCallExpr(E);
      if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
        return EmitAMDGPUDevicePrintfCallExpr(E);
    }
    break;
  case Builtin::BI__builtin_canonicalize:
  case Builtin::BI__builtin_canonicalizef:
  case Builtin::BI__builtin_canonicalizef16:
  case Builtin::BI__builtin_canonicalizel:
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
  case Builtin::BI__builtin_thread_pointer: {
    if (!getContext().getTargetInfo().isTLSSupported())
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
    // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
    break;
  }
  case Builtin::BI__builtin_os_log_format:
    return emitBuiltinOSLogFormat(*E);
  case Builtin::BI__xray_customevent: {
    if (!ShouldXRayInstrumentFunction())
      return RValue::getIgnored();
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
            XRayInstrKind::Custom))
      return RValue::getIgnored();
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
        return RValue::getIgnored();
    Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
    auto FTy = F->getFunctionType();
    auto Arg0 = E->getArg(0);
    auto Arg0Val = EmitScalarExpr(Arg0);
    auto Arg0Ty = Arg0->getType();
    auto PTy0 = FTy->getParamType(0);
    if (PTy0 != Arg0Val->getType()) {
      if (Arg0Ty->isArrayType())
        Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
      else
        Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
    }
    auto Arg1 = EmitScalarExpr(E->getArg(1));
    auto PTy1 = FTy->getParamType(1);
    if (PTy1 != Arg1->getType())
      Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
    return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
  }
  case Builtin::BI__xray_typedevent: {
    // TODO: There should be a way to always emit events even if the current
    // function is not instrumented. Losing events in a stream can cripple
    // a trace.
    if (!ShouldXRayInstrumentFunction())
      return RValue::getIgnored();
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
            XRayInstrKind::Typed))
      return RValue::getIgnored();
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
        return RValue::getIgnored();
    Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
    auto FTy = F->getFunctionType();
    auto Arg0 = EmitScalarExpr(E->getArg(0));
    auto PTy0 = FTy->getParamType(0);
    if (PTy0 != Arg0->getType())
      Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
    auto Arg1 = E->getArg(1);
    auto Arg1Val = EmitScalarExpr(Arg1);
    auto Arg1Ty = Arg1->getType();
    auto PTy1 = FTy->getParamType(1);
    if (PTy1 != Arg1Val->getType()) {
      if (Arg1Ty->isArrayType())
        Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
      else
        Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
    }
    auto Arg2 = EmitScalarExpr(E->getArg(2));
    auto PTy2 = FTy->getParamType(2);
    if (PTy2 != Arg2->getType())
      Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
    return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
  }
  case Builtin::BI__builtin_ms_va_start:
  case Builtin::BI__builtin_ms_va_end:
    return RValue::get(
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
  case Builtin::BI__builtin_ms_va_copy: {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
    llvm::Type *BPP = Int8PtrPtrTy;
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
                       Int8PtrTy, DestAddr.getAlignment());
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
                      Int8PtrTy, SrcAddr.getAlignment());
    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
  }
  case Builtin::BI__builtin_get_device_side_mangled_name: {
    auto Name = CGM.getCUDARuntime().getDeviceSideName(
        cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
    auto Str = CGM.GetAddrOfConstantCString(Name, "");
    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
                               llvm::ConstantInt::get(SizeTy, 0)};
    auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
                                                     Str.getPointer(), Zeros);
    return RValue::get(Ptr);
  }
  }
  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));
  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
  // Check that a call to a target specific builtin has the correct target
  // features.
  // This is down here to avoid non-target specific builtins, however, if
  // generic builtins start to require generic target features then we
  // can move this up to the beginning of the function.
  checkTargetFeatures(E, FD);
  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
  // See if we have a target specific intrinsic.
  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  StringRef Prefix =
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
  if (!Prefix.empty()) {
    IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
    // NOTE we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
    // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
    if (IntrinsicID == Intrinsic::not_intrinsic)
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
  }
  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;
    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
    Function *F = CGM.getIntrinsic(IntrinsicID);
    llvm::FunctionType *FTy = F->getFunctionType();
    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        ArgValue = llvm::ConstantInt::get(
            getLLVMContext(),
            *E->getArg(i)->getIntegerConstantExpr(getContext()));
      }
      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        // XXX - vector of pointers?
        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
          if (PtrTy->getAddressSpace() !=
              ArgValue->getType()->getPointerAddressSpace()) {
            ArgValue = Builder.CreateAddrSpaceCast(
                ArgValue,
                ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
          }
        }
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
               "Must be able to losslessly bit cast to param");
        // Cast vector type (e.g., v256i32) to x86_amx; this only happens
        // in AMX intrinsics.
        if (PTy->isX86_AMXTy())
          ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
                                             {ArgValue->getType()}, {ArgValue});
        else
          ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }
      Args.push_back(ArgValue);
    }
    Value *V = Builder.CreateCall(F, Args);
    QualType BuiltinRetType = E->getType();
    llvm::Type *RetTy = VoidTy;
    if (!BuiltinRetType->isVoidType())
      RetTy = ConvertType(BuiltinRetType);
    if (RetTy != V->getType()) {
      // XXX - vector of pointers?
      if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
        if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
          V = Builder.CreateAddrSpaceCast(
              V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
        }
      }
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      // Cast x86_amx to vector type (e.g., v256i32); this only happens
      // in AMX intrinsics.
      if (V->getType()->isX86_AMXTy())
        V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
                                    {V});
      else
        V = Builder.CreateBitCast(V, RetTy);
    }
    if (RetTy->isVoidTy())
      return RValue::get(nullptr);
    return RValue::get(V);
  }
  // Some target-specific builtins can have aggregate return values, e.g.
  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
  // ReturnValue to be non-null, so that the target-specific emission code can
  // always just emit into it.
  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
    Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
    ReturnValue = ReturnValueSlot(DestPtr, false);
  }
  // Now see if we can emit a target-specific builtin.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
    switch (EvalKind) {
    case TEK_Scalar:
      if (V->getType()->isVoidTy())
        return RValue::get(nullptr);
      return RValue::get(V);
    case TEK_Aggregate:
      return RValue::getAggregate(ReturnValue.getValue(),
                                  ReturnValue.isVolatile());
    case TEK_Complex:
      llvm_unreachable("No current target builtin returns complex");
    }
    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
  }
  ErrorUnsupported(E, "builtin function");
  // Unknown builtin, for now just dump it out and return undef.
  return GetUndefRValue(E->getType());
}
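// Dispatch a target builtin to the per-architecture emitter that matches the
// given triple's architecture; returns nullptr when there is no handler for
// the architecture.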
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue,
                                        llvm::Triple::ArchType Arch) {
  switch (Arch) {
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb:
    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
  case llvm::Triple::aarch64:
  case llvm::Triple::aarch64_32:
  case llvm::Triple::aarch64_be:
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
  case llvm::Triple::bpfeb:
  case llvm::Triple::bpfel:
    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppcle:
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
  case llvm::Triple::r600:
  case llvm::Triple::amdgcn:
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
  case llvm::Triple::systemz:
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
  case llvm::Triple::nvptx:
  case llvm::Triple::nvptx64:
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
  case llvm::Triple::wasm32:
  case llvm::Triple::wasm64:
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
  case llvm::Triple::hexagon:
    return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
  case llvm::Triple::riscv32:
  case llvm::Triple::riscv64:
    return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
  case llvm::Triple::loongarch32:
  case llvm::Triple::loongarch64:
    return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
  default:
    return nullptr;
  }
}
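// Emit a target-specific builtin. Builtin IDs that belong to the auxiliary
// target (isAuxBuiltinID) are remapped and dispatched on the aux target's
// triple; everything else uses the primary target's architecture.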
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue) {
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
    return EmitTargetArchBuiltinExpr(
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
  }
  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
                                   getTarget().getTriple().getArch());
}
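// Map a NeonTypeFlags descriptor to the corresponding fixed-width LLVM vector
// type (64-bit "D" layouts vs. 128-bit "Q" layouts, or a one-element vector
// when V1Ty is set). Float16/BFloat16 fall back to i16 vectors when the target
// does not have legal half/bfloat support.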
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasLegalHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasLegalHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM yet;
    // a lot of the i128 and f128 API is missing, so we use v16i8 to represent
    // poly128 and rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}
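// Return the floating-point vector type whose lane layout matches the given
// integer NEON type flags (i16 -> half, i32 -> float, i64 -> double).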
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}
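// Broadcast one lane of a vector: build a constant splat mask from C and
// shuffle V with itself using it.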
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}
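// Emit a call to a NEON intrinsic, bitcasting each operand to the parameter
// type the intrinsic expects. When 'shift' is non-zero, operand 'shift' is
// treated as an immediate shift amount (negated for right shifts) instead of
// being bitcast. Constrained FP intrinsics skip their metadata operands and
// are emitted via CreateConstrainedFPCall.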
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }
  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  else
    return Builder.CreateCall(F, Ops, name);
}
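// Turn a constant shift amount into a (splatted) constant of type Ty,
// negating it when 'neg' is set (used for right shifts).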
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}
// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();
  Vec = Builder.CreateBitCast(Vec, Ty);
  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }
  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}
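// Bit flags describing how an intrinsic's LLVM signature is derived from the
// builtin call in the tables below: which return/argument types to add,
// whether to vectorize them, whether unsigned alternates exist, and so on.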
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),
  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),
  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),
  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
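// One row of the builtin-to-LLVM-intrinsic mapping tables: the builtin name,
// its ID, a primary and an alternate intrinsic, and the type-modifier flags
// above. The comparison operators order entries by BuiltinID so a table can
// be searched by builtin ID.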
namespace {
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace
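// Helpers for building table entries: NEONMAP0 maps a builtin to no intrinsic
// (custom lowering), NEONMAP1 to a single intrinsic, and NEONMAP2 to a primary
// plus an alternate (e.g. unsigned/signed) intrinsic.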
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }
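// Mapping from 32-bit ARM NEON builtins to LLVM intrinsics and type-modifier
// flags. Entries are expected to stay sorted by builtin ID (see operator<
// above) so the table can be searched efficiently.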
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  5303. NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  5304. NEONMAP0(vext_v),
  5305. NEONMAP0(vextq_v),
  5306. NEONMAP0(vfma_v),
  5307. NEONMAP0(vfmaq_v),
  5308. NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  5309. NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  5310. NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  5311. NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  5312. NEONMAP0(vld1_dup_v),
  5313. NEONMAP1(vld1_v, arm_neon_vld1, 0),
  5314. NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  5315. NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  5316. NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  5317. NEONMAP0(vld1q_dup_v),
  5318. NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  5319. NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  5320. NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  5321. NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  5322. NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  5323. NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  5324. NEONMAP1(vld2_v, arm_neon_vld2, 0),
  5325. NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  5326. NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  5327. NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  5328. NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  5329. NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  5330. NEONMAP1(vld3_v, arm_neon_vld3, 0),
  5331. NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  5332. NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  5333. NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  5334. NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  5335. NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  5336. NEONMAP1(vld4_v, arm_neon_vld4, 0),
  5337. NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  5338. NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  5339. NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  5340. NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  5341. NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  5342. NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  5343. NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  5344. NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  5345. NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  5346. NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  5347. NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  5348. NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  5349. NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  5350. NEONMAP0(vmovl_v),
  5351. NEONMAP0(vmovn_v),
  5352. NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  5353. NEONMAP0(vmull_v),
  5354. NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  5355. NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  5356. NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  5357. NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  5358. NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  5359. NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  5360. NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  5361. NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  5362. NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  5363. NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  5364. NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  5365. NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  5366. NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  5367. NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  5368. NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  5369. NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  5370. NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  5371. NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  5372. NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  5373. NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  5374. NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  5375. NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  5376. NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  5377. NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  5378. NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  5379. NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  5380. NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  5381. NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  5382. NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  5383. NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  5384. NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  5385. NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  5386. NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  5387. NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  5388. NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  5389. NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  5390. NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  5391. NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  5392. NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  5393. NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  5394. NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  5395. NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  5396. NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  5397. NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  5398. NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  5399. NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  5400. NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  5401. NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  5402. NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  5403. NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  5404. NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  5405. NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  5406. NEONMAP0(vrndi_v),
  5407. NEONMAP0(vrndiq_v),
  5408. NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  5409. NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  5410. NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  5411. NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  5412. NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  5413. NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  5414. NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  5415. NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  5416. NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  5417. NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  5418. NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  5419. NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  5420. NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  5421. NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  5422. NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  5423. NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  5424. NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  5425. NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  5426. NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  5427. NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  5428. NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  5429. NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  5430. NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  5431. NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  5432. NEONMAP0(vshl_n_v),
  5433. NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  5434. NEONMAP0(vshll_n_v),
  5435. NEONMAP0(vshlq_n_v),
  5436. NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  5437. NEONMAP0(vshr_n_v),
  5438. NEONMAP0(vshrn_n_v),
  5439. NEONMAP0(vshrq_n_v),
  5440. NEONMAP1(vst1_v, arm_neon_vst1, 0),
  5441. NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  5442. NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  5443. NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  5444. NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  5445. NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  5446. NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  5447. NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  5448. NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  5449. NEONMAP1(vst2_v, arm_neon_vst2, 0),
  5450. NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  5451. NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  5452. NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  5453. NEONMAP1(vst3_v, arm_neon_vst3, 0),
  5454. NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  5455. NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  5456. NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  5457. NEONMAP1(vst4_v, arm_neon_vst4, 0),
  5458. NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  5459. NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  5460. NEONMAP0(vsubhn_v),
  5461. NEONMAP0(vtrn_v),
  5462. NEONMAP0(vtrnq_v),
  5463. NEONMAP0(vtst_v),
  5464. NEONMAP0(vtstq_v),
  5465. NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  5466. NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  5467. NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  5468. NEONMAP0(vuzp_v),
  5469. NEONMAP0(vuzpq_v),
  5470. NEONMAP0(vzip_v),
  5471. NEONMAP0(vzipq_v)
  5472. };
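// A rough sketch of what these table entries record (assuming the NEONMAP*
// macros defined earlier in this file simply stringify the name and prepend the
// builtin/intrinsic namespaces): an entry such as
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// would amount to something like
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. a NEON builtin ID, the LLVM intrinsic it lowers to (two alternatives for
// NEONMAP2, none for NEONMAP0), and a set of TypeModifier flags.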
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
NEONMAP0(splatq_laneq_v),
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP0(vadd_v),
NEONMAP0(vaddhn_v),
NEONMAP0(vaddq_p128),
NEONMAP0(vaddq_v),
NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
NEONMAP1(vcale_v, aarch64_neon_facge, 0),
NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
NEONMAP0(vceqz_v),
NEONMAP0(vceqzq_v),
NEONMAP0(vcgez_v),
NEONMAP0(vcgezq_v),
NEONMAP0(vcgtz_v),
NEONMAP0(vcgtzq_v),
NEONMAP0(vclez_v),
NEONMAP0(vclezq_v),
NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
NEONMAP0(vcltz_v),
NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
NEONMAP0(vcvt_f16_s16),
NEONMAP0(vcvt_f16_u16),
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP0(vcvtq_f16_s16),
NEONMAP0(vcvtq_f16_u16),
NEONMAP0(vcvtq_f32_v),
NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
NEONMAP0(vext_v),
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
NEONMAP0(vmovl_v),
NEONMAP0(vmovn_v),
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
NEONMAP0(vshl_n_v),
NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
NEONMAP0(vshll_n_v),
NEONMAP0(vshlq_n_v),
NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
NEONMAP0(vshr_n_v),
NEONMAP0(vshrn_n_v),
NEONMAP0(vshrq_n_v),
NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
NEONMAP0(vsubhn_v),
NEONMAP0(vtst_v),
NEONMAP0(vtstq_v),
NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
// FP16 scalar intrinsics go here.
NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
{ NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
{ NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
{ NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
{ NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
{ NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
{ NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
{ NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
{ NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
{ NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
{ NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
{ NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
{ NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
{ NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
{ NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
{ NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
{ NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
{ NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
{ NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
{ NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
{ NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
{ NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
{ NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
{ NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
{ NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
{ NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
{ NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
{ NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
{ NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
{ NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
{ NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
{ NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
{ NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
{ NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
{ NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
{ NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
{ NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
{ NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
{ NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
{ NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
{ NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
{ NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
{ NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
{ NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
{ NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
{ NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
{ NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
{ NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
{ NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
{ NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
{ NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
{ NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
{ NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
{ NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
{ NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
{ NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
{ NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
{ NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
{ NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
{ NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
{ NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
{ NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
{ NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
{ NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
{ NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
{ NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
{ NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
{ NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
{ NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
{ NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
{ NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
{ NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
{ NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
{ NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
{ NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
{ NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
{ NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
{ NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
{ NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
{ NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
{ NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
{ NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
{ NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
{ NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
{ NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
{ NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
{ NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
{ NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
{ NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
{ NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
{ NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
{ NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
{ NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
{ NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
{ NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
{ NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
{ NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
{ NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
{ NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
{ NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
{ NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
{ NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
{ NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
{ NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
{ NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
{ NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
{ NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
{ NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
{ NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
{ NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
{ NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
{ NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
{ NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
{ NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
{ NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
{ NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
{ NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
{ NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
{ NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
{ NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
{ NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
{ NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
{ NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
{ NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
{ NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
{ NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
{ NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
{ NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
{ NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
{ NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
{ NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
{ NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
{ NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
{ NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
{ NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
{ NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
{ NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
{ NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
{ NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
};
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  {                                                                            \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SVEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2
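
// Each intrinsic map above must be kept sorted by BuiltinID so that it can be
// binary searched. The flags below record, once per map, that the sortedness
// assertion in findARMVectorIntrinsicInMap has already been checked; the check
// only runs in assertion-enabled builds.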
static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}
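
// Given an LLVM intrinsic ID and the modifier flags recorded in the table
// entry, build the list of overloaded types (return type, argument types, and
// an invented float type where requested) and return the corresponding LLVM
// intrinsic declaration.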
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}
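
// Emit a scalar (SISD) NEON builtin. Scalar operands are widened to
// one-element vectors so the vector intrinsic can be reused, and the result
// is narrowed back to the scalar type the builtin expects.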
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really a cmge
    // with swapped operands. The table gives us the right intrinsic, but we
    // still need to do the swap here.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
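
// Emit a NEON builtin that is shared between the 32-bit ARM and AArch64
// backends. The last builtin argument encodes the NeonTypeFlags, which select
// the overloaded vector type; the table-supplied LLVMIntrinsic/AltLLVMIntrinsic
// pair and Modifier flags drive the generic expansion, with the switch below
// handling the builtins that need special lowering.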
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
                                         ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
                                         ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
                                         ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
                                         ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
                                         ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument saying whether clz of zero is undefined; on ARM it is not.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::FixedVectorType *DTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::FixedVectorType *QTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
        llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    auto *NarrowTy =
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    auto *NarrowTy =
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                       RTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    llvm::Type *Tys[2] = {
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                            /*isQuad*/ true))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vsha512hq_u64:
  case NEON::BI__builtin_neon_vsha512h2q_u64:
  case NEON::BI__builtin_neon_vsha512su0q_u64:
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
  case NEON::BI__builtin_neon_vsm4eq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
    // in AArch64 it comes last. We may want to standardize on one or the other.
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      llvm::Type *Tys[2] = { VTy, PTy };
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
    }
    llvm::Type *Tys[2] = { PTy, VTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vxarq_u64: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vdot_s32:
  case NEON::BI__builtin_neon_vdot_u32:
  case NEON::BI__builtin_neon_vdotq_s32:
  case NEON::BI__builtin_neon_vdotq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  }
  case NEON::BI__builtin_neon_vfmlal_low_f16:
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
  }
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
  }
  case NEON::BI__builtin_neon_vfmlal_high_f16:
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
  }
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
  }
  case NEON::BI__builtin_neon_vmmlaq_s32:
  case NEON::BI__builtin_neon_vmmlaq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
  }
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
  }
  case NEON::BI__builtin_neon_vusdot_s32:
  case NEON::BI__builtin_neon_vusdotq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
  }
  case NEON::BI__builtin_neon_vbfdot_f32:
  case NEON::BI__builtin_neon_vbfdotq_f32: {
    llvm::Type *InputTy =
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
  }
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
    llvm::Type *Tys[1] = { Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsics that return a one-element vector are cast back to the
  // scalar type expected by the builtin.
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
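
// Emit a NEON compare-against-zero: the floating-point or integer predicate
// is chosen from the (possibly bitcast) operand type, and the i1 result is
// sign-extended to the requested vector type.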
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
    const CmpInst::Predicate Ip, const Twine &Name) {
  llvm::Type *OTy = Op->getType();

  // FIXME: this is utterly horrific. We should not be looking at previous
  // codegen context to find out what needs doing. Unfortunately TableGen
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
  // (etc).
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
    OTy = BI->getOperand(0)->getType();

  Op = Builder.CreateBitCast(Op, OTy);
  if (OTy->getScalarType()->isFloatingPointTy()) {
    if (Fp == CmpInst::FCMP_OEQ)
      Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
    else
      Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
  } else {
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  }
  return Builder.CreateSExt(Op, Ty, Name);
}
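
// Pack a list of 64-bit TBL/TBX table operands into the 128-bit registers the
// AArch64 table-lookup intrinsics expect, zero-filling the high half when the
// operand count is odd, then emit the lookup intrinsic.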
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<int, 16> Indices;
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2*i);
    Indices.push_back(2*i+1);
  }

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // If there is an odd number of 64-bit lookup-table vectors, fill the high
  // 64 bits of the last 128-bit lookup table with zero.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}
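
// Map the ARM hint builtins (nop/yield/wfe/wfi/sev/sevl) onto the immediate
// operand of the llvm.arm.hint intrinsic; returns null for any other builtin.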
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  unsigned Value;
  switch (BuiltinID) {
  default:
    return nullptr;
  case clang::ARM::BI__builtin_arm_nop:
    Value = 0;
    break;
  case clang::ARM::BI__builtin_arm_yield:
  case clang::ARM::BI__yield:
    Value = 1;
    break;
  case clang::ARM::BI__builtin_arm_wfe:
  case clang::ARM::BI__wfe:
    Value = 2;
    break;
  case clang::ARM::BI__builtin_arm_wfi:
  case clang::ARM::BI__wfi:
    Value = 3;
    break;
  case clang::ARM::BI__builtin_arm_sev:
  case clang::ARM::BI__sev:
    Value = 4;
    break;
  case clang::ARM::BI__builtin_arm_sevl:
  case clang::ARM::BI__sevl:
    Value = 5;
    break;
  }

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                            llvm::ConstantInt::get(Int32Ty, Value));
}
enum SpecialRegisterAccessKind {
  NormalRead,
  VolatileRead,
  Write,
};
// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read,
// RegisterType is the type of the register being written to or read from.
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         SpecialRegisterAccessKind AccessKind,
                                         StringRef SysReg = "") {
  // The read and write register intrinsics only support 32-, 64- and 128-bit
  // operations.
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
          RegisterType->isIntegerTy(128)) &&
         "Unsupported size for register.");

  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  LLVMContext &Context = CGM.getLLVMContext();

  if (SysReg.empty()) {
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  }

  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  llvm::Type *Types[] = { RegisterType };

  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
         && "Can't fit 64-bit value in 32-bit register");

  if (AccessKind != Write) {
    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
    llvm::Function *F = CGM.getIntrinsic(
        AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
                                   : llvm::Intrinsic::read_register,
        Types);
    llvm::Value *Call = Builder.CreateCall(F, Metadata);

    if (MixedTypes)
      // Read into 64 bit register and then truncate result to 32 bit.
      return Builder.CreateTrunc(Call, ValueType);

    if (ValueType->isPointerTy())
      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
      return Builder.CreateIntToPtr(Call, ValueType);

    return Call;
  }

  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  if (MixedTypes) {
    // Extend 32 bit write value to 64 bit to pass to write.
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  if (ValueType->isPointerTy()) {
    // Have VoidPtrTy ArgValue but want to return an i32/i64.
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  return Builder.CreateCall(F, { Metadata, ArgValue });
}
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
/// argument that specifies the vector type.
static bool HasExtraNeonArgument(unsigned BuiltinID) {
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
  case NEON::BI__builtin_neon_vsha1h_u32:
  case NEON::BI__builtin_neon_vsha1cq_u32:
  case NEON::BI__builtin_neon_vsha1pq_u32:
  case NEON::BI__builtin_neon_vsha1mq_u32:
  case NEON::BI__builtin_neon_vcvth_bf16_f32:
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2:
    return false;
  }
  return true;
}
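
// Emit the AArch32 builtins that need custom handling: hint instructions,
// __emit, dbg, prefetch, rbit/cls, __clear_cache, coprocessor moves,
// exclusive load/store, CRC32, and related operations.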
  7113. Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
  7114. const CallExpr *E,
  7115. ReturnValueSlot ReturnValue,
  7116. llvm::Triple::ArchType Arch) {
  7117. if (auto Hint = GetValueForARMHint(BuiltinID))
  7118. return Hint;
  7119. if (BuiltinID == clang::ARM::BI__emit) {
  7120. bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
  7121. llvm::FunctionType *FTy =
  7122. llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
  7123. Expr::EvalResult Result;
  7124. if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
  7125. llvm_unreachable("Sema will ensure that the parameter is constant");
  7126. llvm::APSInt Value = Result.Val.getInt();
  7127. uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
  7128. llvm::InlineAsm *Emit =
  7129. IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
  7130. /*hasSideEffects=*/true)
  7131. : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
  7132. /*hasSideEffects=*/true);
  7133. return Builder.CreateCall(Emit);
  7134. }
  7135. if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
  7136. Value *Option = EmitScalarExpr(E->getArg(0));
  7137. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  7138. }
  7139. if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
  7140. Value *Address = EmitScalarExpr(E->getArg(0));
  7141. Value *RW = EmitScalarExpr(E->getArg(1));
  7142. Value *IsData = EmitScalarExpr(E->getArg(2));
  7143. // Locality is not supported on ARM target
  7144. Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
  7145. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  7146. return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  7147. }
  7148. if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
  7149. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  7150. return Builder.CreateCall(
  7151. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  7152. }
  7153. if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
  7154. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  7155. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
  7156. }
  7157. if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
  7158. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  7159. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
  7160. "cls");
  7161. }
  7162. if (BuiltinID == clang::ARM::BI__clear_cache) {
  7163. assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
  7164. const FunctionDecl *FD = E->getDirectCallee();
  7165. Value *Ops[2];
  7166. for (unsigned i = 0; i < 2; i++)
  7167. Ops[i] = EmitScalarExpr(E->getArg(i));
  7168. llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
  7169. llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  7170. StringRef Name = FD->getName();
  7171. return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  7172. }
  7173. if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
  7174. BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
  7175. Function *F;
  7176. switch (BuiltinID) {
  7177. default: llvm_unreachable("unexpected builtin");
  7178. case clang::ARM::BI__builtin_arm_mcrr:
  7179. F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
  7180. break;
  7181. case clang::ARM::BI__builtin_arm_mcrr2:
  7182. F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
  7183. break;
  7184. }
  7185. // MCRR{2} instruction has 5 operands but
  7186. // the intrinsic has 4 because Rt and Rt2
  7187. // are represented as a single unsigned 64
  7188. // bit integer in the intrinsic definition
  7189. // but internally it's represented as 2 32
  7190. // bit integers.
  7191. Value *Coproc = EmitScalarExpr(E->getArg(0));
  7192. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  7193. Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
  7194. Value *CRm = EmitScalarExpr(E->getArg(3));
  7195. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  7196. Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
  7197. Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
  7198. Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
  7199. return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  7200. }
  7201. if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
  7202. BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
  7203. Function *F;
  7204. switch (BuiltinID) {
  7205. default: llvm_unreachable("unexpected builtin");
  7206. case clang::ARM::BI__builtin_arm_mrrc:
  7207. F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
  7208. break;
  7209. case clang::ARM::BI__builtin_arm_mrrc2:
  7210. F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
  7211. break;
  7212. }
  7213. Value *Coproc = EmitScalarExpr(E->getArg(0));
  7214. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  7215. Value *CRm = EmitScalarExpr(E->getArg(2));
  7216. Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
  7217. // Returns an unsigned 64 bit integer, represented
  7218. // as two 32 bit integers.
  7219. Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
  7220. Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
  7221. Rt = Builder.CreateZExt(Rt, Int64Ty);
  7222. Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
  7223. Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
  7224. RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
  7225. RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
  7226. return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  7227. }
  7228. if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
  7229. ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
  7230. BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
  7231. getContext().getTypeSize(E->getType()) == 64) ||
  7232. BuiltinID == clang::ARM::BI__ldrexd) {
  7233. Function *F;
  7234. switch (BuiltinID) {
  7235. default: llvm_unreachable("unexpected builtin");
  7236. case clang::ARM::BI__builtin_arm_ldaex:
  7237. F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
  7238. break;
  7239. case clang::ARM::BI__builtin_arm_ldrexd:
  7240. case clang::ARM::BI__builtin_arm_ldrex:
  7241. case clang::ARM::BI__ldrexd:
  7242. F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
  7243. break;
  7244. }
  7245. Value *LdPtr = EmitScalarExpr(E->getArg(0));
  7246. Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
  7247. "ldrexd");
  7248. Value *Val0 = Builder.CreateExtractValue(Val, 1);
  7249. Value *Val1 = Builder.CreateExtractValue(Val, 0);
  7250. Val0 = Builder.CreateZExt(Val0, Int64Ty);
  7251. Val1 = Builder.CreateZExt(Val1, Int64Ty);
  7252. Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
  7253. Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
  7254. Val = Builder.CreateOr(Val, Val1);
  7255. return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  7256. }
  7257. if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
  7258. BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
  7259. Value *LoadAddr = EmitScalarExpr(E->getArg(0));
  7260. QualType Ty = E->getType();
  7261. llvm::Type *RealResTy = ConvertType(Ty);
  7262. llvm::Type *IntTy =
  7263. llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
  7264. llvm::Type *PtrTy = IntTy->getPointerTo();
  7265. LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
  7266. Function *F = CGM.getIntrinsic(
  7267. BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
  7268. : Intrinsic::arm_ldrex,
  7269. PtrTy);
  7270. CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
  7271. Val->addParamAttr(
  7272. 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
  7273. if (RealResTy->isPointerTy())
  7274. return Builder.CreateIntToPtr(Val, RealResTy);
  7275. else {
  7276. llvm::Type *IntResTy = llvm::IntegerType::get(
  7277. getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
  7278. return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
  7279. RealResTy);
  7280. }
  7281. }
  7282. if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
  7283. ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
  7284. BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
  7285. getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
  7286. Function *F = CGM.getIntrinsic(
  7287. BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
  7288. : Intrinsic::arm_strexd);
  7289. llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
  7290. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  7291. Value *Val = EmitScalarExpr(E->getArg(0));
  7292. Builder.CreateStore(Val, Tmp);
  7293. Address LdPtr = Builder.CreateElementBitCast(Tmp, STy);
  7294. Val = Builder.CreateLoad(LdPtr);
  7295. Value *Arg0 = Builder.CreateExtractValue(Val, 0);
  7296. Value *Arg1 = Builder.CreateExtractValue(Val, 1);
  7297. Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
  7298. return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  7299. }
  7300. if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
  7301. BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
  7302. Value *StoreVal = EmitScalarExpr(E->getArg(0));
  7303. Value *StoreAddr = EmitScalarExpr(E->getArg(1));
  7304. QualType Ty = E->getArg(0)->getType();
  7305. llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
  7306. getContext().getTypeSize(Ty));
  7307. StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
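    // The strex/stlex intrinsics take the value operand as an i32, so pointer
    // and narrower integer values are converted to i32 first.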
  7308. if (StoreVal->getType()->isPointerTy())
  7309. StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
  7310. else {
  7311. llvm::Type *IntTy = llvm::IntegerType::get(
  7312. getLLVMContext(),
  7313. CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
  7314. StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
  7315. StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
  7316. }
  7317. Function *F = CGM.getIntrinsic(
  7318. BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
  7319. : Intrinsic::arm_strex,
  7320. StoreAddr->getType());
  7321. CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
  7322. CI->addParamAttr(
  7323. 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
  7324. return CI;
  7325. }
  7326. if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
  7327. Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
  7328. return Builder.CreateCall(F);
  7329. }
  7330. // CRC32
  7331. Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  7332. switch (BuiltinID) {
  7333. case clang::ARM::BI__builtin_arm_crc32b:
  7334. CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  7335. case clang::ARM::BI__builtin_arm_crc32cb:
  7336. CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  7337. case clang::ARM::BI__builtin_arm_crc32h:
  7338. CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  7339. case clang::ARM::BI__builtin_arm_crc32ch:
  7340. CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  7341. case clang::ARM::BI__builtin_arm_crc32w:
  7342. case clang::ARM::BI__builtin_arm_crc32d:
  7343. CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  7344. case clang::ARM::BI__builtin_arm_crc32cw:
  7345. case clang::ARM::BI__builtin_arm_crc32cd:
  7346. CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  7347. }
  7348. if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
  7349. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  7350. Value *Arg1 = EmitScalarExpr(E->getArg(1));
  7351. // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
  7352. // intrinsics, hence we need different codegen for these cases.
  7353. if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
  7354. BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
  7355. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  7356. Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
  7357. Value *Arg1b = Builder.CreateLShr(Arg1, C1);
  7358. Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
  7359. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  7360. Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
  7361. return Builder.CreateCall(F, {Res, Arg1b});
  7362. } else {
  7363. Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
  7364. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  7365. return Builder.CreateCall(F, {Arg0, Arg1});
  7366. }
  7367. }
  7368. if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
  7369. BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
  7370. BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
  7371. BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
  7372. BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
  7373. BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
  7374. SpecialRegisterAccessKind AccessKind = Write;
  7375. if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
  7376. BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
  7377. BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
  7378. AccessKind = VolatileRead;
  7379. bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
  7380. BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
  7381. bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
  7382. BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
  7383. llvm::Type *ValueType;
  7384. llvm::Type *RegisterType;
  7385. if (IsPointerBuiltin) {
  7386. ValueType = VoidPtrTy;
  7387. RegisterType = Int32Ty;
  7388. } else if (Is64Bit) {
  7389. ValueType = RegisterType = Int64Ty;
  7390. } else {
  7391. ValueType = RegisterType = Int32Ty;
  7392. }
  7393. return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
  7394. AccessKind);
  7395. }
  7396. if (BuiltinID == ARM::BI__builtin_sponentry) {
  7397. llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
  7398. return Builder.CreateCall(F);
  7399. }
  7400. // Handle MSVC intrinsics before argument evaluation to prevent double
  7401. // evaluation.
  7402. if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
  7403. return EmitMSVCBuiltinExpr(*MsvcIntId, E);
  7404. // Deal with MVE builtins
  7405. if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
  7406. return Result;
  7407. // Handle CDE builtins
  7408. if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
  7409. return Result;
  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
  7411. auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
  7412. return P.first == BuiltinID;
  7413. });
  7414. if (It != end(NEONEquivalentIntrinsicMap))
  7415. BuiltinID = It->second;
  7416. // Find out if any arguments are required to be integer constant
  7417. // expressions.
  7418. unsigned ICEArguments = 0;
  7419. ASTContext::GetBuiltinTypeError Error;
  7420. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  7421. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  7422. auto getAlignmentValue32 = [&](Address addr) -> Value* {
  7423. return Builder.getInt32(addr.getAlignment().getQuantity());
  7424. };
  7425. Address PtrOp0 = Address::invalid();
  7426. Address PtrOp1 = Address::invalid();
  7427. SmallVector<Value*, 4> Ops;
  7428. bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  7429. unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
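  // When present, the trailing argument is the NEON type specifier; it is not
  // emitted as an operand here but is decoded after this loop.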
  7430. for (unsigned i = 0, e = NumArgs; i != e; i++) {
  7431. if (i == 0) {
  7432. switch (BuiltinID) {
  7433. case NEON::BI__builtin_neon_vld1_v:
  7434. case NEON::BI__builtin_neon_vld1q_v:
  7435. case NEON::BI__builtin_neon_vld1q_lane_v:
  7436. case NEON::BI__builtin_neon_vld1_lane_v:
  7437. case NEON::BI__builtin_neon_vld1_dup_v:
  7438. case NEON::BI__builtin_neon_vld1q_dup_v:
  7439. case NEON::BI__builtin_neon_vst1_v:
  7440. case NEON::BI__builtin_neon_vst1q_v:
  7441. case NEON::BI__builtin_neon_vst1q_lane_v:
  7442. case NEON::BI__builtin_neon_vst1_lane_v:
  7443. case NEON::BI__builtin_neon_vst2_v:
  7444. case NEON::BI__builtin_neon_vst2q_v:
  7445. case NEON::BI__builtin_neon_vst2_lane_v:
  7446. case NEON::BI__builtin_neon_vst2q_lane_v:
  7447. case NEON::BI__builtin_neon_vst3_v:
  7448. case NEON::BI__builtin_neon_vst3q_v:
  7449. case NEON::BI__builtin_neon_vst3_lane_v:
  7450. case NEON::BI__builtin_neon_vst3q_lane_v:
  7451. case NEON::BI__builtin_neon_vst4_v:
  7452. case NEON::BI__builtin_neon_vst4q_v:
  7453. case NEON::BI__builtin_neon_vst4_lane_v:
  7454. case NEON::BI__builtin_neon_vst4q_lane_v:
  7455. // Get the alignment for the argument in addition to the value;
  7456. // we'll use it later.
  7457. PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
  7458. Ops.push_back(PtrOp0.getPointer());
  7459. continue;
  7460. }
  7461. }
  7462. if (i == 1) {
  7463. switch (BuiltinID) {
  7464. case NEON::BI__builtin_neon_vld2_v:
  7465. case NEON::BI__builtin_neon_vld2q_v:
  7466. case NEON::BI__builtin_neon_vld3_v:
  7467. case NEON::BI__builtin_neon_vld3q_v:
  7468. case NEON::BI__builtin_neon_vld4_v:
  7469. case NEON::BI__builtin_neon_vld4q_v:
  7470. case NEON::BI__builtin_neon_vld2_lane_v:
  7471. case NEON::BI__builtin_neon_vld2q_lane_v:
  7472. case NEON::BI__builtin_neon_vld3_lane_v:
  7473. case NEON::BI__builtin_neon_vld3q_lane_v:
  7474. case NEON::BI__builtin_neon_vld4_lane_v:
  7475. case NEON::BI__builtin_neon_vld4q_lane_v:
  7476. case NEON::BI__builtin_neon_vld2_dup_v:
  7477. case NEON::BI__builtin_neon_vld2q_dup_v:
  7478. case NEON::BI__builtin_neon_vld3_dup_v:
  7479. case NEON::BI__builtin_neon_vld3q_dup_v:
  7480. case NEON::BI__builtin_neon_vld4_dup_v:
  7481. case NEON::BI__builtin_neon_vld4q_dup_v:
  7482. // Get the alignment for the argument in addition to the value;
  7483. // we'll use it later.
  7484. PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
  7485. Ops.push_back(PtrOp1.getPointer());
  7486. continue;
  7487. }
  7488. }
  7489. if ((ICEArguments & (1 << i)) == 0) {
  7490. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  7491. } else {
  7492. // If this is required to be a constant, constant fold it so that we know
  7493. // that the generated intrinsic gets a ConstantInt.
  7494. Ops.push_back(llvm::ConstantInt::get(
  7495. getLLVMContext(),
  7496. *E->getArg(i)->getIntegerConstantExpr(getContext())));
  7497. }
  7498. }
  7499. switch (BuiltinID) {
  7500. default: break;
  7501. case NEON::BI__builtin_neon_vget_lane_i8:
  7502. case NEON::BI__builtin_neon_vget_lane_i16:
  7503. case NEON::BI__builtin_neon_vget_lane_i32:
  7504. case NEON::BI__builtin_neon_vget_lane_i64:
  7505. case NEON::BI__builtin_neon_vget_lane_bf16:
  7506. case NEON::BI__builtin_neon_vget_lane_f32:
  7507. case NEON::BI__builtin_neon_vgetq_lane_i8:
  7508. case NEON::BI__builtin_neon_vgetq_lane_i16:
  7509. case NEON::BI__builtin_neon_vgetq_lane_i32:
  7510. case NEON::BI__builtin_neon_vgetq_lane_i64:
  7511. case NEON::BI__builtin_neon_vgetq_lane_bf16:
  7512. case NEON::BI__builtin_neon_vgetq_lane_f32:
  7513. case NEON::BI__builtin_neon_vduph_lane_bf16:
  7514. case NEON::BI__builtin_neon_vduph_laneq_bf16:
  7515. return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  7516. case NEON::BI__builtin_neon_vrndns_f32: {
  7517. Value *Arg = EmitScalarExpr(E->getArg(0));
  7518. llvm::Type *Tys[] = {Arg->getType()};
  7519. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
  7520. return Builder.CreateCall(F, {Arg}, "vrndn"); }
  7521. case NEON::BI__builtin_neon_vset_lane_i8:
  7522. case NEON::BI__builtin_neon_vset_lane_i16:
  7523. case NEON::BI__builtin_neon_vset_lane_i32:
  7524. case NEON::BI__builtin_neon_vset_lane_i64:
  7525. case NEON::BI__builtin_neon_vset_lane_bf16:
  7526. case NEON::BI__builtin_neon_vset_lane_f32:
  7527. case NEON::BI__builtin_neon_vsetq_lane_i8:
  7528. case NEON::BI__builtin_neon_vsetq_lane_i16:
  7529. case NEON::BI__builtin_neon_vsetq_lane_i32:
  7530. case NEON::BI__builtin_neon_vsetq_lane_i64:
  7531. case NEON::BI__builtin_neon_vsetq_lane_bf16:
  7532. case NEON::BI__builtin_neon_vsetq_lane_f32:
  7533. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  7534. case NEON::BI__builtin_neon_vsha1h_u32:
  7535. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
  7536. "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1c");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1p");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1m");
  7546. case NEON::BI__builtin_neon_vcvth_bf16_f32: {
  7547. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
  7548. "vcvtbfp2bf");
  7549. }
  7550. // The ARM _MoveToCoprocessor builtins put the input register value as
  7551. // the first argument, but the LLVM intrinsic expects it as the third one.
  7552. case clang::ARM::BI_MoveToCoprocessor:
  7553. case clang::ARM::BI_MoveToCoprocessor2: {
  7554. Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
  7555. ? Intrinsic::arm_mcr
  7556. : Intrinsic::arm_mcr2);
  7557. return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
  7558. Ops[3], Ops[4], Ops[5]});
  7559. }
  7560. }
  7561. // Get the last argument, which specifies the vector type.
  7562. assert(HasExtraArg);
  7563. const Expr *Arg = E->getArg(E->getNumArgs()-1);
  7564. std::optional<llvm::APSInt> Result =
  7565. Arg->getIntegerConstantExpr(getContext());
  7566. if (!Result)
  7567. return nullptr;
  7568. if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
  7569. BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
  7570. // Determine the overloaded type of this builtin.
  7571. llvm::Type *Ty;
  7572. if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
  7573. Ty = FloatTy;
  7574. else
  7575. Ty = DoubleTy;
  7576. // Determine whether this is an unsigned conversion or not.
  7577. bool usgn = Result->getZExtValue() == 1;
  7578. unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
  7579. // Call the appropriate intrinsic.
  7580. Function *F = CGM.getIntrinsic(Int, Ty);
  7581. return Builder.CreateCall(F, Ops, "vcvtr");
  7582. }
  7583. // Determine the type of this overloaded NEON intrinsic.
  7584. NeonTypeFlags Type = Result->getZExtValue();
  7585. bool usgn = Type.isUnsigned();
  7586. bool rightShift = false;
  7587. llvm::FixedVectorType *VTy =
  7588. GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
  7589. getTarget().hasBFloat16Type());
  7590. llvm::Type *Ty = VTy;
  7591. if (!Ty)
  7592. return nullptr;
  7593. // Many NEON builtins have identical semantics and uses in ARM and
  7594. // AArch64. Emit these in a single function.
  7595. auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
  7596. const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
  7597. IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  7598. if (Builtin)
  7599. return EmitCommonNeonBuiltinExpr(
  7600. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  7601. Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
  7602. unsigned Int;
  7603. switch (BuiltinID) {
  7604. default: return nullptr;
  7605. case NEON::BI__builtin_neon_vld1q_lane_v:
  7606. // Handle 64-bit integer elements as a special case. Use shuffles of
  7607. // one-element vectors to avoid poor code for i64 in the backend.
  7608. if (VTy->getElementType()->isIntegerTy(64)) {
  7609. // Extract the other lane.
  7610. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7611. int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
  7612. Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
  7613. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  7614. // Load the value as a one-element vector.
  7615. Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
  7616. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  7617. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
  7618. Value *Align = getAlignmentValue32(PtrOp0);
  7619. Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
  7620. // Combine them.
  7621. int Indices[] = {1 - Lane, Lane};
  7622. return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
  7623. }
  7624. [[fallthrough]];
  7625. case NEON::BI__builtin_neon_vld1_lane_v: {
  7626. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7627. PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
  7628. Value *Ld = Builder.CreateLoad(PtrOp0);
  7629. return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  7630. }
  7631. case NEON::BI__builtin_neon_vqrshrn_n_v:
  7632. Int =
  7633. usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
  7634. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
  7635. 1, true);
  7636. case NEON::BI__builtin_neon_vqrshrun_n_v:
  7637. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
  7638. Ops, "vqrshrun_n", 1, true);
  7639. case NEON::BI__builtin_neon_vqshrn_n_v:
  7640. Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
  7641. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
  7642. 1, true);
  7643. case NEON::BI__builtin_neon_vqshrun_n_v:
  7644. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
  7645. Ops, "vqshrun_n", 1, true);
  7646. case NEON::BI__builtin_neon_vrecpe_v:
  7647. case NEON::BI__builtin_neon_vrecpeq_v:
  7648. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
  7649. Ops, "vrecpe");
  7650. case NEON::BI__builtin_neon_vrshrn_n_v:
  7651. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
  7652. Ops, "vrshrn_n", 1, true);
  7653. case NEON::BI__builtin_neon_vrsra_n_v:
  7654. case NEON::BI__builtin_neon_vrsraq_n_v:
  7655. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7656. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7657. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
  7658. Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
  7659. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
  7660. return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  7661. case NEON::BI__builtin_neon_vsri_n_v:
  7662. case NEON::BI__builtin_neon_vsriq_n_v:
  7663. rightShift = true;
  7664. [[fallthrough]];
  7665. case NEON::BI__builtin_neon_vsli_n_v:
  7666. case NEON::BI__builtin_neon_vsliq_n_v:
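    // vsri and vsli share the arm_neon_vshiftins intrinsic; the shift vector
    // built here encodes the direction selected by rightShift above.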
  7667. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
  7668. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
  7669. Ops, "vsli_n");
  7670. case NEON::BI__builtin_neon_vsra_n_v:
  7671. case NEON::BI__builtin_neon_vsraq_n_v:
  7672. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7673. Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
  7674. return Builder.CreateAdd(Ops[0], Ops[1]);
  7675. case NEON::BI__builtin_neon_vst1q_lane_v:
  7676. // Handle 64-bit integer elements as a special case. Use a shuffle to get
  7677. // a one-element vector and avoid poor code for i64 in the backend.
  7678. if (VTy->getElementType()->isIntegerTy(64)) {
  7679. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7680. Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
  7681. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  7682. Ops[2] = getAlignmentValue32(PtrOp0);
  7683. llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
  7684. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
  7685. Tys), Ops);
  7686. }
  7687. [[fallthrough]];
  7688. case NEON::BI__builtin_neon_vst1_lane_v: {
  7689. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7690. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
  7691. auto St = Builder.CreateStore(
  7692. Ops[1], Builder.CreateElementBitCast(PtrOp0, Ops[1]->getType()));
  7693. return St;
  7694. }
  7695. case NEON::BI__builtin_neon_vtbl1_v:
  7696. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
  7697. Ops, "vtbl1");
  7698. case NEON::BI__builtin_neon_vtbl2_v:
  7699. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
  7700. Ops, "vtbl2");
  7701. case NEON::BI__builtin_neon_vtbl3_v:
  7702. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
  7703. Ops, "vtbl3");
  7704. case NEON::BI__builtin_neon_vtbl4_v:
  7705. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
  7706. Ops, "vtbl4");
  7707. case NEON::BI__builtin_neon_vtbx1_v:
  7708. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
  7709. Ops, "vtbx1");
  7710. case NEON::BI__builtin_neon_vtbx2_v:
  7711. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
  7712. Ops, "vtbx2");
  7713. case NEON::BI__builtin_neon_vtbx3_v:
  7714. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
  7715. Ops, "vtbx3");
  7716. case NEON::BI__builtin_neon_vtbx4_v:
  7717. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
  7718. Ops, "vtbx4");
  7719. }
  7720. }
template<typename Integer>
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
  return E->getIntegerConstantExpr(Context)->getExtValue();
}

static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                     llvm::Type *T, bool Unsigned) {
  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
  // which finds it convenient to specify signed/unsigned as a boolean flag.
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
}

static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
                                    uint32_t Shift, bool Unsigned) {
  // MVE helper function for integer shift right. This must handle signed vs
  // unsigned, and also deal specially with the case where the shift count is
  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
  // undefined behavior, but in MVE it's legal, so we must convert it to code
  // that is not undefined in IR.
  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
                          ->getElementType()
                          ->getPrimitiveSizeInBits();
  if (Shift == LaneBits) {
    // An unsigned shift of the full lane size always generates zero, so we can
    // simply emit a zero vector. A signed shift of the full lane size does the
    // same thing as shifting by one bit fewer.
    if (Unsigned)
      return llvm::Constant::getNullValue(V->getType());
    else
      --Shift;
  }
  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
}

static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
  // MVE-specific helper function for a vector splat, which infers the element
  // count of the output vector by knowing that MVE vectors are all 128 bits
  // wide.
  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
  return Builder.CreateVectorSplat(Elements, V);
}

static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
                                            CodeGenFunction *CGF,
                                            llvm::Value *V,
                                            llvm::Type *DestType) {
  // Convert one MVE vector type into another by reinterpreting its in-register
  // format.
  //
  // Little-endian, this is identical to a bitcast (which reinterprets the
  // memory format). But big-endian, they're not necessarily the same, because
  // the register and memory formats map to each other differently depending on
  // the lane size.
  //
  // We generate a bitcast whenever we can (if we're little-endian, or if the
  // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
  // that performs the different kind of reinterpretation.
  if (CGF->getTarget().isBigEndian() &&
      V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
    return Builder.CreateCall(
        CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
                              {DestType, V->getType()}),
        V);
  } else {
    return Builder.CreateBitCast(V, DestType);
  }
}

static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
  // Make a shufflevector that extracts every other element of a vector (evens
  // or odds, as desired).
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i += 2)
    Indices.push_back(i + Odd);
  return Builder.CreateShuffleVector(V, Indices);
}

static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
                              llvm::Value *V1) {
  // Make a shufflevector that interleaves two vectors element by element.
  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i++) {
    Indices.push_back(i);
    Indices.push_back(i + InputElements);
  }
  return Builder.CreateShuffleVector(V0, V1, Indices);
}

template<unsigned HighBit, unsigned OtherBits>
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
  // MVE-specific helper function to make a vector splat of a constant such as
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
  unsigned LaneBits = T->getPrimitiveSizeInBits();
  uint32_t Value = HighBit << (LaneBits - 1);
  if (OtherBits)
    Value |= (1UL << (LaneBits - 1)) - 1;
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
  return ARMMVEVectorSplat(Builder, Lane);
}

static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
                                               llvm::Value *V,
                                               unsigned ReverseWidth) {
  // MVE-specific helper function which reverses the elements of a
  // vector within every (ReverseWidth)-bit collection of lanes.
  SmallVector<int, 16> Indices;
  unsigned LaneSize = V->getType()->getScalarSizeInBits();
  unsigned Elements = 128 / LaneSize;
  unsigned Mask = ReverseWidth / LaneSize - 1;
  for (unsigned i = 0; i < Elements; i++)
    Indices.push_back(i ^ Mask);
  return Builder.CreateShuffleVector(V, Indices);
}
  7832. Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
  7833. const CallExpr *E,
  7834. ReturnValueSlot ReturnValue,
  7835. llvm::Triple::ArchType Arch) {
  7836. enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
  7837. Intrinsic::ID IRIntr;
  7838. unsigned NumVectors;
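  // CustomCodeGenType, IRIntr and NumVectors are set by the Tablegen-generated
  // switch cases included below for the builtins that need handwritten codegen.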
  7839. // Code autogenerated by Tablegen will handle all the simple builtins.
  7840. switch (BuiltinID) {
  7841. #include "clang/Basic/arm_mve_builtin_cg.inc"
  7842. // If we didn't match an MVE builtin id at all, go back to the
  7843. // main EmitARMBuiltinExpr.
  7844. default:
  7845. return nullptr;
  7846. }
  7847. // Anything that breaks from that switch is an MVE builtin that
  7848. // needs handwritten code to generate.
  7849. switch (CustomCodeGenType) {
  7850. case CustomCodeGen::VLD24: {
  7851. llvm::SmallVector<Value *, 4> Ops;
  7852. llvm::SmallVector<llvm::Type *, 4> Tys;
  7853. auto MvecCType = E->getType();
  7854. auto MvecLType = ConvertType(MvecCType);
  7855. assert(MvecLType->isStructTy() &&
  7856. "Return type for vld[24]q should be a struct");
  7857. assert(MvecLType->getStructNumElements() == 1 &&
  7858. "Return-type struct for vld[24]q should have one element");
  7859. auto MvecLTypeInner = MvecLType->getStructElementType(0);
  7860. assert(MvecLTypeInner->isArrayTy() &&
  7861. "Return-type struct for vld[24]q should contain an array");
  7862. assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
  7863. "Array member of return-type struct vld[24]q has wrong length");
  7864. auto VecLType = MvecLTypeInner->getArrayElementType();
  7865. Tys.push_back(VecLType);
  7866. auto Addr = E->getArg(0);
  7867. Ops.push_back(EmitScalarExpr(Addr));
  7868. Tys.push_back(ConvertType(Addr->getType()));
  7869. Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
  7870. Value *LoadResult = Builder.CreateCall(F, Ops);
  7871. Value *MvecOut = PoisonValue::get(MvecLType);
  7872. for (unsigned i = 0; i < NumVectors; ++i) {
  7873. Value *Vec = Builder.CreateExtractValue(LoadResult, i);
  7874. MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
  7875. }
  7876. if (ReturnValue.isNull())
  7877. return MvecOut;
  7878. else
  7879. return Builder.CreateStore(MvecOut, ReturnValue.getValue());
  7880. }
  7881. case CustomCodeGen::VST24: {
  7882. llvm::SmallVector<Value *, 4> Ops;
  7883. llvm::SmallVector<llvm::Type *, 4> Tys;
  7884. auto Addr = E->getArg(0);
  7885. Ops.push_back(EmitScalarExpr(Addr));
  7886. Tys.push_back(ConvertType(Addr->getType()));
  7887. auto MvecCType = E->getArg(1)->getType();
  7888. auto MvecLType = ConvertType(MvecCType);
  7889. assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
  7890. assert(MvecLType->getStructNumElements() == 1 &&
  7891. "Data-type struct for vst2q should have one element");
  7892. auto MvecLTypeInner = MvecLType->getStructElementType(0);
  7893. assert(MvecLTypeInner->isArrayTy() &&
  7894. "Data-type struct for vst2q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst[24]q has wrong length");
  7897. auto VecLType = MvecLTypeInner->getArrayElementType();
  7898. Tys.push_back(VecLType);
  7899. AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
  7900. EmitAggExpr(E->getArg(1), MvecSlot);
  7901. auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
  7902. for (unsigned i = 0; i < NumVectors; i++)
  7903. Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
  7904. Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
  7905. Value *ToReturn = nullptr;
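    // The IR intrinsic is called once per vector, with a trailing constant
    // operand selecting which part of the interleaved store to perform; the
    // value of the last call is returned.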
  7906. for (unsigned i = 0; i < NumVectors; i++) {
  7907. Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
  7908. ToReturn = Builder.CreateCall(F, Ops);
  7909. Ops.pop_back();
  7910. }
  7911. return ToReturn;
  7912. }
  7913. }
  7914. llvm_unreachable("unknown custom codegen type.");
  7915. }
  7916. Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
  7917. const CallExpr *E,
  7918. ReturnValueSlot ReturnValue,
  7919. llvm::Triple::ArchType Arch) {
  7920. switch (BuiltinID) {
  7921. default:
  7922. return nullptr;
  7923. #include "clang/Basic/arm_cde_builtin_cg.inc"
  7924. }
  7925. }
  7926. static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
  7927. const CallExpr *E,
  7928. SmallVectorImpl<Value *> &Ops,
  7929. llvm::Triple::ArchType Arch) {
  7930. unsigned int Int = 0;
  7931. const char *s = nullptr;
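  // This first switch only recognizes the tbl/tbx builtins handled here; the
  // actual lowering for each builtin is selected further below.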
  7932. switch (BuiltinID) {
  7933. default:
  7934. return nullptr;
  7935. case NEON::BI__builtin_neon_vtbl1_v:
  7936. case NEON::BI__builtin_neon_vqtbl1_v:
  7937. case NEON::BI__builtin_neon_vqtbl1q_v:
  7938. case NEON::BI__builtin_neon_vtbl2_v:
  7939. case NEON::BI__builtin_neon_vqtbl2_v:
  7940. case NEON::BI__builtin_neon_vqtbl2q_v:
  7941. case NEON::BI__builtin_neon_vtbl3_v:
  7942. case NEON::BI__builtin_neon_vqtbl3_v:
  7943. case NEON::BI__builtin_neon_vqtbl3q_v:
  7944. case NEON::BI__builtin_neon_vtbl4_v:
  7945. case NEON::BI__builtin_neon_vqtbl4_v:
  7946. case NEON::BI__builtin_neon_vqtbl4q_v:
  7947. break;
  7948. case NEON::BI__builtin_neon_vtbx1_v:
  7949. case NEON::BI__builtin_neon_vqtbx1_v:
  7950. case NEON::BI__builtin_neon_vqtbx1q_v:
  7951. case NEON::BI__builtin_neon_vtbx2_v:
  7952. case NEON::BI__builtin_neon_vqtbx2_v:
  7953. case NEON::BI__builtin_neon_vqtbx2q_v:
  7954. case NEON::BI__builtin_neon_vtbx3_v:
  7955. case NEON::BI__builtin_neon_vqtbx3_v:
  7956. case NEON::BI__builtin_neon_vqtbx3q_v:
  7957. case NEON::BI__builtin_neon_vtbx4_v:
  7958. case NEON::BI__builtin_neon_vqtbx4_v:
  7959. case NEON::BI__builtin_neon_vqtbx4q_v:
  7960. break;
  7961. }
  7962. assert(E->getNumArgs() >= 3);
  7963. // Get the last argument, which specifies the vector type.
  7964. const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  7965. std::optional<llvm::APSInt> Result =
  7966. Arg->getIntegerConstantExpr(CGF.getContext());
  7967. if (!Result)
  7968. return nullptr;
  7969. // Determine the type of this overloaded NEON intrinsic.
  7970. NeonTypeFlags Type = Result->getZExtValue();
  7971. llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
  7972. if (!Ty)
  7973. return nullptr;
  7974. CodeGen::CGBuilderTy &Builder = CGF.Builder;
  // AArch64 scalar builtins are not overloaded; they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
  7977. switch (BuiltinID) {
  7978. case NEON::BI__builtin_neon_vtbl1_v: {
  7979. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
  7980. Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  7981. }
  7982. case NEON::BI__builtin_neon_vtbl2_v: {
  7983. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
  7984. Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  7985. }
  7986. case NEON::BI__builtin_neon_vtbl3_v: {
  7987. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
  7988. Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  7989. }
  7990. case NEON::BI__builtin_neon_vtbl4_v: {
  7991. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
  7992. Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  7993. }
  7994. case NEON::BI__builtin_neon_vtbx1_v: {
  7995. Value *TblRes =
  7996. packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
  7997. Intrinsic::aarch64_neon_tbl1, "vtbl1");
  7998. llvm::Constant *EightV = ConstantInt::get(Ty, 8);
  7999. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
  8000. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  8001. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  8002. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  8003. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  8004. }
  8005. case NEON::BI__builtin_neon_vtbx2_v: {
  8006. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
  8007. Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
  8008. }
  8009. case NEON::BI__builtin_neon_vtbx3_v: {
  8010. Value *TblRes =
  8011. packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
  8012. Intrinsic::aarch64_neon_tbl2, "vtbl2");
  8013. llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
  8014. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
  8015. TwentyFourV);
  8016. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  8017. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  8018. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  8019. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  8020. }
  8021. case NEON::BI__builtin_neon_vtbx4_v: {
  8022. return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
  8023. Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
  8024. }
  8025. case NEON::BI__builtin_neon_vqtbl1_v:
  8026. case NEON::BI__builtin_neon_vqtbl1q_v:
  8027. Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  8028. case NEON::BI__builtin_neon_vqtbl2_v:
  8029. case NEON::BI__builtin_neon_vqtbl2q_v: {
  8030. Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  8031. case NEON::BI__builtin_neon_vqtbl3_v:
  8032. case NEON::BI__builtin_neon_vqtbl3q_v:
  8033. Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  8034. case NEON::BI__builtin_neon_vqtbl4_v:
  8035. case NEON::BI__builtin_neon_vqtbl4q_v:
  8036. Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  8037. case NEON::BI__builtin_neon_vqtbx1_v:
  8038. case NEON::BI__builtin_neon_vqtbx1q_v:
  8039. Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  8040. case NEON::BI__builtin_neon_vqtbx2_v:
  8041. case NEON::BI__builtin_neon_vqtbx2q_v:
  8042. Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  8043. case NEON::BI__builtin_neon_vqtbx3_v:
  8044. case NEON::BI__builtin_neon_vqtbx3q_v:
  8045. Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  8046. case NEON::BI__builtin_neon_vqtbx4_v:
  8047. case NEON::BI__builtin_neon_vqtbx4q_v:
  8048. Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  8049. }
  8050. }
  8051. if (!Int)
  8052. return nullptr;
  8053. Function *F = CGF.CGM.getIntrinsic(Int, Ty);
  8054. return CGF.EmitNeonCall(F, Ops, s);
  8055. }
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  Op = Builder.CreateBitCast(Op, Int16Ty);
  Value *V = PoisonValue::get(VTy);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Op = Builder.CreateInsertElement(V, Op, CI);
  return Op;
}
  8064. /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
  8065. /// access builtin. Only required if it can't be inferred from the base pointer
  8066. /// operand.
  8067. llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
  8068. switch (TypeFlags.getMemEltType()) {
  8069. case SVETypeFlags::MemEltTyDefault:
  8070. return getEltType(TypeFlags);
  8071. case SVETypeFlags::MemEltTyInt8:
  8072. return Builder.getInt8Ty();
  8073. case SVETypeFlags::MemEltTyInt16:
  8074. return Builder.getInt16Ty();
  8075. case SVETypeFlags::MemEltTyInt32:
  8076. return Builder.getInt32Ty();
  8077. case SVETypeFlags::MemEltTyInt64:
  8078. return Builder.getInt64Ty();
  8079. }
  8080. llvm_unreachable("Unknown MemEltType");
  8081. }
  8082. llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
  8083. switch (TypeFlags.getEltType()) {
  8084. default:
  8085. llvm_unreachable("Invalid SVETypeFlag!");
  8086. case SVETypeFlags::EltTyInt8:
  8087. return Builder.getInt8Ty();
  8088. case SVETypeFlags::EltTyInt16:
  8089. return Builder.getInt16Ty();
  8090. case SVETypeFlags::EltTyInt32:
  8091. return Builder.getInt32Ty();
  8092. case SVETypeFlags::EltTyInt64:
  8093. return Builder.getInt64Ty();
  8094. case SVETypeFlags::EltTyFloat16:
  8095. return Builder.getHalfTy();
  8096. case SVETypeFlags::EltTyFloat32:
  8097. return Builder.getFloatTy();
  8098. case SVETypeFlags::EltTyFloat64:
  8099. return Builder.getDoubleTy();
  8100. case SVETypeFlags::EltTyBFloat16:
  8101. return Builder.getBFloatTy();
  8102. case SVETypeFlags::EltTyBool8:
  8103. case SVETypeFlags::EltTyBool16:
  8104. case SVETypeFlags::EltTyBool32:
  8105. case SVETypeFlags::EltTyBool64:
  8106. return Builder.getInt1Ty();
  8107. }
  8108. }
  8109. // Return the llvm predicate vector type corresponding to the specified element
  8110. // TypeFlags.
  8111. llvm::ScalableVectorType *
  8112. CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
  8113. switch (TypeFlags.getEltType()) {
  8114. default: llvm_unreachable("Unhandled SVETypeFlag!");
  8115. case SVETypeFlags::EltTyInt8:
  8116. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  8117. case SVETypeFlags::EltTyInt16:
  8118. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  8119. case SVETypeFlags::EltTyInt32:
  8120. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  8121. case SVETypeFlags::EltTyInt64:
  8122. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  8123. case SVETypeFlags::EltTyBFloat16:
  8124. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  8125. case SVETypeFlags::EltTyFloat16:
  8126. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  8127. case SVETypeFlags::EltTyFloat32:
  8128. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  8129. case SVETypeFlags::EltTyFloat64:
  8130. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  8131. case SVETypeFlags::EltTyBool8:
  8132. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  8133. case SVETypeFlags::EltTyBool16:
  8134. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  8135. case SVETypeFlags::EltTyBool32:
  8136. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  8137. case SVETypeFlags::EltTyBool64:
  8138. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  8139. }
  8140. }
  8141. // Return the llvm vector type corresponding to the specified element TypeFlags.
  8142. llvm::ScalableVectorType *
  8143. CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
  8144. switch (TypeFlags.getEltType()) {
  8145. default:
  8146. llvm_unreachable("Invalid SVETypeFlag!");
  8147. case SVETypeFlags::EltTyInt8:
  8148. return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
  8149. case SVETypeFlags::EltTyInt16:
  8150. return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
  8151. case SVETypeFlags::EltTyInt32:
  8152. return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
  8153. case SVETypeFlags::EltTyInt64:
  8154. return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
  8155. case SVETypeFlags::EltTyFloat16:
  8156. return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
  8157. case SVETypeFlags::EltTyBFloat16:
  8158. return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
  8159. case SVETypeFlags::EltTyFloat32:
  8160. return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
  8161. case SVETypeFlags::EltTyFloat64:
  8162. return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
  8163. case SVETypeFlags::EltTyBool8:
  8164. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  8165. case SVETypeFlags::EltTyBool16:
  8166. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  8167. case SVETypeFlags::EltTyBool32:
  8168. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  8169. case SVETypeFlags::EltTyBool64:
  8170. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  8171. }
  8172. }
llvm::Value *
CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
  Function *Ptrue =
      CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
  return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
}

constexpr unsigned SVEBitsPerBlock = 128;

static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
  unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
  return llvm::ScalableVectorType::get(EltTy, NumElts);
}
  8184. // Reinterpret the input predicate so that it can be used to correctly isolate
  8185. // the elements of the specified datatype.
  8186. Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
  8187. llvm::ScalableVectorType *VTy) {
  8188. auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
  8189. if (Pred->getType() == RTy)
  8190. return Pred;
  8191. unsigned IntID;
  8192. llvm::Type *IntrinsicTy;
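  // Converting the 16-lane svbool_t down to a narrower predicate type uses
  // convert_from_svbool; widening a narrower predicate back up to 16 lanes
  // uses convert_to_svbool.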
  8193. switch (VTy->getMinNumElements()) {
  8194. default:
  8195. llvm_unreachable("unsupported element count!");
  8196. case 2:
  8197. case 4:
  8198. case 8:
  8199. IntID = Intrinsic::aarch64_sve_convert_from_svbool;
  8200. IntrinsicTy = RTy;
  8201. break;
  8202. case 16:
  8203. IntID = Intrinsic::aarch64_sve_convert_to_svbool;
  8204. IntrinsicTy = Pred->getType();
  8205. break;
  8206. }
  8207. Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
  8208. Value *C = Builder.CreateCall(F, Pred);
  8209. assert(C->getType() == RTy && "Unexpected return type!");
  8210. return C;
  8211. }
  8212. Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
  8213. SmallVectorImpl<Value *> &Ops,
  8214. unsigned IntID) {
  8215. auto *ResultTy = getSVEType(TypeFlags);
  8216. auto *OverloadedTy =
  8217. llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being loaded. For example, when loading doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being loaded must match. Cast accordingly.
  8223. Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
  8224. Function *F = nullptr;
  8225. if (Ops[1]->getType()->isVectorTy())
  8226. // This is the "vector base, scalar offset" case. In order to uniquely
  8227. // map this built-in to an LLVM IR intrinsic, we need both the return type
  8228. // and the type of the vector base.
  8229. F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
  8230. else
  8231. // This is the "scalar base, vector offset case". The type of the offset
  8232. // is encoded in the name of the intrinsic. We only need to specify the
  8233. // return type in order to uniquely map this built-in to an LLVM IR
  8234. // intrinsic.
  8235. F = CGM.getIntrinsic(IntID, OverloadedTy);
  8236. // Pass 0 when the offset is missing. This can only be applied when using
  8237. // the "vector base" addressing mode for which ACLE allows no offset. The
  8238. // corresponding LLVM IR always requires an offset.
  8239. if (Ops.size() == 2) {
  8240. assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
  8241. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  8242. }
  8243. // For "vector base, scalar index" scale the index so that it becomes a
  8244. // scalar offset.
  8245. if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
  8246. unsigned BytesPerElt =
  8247. OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
  8248. Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
  8249. }
  8250. Value *Call = Builder.CreateCall(F, Ops);
  8251. // The following sext/zext is only needed when ResultTy != OverloadedTy. In
  8252. // other cases it's folded into a nop.
  8253. return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
  8254. : Builder.CreateSExt(Call, ResultTy);
  8255. }
  8256. Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
  8257. SmallVectorImpl<Value *> &Ops,
  8258. unsigned IntID) {
  8259. auto *SrcDataTy = getSVEType(TypeFlags);
  8260. auto *OverloadedTy =
  8261. llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
  8262. // In ACLE the source data is passed in the last argument, whereas in LLVM IR
  8263. // it's the first argument. Move it accordingly.
  8264. Ops.insert(Ops.begin(), Ops.pop_back_val());
  8265. Function *F = nullptr;
  8266. if (Ops[2]->getType()->isVectorTy())
  8267. // This is the "vector base, scalar offset" case. In order to uniquely
  8268. // map this built-in to an LLVM IR intrinsic, we need both the return type
  8269. // and the type of the vector base.
  8270. F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
  8271. else
  8272. // This is the "scalar base, vector offset case". The type of the offset
  8273. // is encoded in the name of the intrinsic. We only need to specify the
  8274. // return type in order to uniquely map this built-in to an LLVM IR
  8275. // intrinsic.
  8276. F = CGM.getIntrinsic(IntID, OverloadedTy);
  8277. // Pass 0 when the offset is missing. This can only be applied when using
  8278. // the "vector base" addressing mode for which ACLE allows no offset. The
  8279. // corresponding LLVM IR always requires an offset.
  8280. if (Ops.size() == 3) {
  8281. assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
  8282. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  8283. }
  8284. // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
  8285. // folded into a nop.
  8286. Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being stored must match. Cast accordingly.
  8292. Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
  8293. // For "vector base, scalar index" scale the index so that it becomes a
  8294. // scalar offset.
  8295. if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
  8296. unsigned BytesPerElt =
  8297. OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
  8298. Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
  8299. }
  8300. return Builder.CreateCall(F, Ops);
  8301. }
  8302. Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
  8303. SmallVectorImpl<Value *> &Ops,
  8304. unsigned IntID) {
  8305. // The gather prefetches are overloaded on the vector input - this can either
  8306. // be the vector of base addresses or vector of offsets.
  8307. auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
  8308. if (!OverloadedTy)
  8309. OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
  8310. // Cast the predicate from svbool_t to the right number of elements.
  8311. Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
  8312. // vector + imm addressing modes
  8313. if (Ops[1]->getType()->isVectorTy()) {
  8314. if (Ops.size() == 3) {
  8315. // Pass 0 for 'vector+imm' when the index is omitted.
  8316. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  8317. // The sv_prfop is the last operand in the builtin and IR intrinsic.
  8318. std::swap(Ops[2], Ops[3]);
  8319. } else {
  8320. // Index needs to be passed as scaled offset.
  8321. llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
  8322. unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
  8323. if (BytesPerElt > 1)
  8324. Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
  8325. }
  8326. }
  8327. Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
  8328. return Builder.CreateCall(F, Ops);
  8329. }
  8330. Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
  8331. SmallVectorImpl<Value*> &Ops,
  8332. unsigned IntID) {
  8333. llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  8334. auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  8335. auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
  8336. unsigned N;
  8337. switch (IntID) {
  8338. case Intrinsic::aarch64_sve_ld2_sret:
  8339. N = 2;
  8340. break;
  8341. case Intrinsic::aarch64_sve_ld3_sret:
  8342. N = 3;
  8343. break;
  8344. case Intrinsic::aarch64_sve_ld4_sret:
  8345. N = 4;
  8346. break;
  8347. default:
  8348. llvm_unreachable("unknown intrinsic!");
  8349. }
  8350. auto RetTy = llvm::VectorType::get(VTy->getElementType(),
  8351. VTy->getElementCount() * N);
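  // The tuple result of ld2/ld3/ld4 is returned as a single wide vector with
  // the N sub-vectors concatenated; each part is inserted at its offset below.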
  8352. Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  8353. Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy);
  8354. // Does the load have an offset?
  8355. if (Ops.size() > 2)
  8356. BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
  8357. BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  8358. Function *F = CGM.getIntrinsic(IntID, {VTy});
  8359. Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
  8360. unsigned MinElts = VTy->getMinNumElements();
  8361. Value *Ret = llvm::PoisonValue::get(RetTy);
  8362. for (unsigned I = 0; I < N; I++) {
  8363. Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
  8364. Value *SRet = Builder.CreateExtractValue(Call, I);
  8365. Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
  8366. }
  8367. return Ret;
  8368. }
  8369. Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
  8370. SmallVectorImpl<Value*> &Ops,
  8371. unsigned IntID) {
  8372. llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  8373. auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  8374. auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
  8375. unsigned N;
  8376. switch (IntID) {
  8377. case Intrinsic::aarch64_sve_st2:
  8378. N = 2;
  8379. break;
  8380. case Intrinsic::aarch64_sve_st3:
  8381. N = 3;
  8382. break;
  8383. case Intrinsic::aarch64_sve_st4:
  8384. N = 4;
  8385. break;
  8386. default:
  8387. llvm_unreachable("unknown intrinsic!");
  8388. }
  8389. Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  8390. Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
  8391. // Does the store have an offset?
  8392. if (Ops.size() > 3)
  8393. BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
  8394. BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  8395. Value *Val = Ops.back();
  8396. // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
  8397. // need to break up the tuple vector.
  8398. SmallVector<llvm::Value*, 5> Operands;
  8399. unsigned MinElts = VTy->getElementCount().getKnownMinValue();
  8400. for (unsigned I = 0; I < N; ++I) {
  8401. Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
  8402. Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
  8403. }
  8404. Operands.append({Predicate, BasePtr});
  8405. Function *F = CGM.getIntrinsic(IntID, { VTy });
  8406. return Builder.CreateCall(F, Operands);
  8407. }
  8408. // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
  8409. // svpmullt_pair intrinsics, with the exception that their results are bitcast
  8410. // to a wider type.
  8411. Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
  8412. SmallVectorImpl<Value *> &Ops,
  8413. unsigned BuiltinID) {
  8414. // Splat scalar operand to vector (intrinsics with _n infix)
  8415. if (TypeFlags.hasSplatOperand()) {
  8416. unsigned OpNo = TypeFlags.getSplatOperand();
  8417. Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
  8418. }
  8419. // The pair-wise function has a narrower overloaded type.
  8420. Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
  8421. Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
  8422. // Now bitcast to the wider result type.
  8423. llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
  8424. return EmitSVEReinterpret(Call, Ty);
  8425. }
  8426. Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
  8427. ArrayRef<Value *> Ops, unsigned BuiltinID) {
  8428. llvm::Type *OverloadedTy = getSVEType(TypeFlags);
  8429. Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
  8430. return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
  8431. }
Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
                                            SmallVectorImpl<Value *> &Ops,
                                            unsigned BuiltinID) {
  auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
  auto *VectorTy = getSVEVectorForElementType(MemEltTy);
  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Ops[1];

  // Implement the index operand if not omitted.
  if (Ops.size() > 3) {
    BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
  }

  // Prefetch intrinsics always expect an i8*.
  BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
  Value *PrfOp = Ops.back();

  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
}

Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                          llvm::Type *ReturnTy,
                                          SmallVectorImpl<Value *> &Ops,
                                          unsigned BuiltinID,
                                          bool IsZExtReturn) {
  QualType LangPTy = E->getArg(1)->getType();
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
      LangPTy->castAs<PointerType>()->getPointeeType());

  // The vector type that is returned may be different from the
  // eventual type loaded from memory.
  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());

  // Does the load have an offset?
  if (Ops.size() > 2)
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);

  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  auto *Load =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);

  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
                      : Builder.CreateSExt(Load, VectorTy);
}
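
// As a rough sketch of the load widening above (not verbatim from the source):
// svld1sb_s32(pg, base) loads <vscale x 4 x i8> via the overloaded load
// intrinsic and sign-extends it to the <vscale x 4 x i32> return type, while
// the unsigned (IsZExtReturn) variants zero-extend instead.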

Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
                                           SmallVectorImpl<Value *> &Ops,
                                           unsigned BuiltinID) {
  QualType LangPTy = E->getArg(1)->getType();
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
      LangPTy->castAs<PointerType>()->getPointeeType());

  // The vector type that is stored may be different from the
  // eventual type stored to memory.
  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());

  // Does the store have an offset?
  if (Ops.size() == 4)
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);

  // Last value is always the data.
  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);

  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  auto *Store =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
  return Store;
}
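
// This is the mirror image of the widening loads (illustrative only): for
// svst1b_s32(pg, base, data) the <vscale x 4 x i32> data is truncated to
// <vscale x 4 x i8> before being handed to the store intrinsic.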

// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
  return Builder.CreateVectorSplat(
      cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
}

Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
}

Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
  // FIXME: For big endian this needs an additional REV, or needs a separate
  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
  // instruction is defined as 'bitwise' equivalent from memory point of
  // view (when storing/reloading), whereas the svreinterpret builtin
  // implements bitwise equivalent cast from register point of view.
  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
  return Builder.CreateBitCast(Val, Ty);
}

static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                      SmallVectorImpl<Value *> &Ops) {
  auto *SplatZero = Constant::getNullValue(Ty);
  Ops.insert(Ops.begin(), SplatZero);
}

static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                       SmallVectorImpl<Value *> &Ops) {
  auto *SplatUndef = UndefValue::get(Ty);
  Ops.insert(Ops.begin(), SplatUndef);
}

SmallVector<llvm::Type *, 2>
CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
                                     llvm::Type *ResultType,
                                     ArrayRef<Value *> Ops) {
  if (TypeFlags.isOverloadNone())
    return {};

  llvm::Type *DefaultType = getSVEType(TypeFlags);

  if (TypeFlags.isOverloadWhile())
    return {DefaultType, Ops[1]->getType()};

  if (TypeFlags.isOverloadWhileRW())
    return {getSVEPredType(TypeFlags), Ops[0]->getType()};

  if (TypeFlags.isOverloadCvt())
    return {Ops[0]->getType(), Ops.back()->getType()};

  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
  return {DefaultType};
}
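
// Roughly speaking (assumed examples, not taken from this file): most builtins
// overload the intrinsic on the single default SVE type, e.g. an svabs_s32
// variant maps to llvm.aarch64.sve.abs.nxv4i32, while the 'while' comparisons
// also carry their scalar operand type, e.g.
// llvm.aarch64.sve.whilelt.nxv4i1.i64.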

Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
                                             llvm::Type *Ty,
                                             ArrayRef<Value *> Ops) {
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
         "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");

  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
      TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
  Value *Idx = ConstantInt::get(CGM.Int64Ty,
                                I * SingleVecTy->getMinNumElements());

  if (TypeFlags.isTupleSet())
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
}

Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
                                           llvm::Type *Ty,
                                           ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");

  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
  unsigned MinElts = SrcTy->getMinNumElements();
  Value *Call = llvm::PoisonValue::get(Ty);
  for (unsigned I = 0; I < Ops.size(); I++) {
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
  }

  return Call;
}
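
// Sketch of the tuple layout assumed above (illustrative): svcreate2_s32(a, b)
// inserts two <vscale x 4 x i32> values into a poison <vscale x 8 x i32> at
// element offsets 0 and 4, so part N of a tuple starts at N * MinElts.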

Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::Type *Ty = ConvertType(E->getType());
  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
    Value *Val = EmitScalarExpr(E->getArg(0));
    return EmitSVEReinterpret(Val, Ty);
  }

  llvm::SmallVector<Value *, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    if ((ICEArguments & (1 << i)) == 0)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      std::optional<llvm::APSInt> Result =
          E->getArg(i)->getIntegerConstantExpr(getContext());
      assert(Result && "Expected argument to be a constant");

      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
      // truncate because the immediate has been range checked and no valid
      // immediate requires more than a handful of bits.
      *Result = Result->extOrTrunc(32);
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
    }
  }

  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                              AArch64SVEIntrinsicsProvenSorted);
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
  if (TypeFlags.isLoad())
    return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                             TypeFlags.isZExtReturn());
  else if (TypeFlags.isStore())
    return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isGatherLoad())
    return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isScatterStore())
    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isPrefetch())
    return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isGatherPrefetch())
    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructLoad())
    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructStore())
    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
  else if (TypeFlags.isTupleCreate())
    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
  else if (TypeFlags.isUndef())
    return UndefValue::get(Ty);
  else if (Builtin->LLVMIntrinsic != 0) {
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
      InsertExplicitZeroOperand(Builder, Ty, Ops);

    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
      InsertExplicitUndefOperand(Builder, Ty, Ops);

    // Some ACLE builtins leave out the argument to specify the predicate
    // pattern, which is expected to be expanded to an SV_ALL pattern.
    if (TypeFlags.isAppendSVALL())
      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
    if (TypeFlags.isInsertOp1SVALL())
      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));

    // Predicates must match the main datatype.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
        if (PredTy->getElementType()->isIntegerTy(1))
          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));

    // Splat scalar operand to vector (intrinsics with _n infix)
    if (TypeFlags.hasSplatOperand()) {
      unsigned OpNo = TypeFlags.getSplatOperand();
      Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
    }

    if (TypeFlags.isReverseCompare())
      std::swap(Ops[1], Ops[2]);

    if (TypeFlags.isReverseUSDOT())
      std::swap(Ops[1], Ops[2]);

    // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
      llvm::Type *OpndTy = Ops[1]->getType();
      auto *SplatZero = Constant::getNullValue(OpndTy);
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
    }

    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
    Value *Call = Builder.CreateCall(F, Ops);

    // Predicate results must be converted to svbool_t.
    if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
      if (PredTy->getScalarType()->isIntegerTy(1))
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));

    return Call;
  }
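
  // Two assumed examples of the generic path above (for illustration only):
  // builtins that omit the predicate-pattern argument, such as svcntb(), get
  // an appended SV_ALL (31) operand, and a _z arithmetic builtin such as
  // svadd_s32_z(pg, a, b) typically has its first data operand rewritten to
  // select(pg, a, zeroinitializer) before the intrinsic call is emitted.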

  switch (BuiltinID) {
  default:
    return nullptr;

  case SVE::BI__builtin_sve_svmov_b_z: {
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  }

  case SVE::BI__builtin_sve_svnot_b_z: {
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  }

  case SVE::BI__builtin_sve_svmovlb_u16:
  case SVE::BI__builtin_sve_svmovlb_u32:
  case SVE::BI__builtin_sve_svmovlb_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
  case SVE::BI__builtin_sve_svmovlb_s16:
  case SVE::BI__builtin_sve_svmovlb_s32:
  case SVE::BI__builtin_sve_svmovlb_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
  case SVE::BI__builtin_sve_svmovlt_u16:
  case SVE::BI__builtin_sve_svmovlt_u32:
  case SVE::BI__builtin_sve_svmovlt_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
  case SVE::BI__builtin_sve_svmovlt_s16:
  case SVE::BI__builtin_sve_svmovlt_s32:
  case SVE::BI__builtin_sve_svmovlt_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);

  case SVE::BI__builtin_sve_svpmullt_u16:
  case SVE::BI__builtin_sve_svpmullt_u64:
  case SVE::BI__builtin_sve_svpmullt_n_u16:
  case SVE::BI__builtin_sve_svpmullt_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);

  case SVE::BI__builtin_sve_svpmullb_u16:
  case SVE::BI__builtin_sve_svpmullb_u64:
  case SVE::BI__builtin_sve_svpmullb_n_u16:
  case SVE::BI__builtin_sve_svpmullb_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);

  case SVE::BI__builtin_sve_svdup_n_b8:
  case SVE::BI__builtin_sve_svdup_n_b16:
  case SVE::BI__builtin_sve_svdup_n_b32:
  case SVE::BI__builtin_sve_svdup_n_b64: {
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar arguments, inserting it into the low 128 bits of a scalable
    // vector, and replicating it across the whole vector with dupq_lane.
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    // For svdupq_n_b* the element type is an integer of width 128/numelts,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes.
    llvm::Type *EltTy = Ops[0]->getType();
    if (IsBoolTy)
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);

    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *Vec = BuildVector(VecOps);

    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));

    Function *F =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
    Value *DupQLane =
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});

    if (!IsBoolTy)
      return DupQLane;

    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    Value *Pred = EmitSVEAllTruePred(TypeFlags);

    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
                                       : Intrinsic::aarch64_sve_cmpne_wide,
                         OverloadedTy);
    Value *Call = Builder.CreateCall(
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  }
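
  // An assumed concrete instance of the sequence above (names illustrative):
  // svdupq_n_s32(a, b, c, d) builds a <4 x i32>, inserts it at offset 0 of a
  // poison <vscale x 4 x i32>, and calls llvm.aarch64.sve.dupq.lane(vec, 0) so
  // that the 128-bit group repeats across the whole register.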

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
    auto *NumEls =
        llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
  }
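
  // In other words (illustrative): svlen returns vscale times the minimum
  // element count of the type, e.g. svlen_s32(v) becomes
  // mul (i64 4, llvm.vscale.i64()).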

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
                                            ConstantInt::get(CGM.Int64Ty, 0));
    unsigned MinElts = VTy->getMinNumElements();
    Value *V1 = Builder.CreateExtractVector(
        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
    return Builder.CreateCall(F, {V0, V1, Ops[1]});
  }
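
  // Rough sketch of the operand split above: svtbl2_s32(table, idx) receives
  // the two-vector table as one wide <vscale x 8 x i32> value, so the two
  // <vscale x 4 x i32> halves are extracted at offsets 0 and 4 and handed to
  // llvm.aarch64.sve.tbl2 together with the index vector.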

  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
  }

  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
  case SVE::BI__builtin_sve_svdup_neonq_f32:
  case SVE::BI__builtin_sve_svdup_neonq_f64:
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                               Builder.getInt64(0));
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
                                   {Insert, Builder.getInt64(0)});
  }
  }

  /// Should not happen
  return nullptr;
}

Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);

  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }
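
  // For example, __wfi() takes this path and lowers to a call of
  // llvm.aarch64.hint(i32 3); the other hint builtins differ only in the
  // immediate chosen by the switch above.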

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg, "frint32z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg, "frint64z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg, "frint32x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg, "frint64x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    } else {
      // Load 8 i64 words from ValPtr, and store them to the address
      // via an LLVM intrinsic.
      SmallVector<llvm::Value *, 9> Args;
      Args.push_back(MemAddr);
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        Args.push_back(Builder.CreateLoad(Addr));
      }

      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                       ? Intrinsic::aarch64_st64b
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                       ? Intrinsic::aarch64_st64bv
                       : Intrinsic::aarch64_st64bv0);
      Function *F = CGM.getIntrinsic(Intr);
      return Builder.CreateCall(F, Args);
    }
  }
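
  // To summarise the shape of the IR (illustrative): __builtin_arm_ld64b
  // returns a literal struct of eight i64 values that is scattered to the user
  // buffer with eight stores, while the st64b variants gather eight i64 loads
  // from the buffer and pass them as separate operands to the intrinsic.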

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }

  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
    llvm::Type *PtrTy = IntTy->getPointerTo();
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         PtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Builder.CreateElementBitCast(Tmp, STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                         Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }

  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }

  // Memory Operations (MOPS)
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
    Val = Builder.CreateTrunc(Val, Int8Ty);
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }

  // Memory Tagging Extensions (MTE) Intrinsics
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    llvm::Type *T = ConvertType(E->getType());

    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      Mask = Builder.CreateZExt(Mask, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));

      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return
    // address (first arg) to this intrinsic, for now we set
    // return address same as input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
      return Builder.CreatePointerCast(RV, T);
    }
    // Although it is possible to supply a different tag (to set)
    // to this intrinsic (as first arg), for now we supply
    // the tag that is in input address arg (common use case).
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
      PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }

  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    std::string SysRegStr;
    llvm::raw_string_ostream(SysRegStr) <<
                       ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
                       ((SysReg >> 11) & 7) << ":" <<
                       ((SysReg >> 7) & 15) << ":" <<
                       ((SysReg >> 3) & 15) << ":" <<
                       ( SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }
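
  // Worked example of the "op0:op1:CRn:CRm:op2" string built above (assuming
  // the MSVC ARM64_SYSREG encoding of the register operand): a value of
  // 0x5E82, i.e. op0=3, op1=3, CRn=13, CRm=0, op2=2 (TPIDR_EL0), is rendered
  // as the metadata string "3:3:13:0:2".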

  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
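
  // Quick sanity check of the widening-multiply scheme above: the operands are
  // extended to 128 bits, multiplied, and the top half is returned, so e.g.
  // __umulh(1ULL << 63, 4) yields (2^65) >> 64 == 2.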

  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    llvm::Type *IntTy = ConvertType(E->getArg(1)->getType());

    // Read x18 as i8*
    LLVMContext &Context = CGM.getLLVMContext();
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
    X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));

    // Store val at x18 + offset
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    llvm::Type *IntTy = ConvertType(E->getType());

    // Read x18 as i8*
    LLVMContext &Context = CGM.getLLVMContext();
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
    X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));

    // Load x18 + offset
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.getPointer());
        continue;
      }
    }
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      Ops.push_back(llvm::ConstantInt::get(
          getLLVMContext(),
          *E->getArg(i)->getIntegerConstantExpr(getContext())));
    }
  }

  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);

  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();

  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge; break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt; break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  9581. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  9582. case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
  9583. unsigned Int;
  9584. llvm::Type* InTy = Int32Ty;
  9585. llvm::Type* FTy = HalfTy;
  9586. llvm::Type *Tys[2] = {InTy, FTy};
  9587. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9588. switch (BuiltinID) {
  9589. default: llvm_unreachable("missing builtin ID in switch!");
  9590. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  9591. Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
  9592. case NEON::BI__builtin_neon_vcvth_n_u16_f16:
  9593. Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
  9594. }
  9595. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  9596. return Builder.CreateTrunc(Ops[0], Int16Ty);
  9597. }
  9598. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  9599. case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
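// Fixed-point i16 -> f16 conversion: widen the integer input to i32
// (sign- or zero-extended as appropriate) before calling the intrinsic.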
  9600. unsigned Int;
  9601. llvm::Type* FTy = HalfTy;
  9602. llvm::Type* InTy = Int32Ty;
  9603. llvm::Type *Tys[2] = {FTy, InTy};
  9604. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9605. switch (BuiltinID) {
  9606. default: llvm_unreachable("missing builtin ID in switch!");
  9607. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  9608. Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
  9609. Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
  9610. break;
  9611. case NEON::BI__builtin_neon_vcvth_n_f16_u16:
  9612. Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
  9613. Ops[0] = Builder.CreateZExt(Ops[0], InTy);
  9614. break;
  9615. }
  9616. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  9617. }
  9618. case NEON::BI__builtin_neon_vpaddd_s64: {
  9619. auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
  9620. Value *Vec = EmitScalarExpr(E->getArg(0));
9621. // The vector is v2i64, so make sure it's bitcast to that.
  9622. Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
  9623. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  9624. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  9625. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  9626. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
9627. // Pairwise addition of a v2i64 into a scalar i64.
  9628. return Builder.CreateAdd(Op0, Op1, "vpaddd");
  9629. }
  9630. case NEON::BI__builtin_neon_vpaddd_f64: {
  9631. auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
  9632. Value *Vec = EmitScalarExpr(E->getArg(0));
  9633. // The vector is v2f64, so make sure it's bitcast to that.
  9634. Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
  9635. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  9636. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  9637. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  9638. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  9639. // Pairwise addition of a v2f64 into a scalar f64.
  9640. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  9641. }
  9642. case NEON::BI__builtin_neon_vpadds_f32: {
  9643. auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
  9644. Value *Vec = EmitScalarExpr(E->getArg(0));
  9645. // The vector is v2f32, so make sure it's bitcast to that.
  9646. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
  9647. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  9648. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  9649. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  9650. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  9651. // Pairwise addition of a v2f32 into a scalar f32.
  9652. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  9653. }
  9654. case NEON::BI__builtin_neon_vceqzd_s64:
  9655. case NEON::BI__builtin_neon_vceqzd_f64:
  9656. case NEON::BI__builtin_neon_vceqzs_f32:
  9657. case NEON::BI__builtin_neon_vceqzh_f16:
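// Scalar compare-against-zero builtins share one helper that takes both the
// FP and the integer predicate.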
  9658. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9659. return EmitAArch64CompareBuiltinExpr(
  9660. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9661. ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  9662. case NEON::BI__builtin_neon_vcgezd_s64:
  9663. case NEON::BI__builtin_neon_vcgezd_f64:
  9664. case NEON::BI__builtin_neon_vcgezs_f32:
  9665. case NEON::BI__builtin_neon_vcgezh_f16:
  9666. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9667. return EmitAArch64CompareBuiltinExpr(
  9668. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9669. ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  9670. case NEON::BI__builtin_neon_vclezd_s64:
  9671. case NEON::BI__builtin_neon_vclezd_f64:
  9672. case NEON::BI__builtin_neon_vclezs_f32:
  9673. case NEON::BI__builtin_neon_vclezh_f16:
  9674. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9675. return EmitAArch64CompareBuiltinExpr(
  9676. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9677. ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  9678. case NEON::BI__builtin_neon_vcgtzd_s64:
  9679. case NEON::BI__builtin_neon_vcgtzd_f64:
  9680. case NEON::BI__builtin_neon_vcgtzs_f32:
  9681. case NEON::BI__builtin_neon_vcgtzh_f16:
  9682. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9683. return EmitAArch64CompareBuiltinExpr(
  9684. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9685. ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  9686. case NEON::BI__builtin_neon_vcltzd_s64:
  9687. case NEON::BI__builtin_neon_vcltzd_f64:
  9688. case NEON::BI__builtin_neon_vcltzs_f32:
  9689. case NEON::BI__builtin_neon_vcltzh_f16:
  9690. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9691. return EmitAArch64CompareBuiltinExpr(
  9692. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9693. ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
  9694. case NEON::BI__builtin_neon_vceqzd_u64: {
  9695. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9696. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9697. Ops[0] =
  9698. Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
  9699. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  9700. }
  9701. case NEON::BI__builtin_neon_vceqd_f64:
  9702. case NEON::BI__builtin_neon_vcled_f64:
  9703. case NEON::BI__builtin_neon_vcltd_f64:
  9704. case NEON::BI__builtin_neon_vcged_f64:
  9705. case NEON::BI__builtin_neon_vcgtd_f64: {
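// OEQ uses a quiet compare; the ordering predicates use signaling compares
// (CreateFCmpS).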
  9706. llvm::CmpInst::Predicate P;
  9707. switch (BuiltinID) {
  9708. default: llvm_unreachable("missing builtin ID in switch!");
  9709. case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
  9710. case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
  9711. case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
  9712. case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
  9713. case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
  9714. }
  9715. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9716. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  9717. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  9718. if (P == llvm::FCmpInst::FCMP_OEQ)
  9719. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9720. else
  9721. Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
  9722. return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  9723. }
  9724. case NEON::BI__builtin_neon_vceqs_f32:
  9725. case NEON::BI__builtin_neon_vcles_f32:
  9726. case NEON::BI__builtin_neon_vclts_f32:
  9727. case NEON::BI__builtin_neon_vcges_f32:
  9728. case NEON::BI__builtin_neon_vcgts_f32: {
  9729. llvm::CmpInst::Predicate P;
  9730. switch (BuiltinID) {
  9731. default: llvm_unreachable("missing builtin ID in switch!");
  9732. case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
  9733. case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
  9734. case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
  9735. case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
  9736. case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
  9737. }
  9738. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9739. Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
  9740. Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
  9741. if (P == llvm::FCmpInst::FCMP_OEQ)
  9742. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9743. else
  9744. Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
  9745. return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  9746. }
  9747. case NEON::BI__builtin_neon_vceqh_f16:
  9748. case NEON::BI__builtin_neon_vcleh_f16:
  9749. case NEON::BI__builtin_neon_vclth_f16:
  9750. case NEON::BI__builtin_neon_vcgeh_f16:
  9751. case NEON::BI__builtin_neon_vcgth_f16: {
  9752. llvm::CmpInst::Predicate P;
  9753. switch (BuiltinID) {
  9754. default: llvm_unreachable("missing builtin ID in switch!");
  9755. case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
  9756. case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
  9757. case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
  9758. case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
  9759. case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
  9760. }
  9761. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9762. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  9763. Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
  9764. if (P == llvm::FCmpInst::FCMP_OEQ)
  9765. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9766. else
  9767. Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
  9768. return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  9769. }
  9770. case NEON::BI__builtin_neon_vceqd_s64:
  9771. case NEON::BI__builtin_neon_vceqd_u64:
  9772. case NEON::BI__builtin_neon_vcgtd_s64:
  9773. case NEON::BI__builtin_neon_vcgtd_u64:
  9774. case NEON::BI__builtin_neon_vcltd_s64:
  9775. case NEON::BI__builtin_neon_vcltd_u64:
  9776. case NEON::BI__builtin_neon_vcged_u64:
  9777. case NEON::BI__builtin_neon_vcged_s64:
  9778. case NEON::BI__builtin_neon_vcled_u64:
  9779. case NEON::BI__builtin_neon_vcled_s64: {
  9780. llvm::CmpInst::Predicate P;
  9781. switch (BuiltinID) {
  9782. default: llvm_unreachable("missing builtin ID in switch!");
  9783. case NEON::BI__builtin_neon_vceqd_s64:
9784. case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
9785. case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
9786. case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
9787. case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
9788. case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
9789. case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
9790. case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
9791. case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
9792. case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
  9793. }
  9794. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9795. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9796. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9797. Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
  9798. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  9799. }
  9800. case NEON::BI__builtin_neon_vtstd_s64:
  9801. case NEON::BI__builtin_neon_vtstd_u64: {
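// vtst: AND the operands, compare the result against zero, then sign-extend
// the i1 to i64.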
  9802. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9803. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9804. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9805. Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  9806. Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
  9807. llvm::Constant::getNullValue(Int64Ty));
  9808. return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  9809. }
  9810. case NEON::BI__builtin_neon_vset_lane_i8:
  9811. case NEON::BI__builtin_neon_vset_lane_i16:
  9812. case NEON::BI__builtin_neon_vset_lane_i32:
  9813. case NEON::BI__builtin_neon_vset_lane_i64:
  9814. case NEON::BI__builtin_neon_vset_lane_bf16:
  9815. case NEON::BI__builtin_neon_vset_lane_f32:
  9816. case NEON::BI__builtin_neon_vsetq_lane_i8:
  9817. case NEON::BI__builtin_neon_vsetq_lane_i16:
  9818. case NEON::BI__builtin_neon_vsetq_lane_i32:
  9819. case NEON::BI__builtin_neon_vsetq_lane_i64:
  9820. case NEON::BI__builtin_neon_vsetq_lane_bf16:
  9821. case NEON::BI__builtin_neon_vsetq_lane_f32:
  9822. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9823. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9824. case NEON::BI__builtin_neon_vset_lane_f64:
  9825. // The vector type needs a cast for the v1f64 variant.
  9826. Ops[1] =
  9827. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
  9828. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9829. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9830. case NEON::BI__builtin_neon_vsetq_lane_f64:
  9831. // The vector type needs a cast for the v2f64 variant.
  9832. Ops[1] =
  9833. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
  9834. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9835. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9836. case NEON::BI__builtin_neon_vget_lane_i8:
  9837. case NEON::BI__builtin_neon_vdupb_lane_i8:
  9838. Ops[0] =
  9839. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
  9840. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9841. "vget_lane");
  9842. case NEON::BI__builtin_neon_vgetq_lane_i8:
  9843. case NEON::BI__builtin_neon_vdupb_laneq_i8:
  9844. Ops[0] =
  9845. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
  9846. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9847. "vgetq_lane");
  9848. case NEON::BI__builtin_neon_vget_lane_i16:
  9849. case NEON::BI__builtin_neon_vduph_lane_i16:
  9850. Ops[0] =
  9851. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
  9852. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9853. "vget_lane");
  9854. case NEON::BI__builtin_neon_vgetq_lane_i16:
  9855. case NEON::BI__builtin_neon_vduph_laneq_i16:
  9856. Ops[0] =
  9857. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
  9858. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9859. "vgetq_lane");
  9860. case NEON::BI__builtin_neon_vget_lane_i32:
  9861. case NEON::BI__builtin_neon_vdups_lane_i32:
  9862. Ops[0] =
  9863. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
  9864. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9865. "vget_lane");
  9866. case NEON::BI__builtin_neon_vdups_lane_f32:
  9867. Ops[0] =
  9868. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
  9869. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9870. "vdups_lane");
  9871. case NEON::BI__builtin_neon_vgetq_lane_i32:
  9872. case NEON::BI__builtin_neon_vdups_laneq_i32:
  9873. Ops[0] =
  9874. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
  9875. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9876. "vgetq_lane");
  9877. case NEON::BI__builtin_neon_vget_lane_i64:
  9878. case NEON::BI__builtin_neon_vdupd_lane_i64:
  9879. Ops[0] =
  9880. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
  9881. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9882. "vget_lane");
  9883. case NEON::BI__builtin_neon_vdupd_lane_f64:
  9884. Ops[0] =
  9885. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
  9886. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9887. "vdupd_lane");
  9888. case NEON::BI__builtin_neon_vgetq_lane_i64:
  9889. case NEON::BI__builtin_neon_vdupd_laneq_i64:
  9890. Ops[0] =
  9891. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
  9892. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9893. "vgetq_lane");
  9894. case NEON::BI__builtin_neon_vget_lane_f32:
  9895. Ops[0] =
  9896. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
  9897. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9898. "vget_lane");
  9899. case NEON::BI__builtin_neon_vget_lane_f64:
  9900. Ops[0] =
  9901. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
  9902. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9903. "vget_lane");
  9904. case NEON::BI__builtin_neon_vgetq_lane_f32:
  9905. case NEON::BI__builtin_neon_vdups_laneq_f32:
  9906. Ops[0] =
  9907. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
  9908. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9909. "vgetq_lane");
  9910. case NEON::BI__builtin_neon_vgetq_lane_f64:
  9911. case NEON::BI__builtin_neon_vdupd_laneq_f64:
  9912. Ops[0] =
  9913. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
  9914. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9915. "vgetq_lane");
  9916. case NEON::BI__builtin_neon_vaddh_f16:
  9917. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9918. return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  9919. case NEON::BI__builtin_neon_vsubh_f16:
  9920. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9921. return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  9922. case NEON::BI__builtin_neon_vmulh_f16:
  9923. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9924. return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  9925. case NEON::BI__builtin_neon_vdivh_f16:
  9926. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9927. return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  9928. case NEON::BI__builtin_neon_vfmah_f16:
  9929. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  9930. return emitCallMaybeConstrainedFPBuiltin(
  9931. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
  9932. {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  9933. case NEON::BI__builtin_neon_vfmsh_f16: {
9934. // Negate the first multiplicand.
9935. Value *Sub = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
  9937. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  9938. return emitCallMaybeConstrainedFPBuiltin(
  9939. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
  9940. {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
  9941. }
  9942. case NEON::BI__builtin_neon_vaddd_s64:
  9943. case NEON::BI__builtin_neon_vaddd_u64:
  9944. return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  9945. case NEON::BI__builtin_neon_vsubd_s64:
  9946. case NEON::BI__builtin_neon_vsubd_u64:
  9947. return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  9948. case NEON::BI__builtin_neon_vqdmlalh_s16:
  9949. case NEON::BI__builtin_neon_vqdmlslh_s16: {
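// Wrap the scalar i16 operands into vectors, do a saturating doubling
// multiply-long, extract lane 0, then saturating add/subtract into the
// accumulator.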
  9950. SmallVector<Value *, 2> ProductOps;
  9951. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  9952. ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
  9953. auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
  9954. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  9955. ProductOps, "vqdmlXl");
  9956. Constant *CI = ConstantInt::get(SizeTy, 0);
  9957. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  9958. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
  9959. ? Intrinsic::aarch64_neon_sqadd
  9960. : Intrinsic::aarch64_neon_sqsub;
  9961. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  9962. }
  9963. case NEON::BI__builtin_neon_vqshlud_n_s64: {
  9964. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9965. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  9966. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
  9967. Ops, "vqshlu_n");
  9968. }
  9969. case NEON::BI__builtin_neon_vqshld_n_u64:
  9970. case NEON::BI__builtin_neon_vqshld_n_s64: {
  9971. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
  9972. ? Intrinsic::aarch64_neon_uqshl
  9973. : Intrinsic::aarch64_neon_sqshl;
  9974. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9975. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  9976. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  9977. }
  9978. case NEON::BI__builtin_neon_vrshrd_n_u64:
  9979. case NEON::BI__builtin_neon_vrshrd_n_s64: {
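// Implement the rounding shift-right as a rounding shift-left by the negated
// amount.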
  9980. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
  9981. ? Intrinsic::aarch64_neon_urshl
  9982. : Intrinsic::aarch64_neon_srshl;
  9983. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9984. int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
  9985. Ops[1] = ConstantInt::get(Int64Ty, -SV);
  9986. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  9987. }
  9988. case NEON::BI__builtin_neon_vrsrad_n_u64:
  9989. case NEON::BI__builtin_neon_vrsrad_n_s64: {
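// Rounding shift-right and accumulate: as above, shift via a rounding
// shift-left by the negated amount, then add the accumulator.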
  9990. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
  9991. ? Intrinsic::aarch64_neon_urshl
  9992. : Intrinsic::aarch64_neon_srshl;
  9993. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9994. Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
  9995. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
  9996. {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
  9997. return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  9998. }
  9999. case NEON::BI__builtin_neon_vshld_n_s64:
  10000. case NEON::BI__builtin_neon_vshld_n_u64: {
  10001. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10002. return Builder.CreateShl(
  10003. Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  10004. }
  10005. case NEON::BI__builtin_neon_vshrd_n_s64: {
  10006. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10007. return Builder.CreateAShr(
  10008. Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  10009. Amt->getZExtValue())),
  10010. "shrd_n");
  10011. }
  10012. case NEON::BI__builtin_neon_vshrd_n_u64: {
  10013. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10014. uint64_t ShiftAmt = Amt->getZExtValue();
  10015. // Right-shifting an unsigned value by its size yields 0.
  10016. if (ShiftAmt == 64)
  10017. return ConstantInt::get(Int64Ty, 0);
  10018. return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
  10019. "shrd_n");
  10020. }
  10021. case NEON::BI__builtin_neon_vsrad_n_s64: {
  10022. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  10023. Ops[1] = Builder.CreateAShr(
  10024. Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  10025. Amt->getZExtValue())),
  10026. "shrd_n");
  10027. return Builder.CreateAdd(Ops[0], Ops[1]);
  10028. }
  10029. case NEON::BI__builtin_neon_vsrad_n_u64: {
  10030. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  10031. uint64_t ShiftAmt = Amt->getZExtValue();
  10032. // Right-shifting an unsigned value by its size yields 0.
  10033. // As Op + 0 = Op, return Ops[0] directly.
  10034. if (ShiftAmt == 64)
  10035. return Ops[0];
  10036. Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
  10037. "shrd_n");
  10038. return Builder.CreateAdd(Ops[0], Ops[1]);
  10039. }
  10040. case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  10041. case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  10042. case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  10043. case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
  10044. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  10045. "lane");
  10046. SmallVector<Value *, 2> ProductOps;
  10047. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  10048. ProductOps.push_back(vectorWrapScalar16(Ops[2]));
  10049. auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
  10050. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  10051. ProductOps, "vqdmlXl");
  10052. Constant *CI = ConstantInt::get(SizeTy, 0);
  10053. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  10054. Ops.pop_back();
  10055. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
  10056. BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
  10057. ? Intrinsic::aarch64_neon_sqadd
  10058. : Intrinsic::aarch64_neon_sqsub;
  10059. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  10060. }
  10061. case NEON::BI__builtin_neon_vqdmlals_s32:
  10062. case NEON::BI__builtin_neon_vqdmlsls_s32: {
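// Saturating doubling multiply-long of the i32 operands, then saturating
// add/subtract into the i64 accumulator.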
  10063. SmallVector<Value *, 2> ProductOps;
  10064. ProductOps.push_back(Ops[1]);
  10065. ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
  10066. Ops[1] =
  10067. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  10068. ProductOps, "vqdmlXl");
  10069. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
  10070. ? Intrinsic::aarch64_neon_sqadd
  10071. : Intrinsic::aarch64_neon_sqsub;
  10072. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  10073. }
  10074. case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  10075. case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  10076. case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  10077. case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
  10078. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  10079. "lane");
  10080. SmallVector<Value *, 2> ProductOps;
  10081. ProductOps.push_back(Ops[1]);
  10082. ProductOps.push_back(Ops[2]);
  10083. Ops[1] =
  10084. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  10085. ProductOps, "vqdmlXl");
  10086. Ops.pop_back();
  10087. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
  10088. BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
  10089. ? Intrinsic::aarch64_neon_sqadd
  10090. : Intrinsic::aarch64_neon_sqsub;
  10091. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  10092. }
  10093. case NEON::BI__builtin_neon_vget_lane_bf16:
  10094. case NEON::BI__builtin_neon_vduph_lane_bf16:
  10095. case NEON::BI__builtin_neon_vduph_lane_f16: {
  10096. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  10097. "vget_lane");
  10098. }
  10099. case NEON::BI__builtin_neon_vgetq_lane_bf16:
  10100. case NEON::BI__builtin_neon_vduph_laneq_bf16:
  10101. case NEON::BI__builtin_neon_vduph_laneq_f16: {
  10102. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  10103. "vgetq_lane");
  10104. }
  10105. case clang::AArch64::BI_InterlockedAdd: {
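// _InterlockedAdd returns the new value, but atomicrmw yields the old one,
// so add the operand to the result again.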
  10106. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  10107. Value *Arg1 = EmitScalarExpr(E->getArg(1));
  10108. AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
  10109. AtomicRMWInst::Add, Arg0, Arg1,
  10110. llvm::AtomicOrdering::SequentiallyConsistent);
  10111. return Builder.CreateAdd(RMWI, Arg1);
  10112. }
  10113. }
  10114. llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  10115. llvm::Type *Ty = VTy;
  10116. if (!Ty)
  10117. return nullptr;
  10118. // Not all intrinsics handled by the common case work for AArch64 yet, so only
  10119. // defer to common code if it's been added to our special map.
  10120. Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
  10121. AArch64SIMDIntrinsicsProvenSorted);
  10122. if (Builtin)
  10123. return EmitCommonNeonBuiltinExpr(
  10124. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  10125. Builtin->NameHint, Builtin->TypeModifier, E, Ops,
  10126. /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
  10127. if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
  10128. return V;
  10129. unsigned Int;
  10130. switch (BuiltinID) {
  10131. default: return nullptr;
  10132. case NEON::BI__builtin_neon_vbsl_v:
  10133. case NEON::BI__builtin_neon_vbslq_v: {
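// Bitwise select: (mask & a) | (~mask & b), computed on the integer form of
// the vector type.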
  10134. llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
  10135. Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
  10136. Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
  10137. Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
  10138. Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
  10139. Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
  10140. Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
  10141. return Builder.CreateBitCast(Ops[0], Ty);
  10142. }
  10143. case NEON::BI__builtin_neon_vfma_lane_v:
  10144. case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
  10145. // The ARM builtins (and instructions) have the addend as the first
  10146. // operand, but the 'fma' intrinsics have it last. Swap it around here.
  10147. Value *Addend = Ops[0];
  10148. Value *Multiplicand = Ops[1];
  10149. Value *LaneSource = Ops[2];
  10150. Ops[0] = Multiplicand;
  10151. Ops[1] = LaneSource;
  10152. Ops[2] = Addend;
  10153. // Now adjust things to handle the lane access.
  10154. auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
  10155. ? llvm::FixedVectorType::get(VTy->getElementType(),
  10156. VTy->getNumElements() / 2)
  10157. : VTy;
  10158. llvm::Constant *cst = cast<Constant>(Ops[3]);
  10159. Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
  10160. Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
  10161. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
  10162. Ops.pop_back();
  10163. Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
  10164. : Intrinsic::fma;
  10165. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  10166. }
  10167. case NEON::BI__builtin_neon_vfma_laneq_v: {
  10168. auto *VTy = cast<llvm::FixedVectorType>(Ty);
  10169. // v1f64 fma should be mapped to Neon scalar f64 fma
  10170. if (VTy && VTy->getElementType() == DoubleTy) {
  10171. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  10172. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  10173. llvm::FixedVectorType *VTy =
  10174. GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
  10175. Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
  10176. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  10177. Value *Result;
  10178. Result = emitCallMaybeConstrainedFPBuiltin(
  10179. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
  10180. DoubleTy, {Ops[1], Ops[2], Ops[0]});
  10181. return Builder.CreateBitCast(Result, Ty);
  10182. }
  10183. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10184. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10185. auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
  10186. VTy->getNumElements() * 2);
  10187. Ops[2] = Builder.CreateBitCast(Ops[2], STy);
  10188. Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
  10189. cast<ConstantInt>(Ops[3]));
  10190. Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
  10191. return emitCallMaybeConstrainedFPBuiltin(
  10192. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  10193. {Ops[2], Ops[1], Ops[0]});
  10194. }
  10195. case NEON::BI__builtin_neon_vfmaq_laneq_v: {
  10196. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10197. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10198. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10199. Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
  10200. return emitCallMaybeConstrainedFPBuiltin(
  10201. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  10202. {Ops[2], Ops[1], Ops[0]});
  10203. }
  10204. case NEON::BI__builtin_neon_vfmah_lane_f16:
  10205. case NEON::BI__builtin_neon_vfmas_lane_f32:
  10206. case NEON::BI__builtin_neon_vfmah_laneq_f16:
  10207. case NEON::BI__builtin_neon_vfmas_laneq_f32:
  10208. case NEON::BI__builtin_neon_vfmad_lane_f64:
  10209. case NEON::BI__builtin_neon_vfmad_laneq_f64: {
  10210. Ops.push_back(EmitScalarExpr(E->getArg(3)));
  10211. llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
  10212. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  10213. return emitCallMaybeConstrainedFPBuiltin(
  10214. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  10215. {Ops[1], Ops[2], Ops[0]});
  10216. }
  10217. case NEON::BI__builtin_neon_vmull_v:
  10218. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10219. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
  10220. if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
  10221. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  10222. case NEON::BI__builtin_neon_vmax_v:
  10223. case NEON::BI__builtin_neon_vmaxq_v:
  10224. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10225. Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
  10226. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
  10227. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  10228. case NEON::BI__builtin_neon_vmaxh_f16: {
  10229. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10230. Int = Intrinsic::aarch64_neon_fmax;
  10231. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  10232. }
  10233. case NEON::BI__builtin_neon_vmin_v:
  10234. case NEON::BI__builtin_neon_vminq_v:
  10235. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10236. Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
  10237. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
  10238. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  10239. case NEON::BI__builtin_neon_vminh_f16: {
  10240. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10241. Int = Intrinsic::aarch64_neon_fmin;
  10242. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  10243. }
  10244. case NEON::BI__builtin_neon_vabd_v:
  10245. case NEON::BI__builtin_neon_vabdq_v:
  10246. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10247. Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
  10248. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
  10249. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  10250. case NEON::BI__builtin_neon_vpadal_v:
  10251. case NEON::BI__builtin_neon_vpadalq_v: {
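// Pairwise add-long: widen and pairwise-add the vector operand, then add the
// accumulator.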
  10252. unsigned ArgElts = VTy->getNumElements();
  10253. llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
  10254. unsigned BitWidth = EltTy->getBitWidth();
  10255. auto *ArgTy = llvm::FixedVectorType::get(
  10256. llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
  10257. llvm::Type* Tys[2] = { VTy, ArgTy };
  10258. Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
  10259. SmallVector<llvm::Value*, 1> TmpOps;
  10260. TmpOps.push_back(Ops[1]);
  10261. Function *F = CGM.getIntrinsic(Int, Tys);
  10262. llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
  10263. llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
  10264. return Builder.CreateAdd(tmp, addend);
  10265. }
  10266. case NEON::BI__builtin_neon_vpmin_v:
  10267. case NEON::BI__builtin_neon_vpminq_v:
  10268. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10269. Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
  10270. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
  10271. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  10272. case NEON::BI__builtin_neon_vpmax_v:
  10273. case NEON::BI__builtin_neon_vpmaxq_v:
  10274. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  10275. Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
  10276. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
  10277. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  10278. case NEON::BI__builtin_neon_vminnm_v:
  10279. case NEON::BI__builtin_neon_vminnmq_v:
  10280. Int = Intrinsic::aarch64_neon_fminnm;
  10281. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  10282. case NEON::BI__builtin_neon_vminnmh_f16:
  10283. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10284. Int = Intrinsic::aarch64_neon_fminnm;
  10285. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  10286. case NEON::BI__builtin_neon_vmaxnm_v:
  10287. case NEON::BI__builtin_neon_vmaxnmq_v:
  10288. Int = Intrinsic::aarch64_neon_fmaxnm;
  10289. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  10290. case NEON::BI__builtin_neon_vmaxnmh_f16:
  10291. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10292. Int = Intrinsic::aarch64_neon_fmaxnm;
  10293. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  10294. case NEON::BI__builtin_neon_vrecpss_f32: {
  10295. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10296. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
  10297. Ops, "vrecps");
  10298. }
  10299. case NEON::BI__builtin_neon_vrecpsd_f64:
  10300. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10301. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
  10302. Ops, "vrecps");
  10303. case NEON::BI__builtin_neon_vrecpsh_f16:
  10304. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  10305. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
  10306. Ops, "vrecps");
  10307. case NEON::BI__builtin_neon_vqshrun_n_v:
  10308. Int = Intrinsic::aarch64_neon_sqshrun;
  10309. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  10310. case NEON::BI__builtin_neon_vqrshrun_n_v:
  10311. Int = Intrinsic::aarch64_neon_sqrshrun;
  10312. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  10313. case NEON::BI__builtin_neon_vqshrn_n_v:
  10314. Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
  10315. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  10316. case NEON::BI__builtin_neon_vrshrn_n_v:
  10317. Int = Intrinsic::aarch64_neon_rshrn;
  10318. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  10319. case NEON::BI__builtin_neon_vqrshrn_n_v:
  10320. Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
  10321. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  10322. case NEON::BI__builtin_neon_vrndah_f16: {
  10323. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10324. Int = Builder.getIsFPConstrained()
  10325. ? Intrinsic::experimental_constrained_round
  10326. : Intrinsic::round;
  10327. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  10328. }
  10329. case NEON::BI__builtin_neon_vrnda_v:
  10330. case NEON::BI__builtin_neon_vrndaq_v: {
  10331. Int = Builder.getIsFPConstrained()
  10332. ? Intrinsic::experimental_constrained_round
  10333. : Intrinsic::round;
  10334. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  10335. }
  10336. case NEON::BI__builtin_neon_vrndih_f16: {
  10337. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10338. Int = Builder.getIsFPConstrained()
  10339. ? Intrinsic::experimental_constrained_nearbyint
  10340. : Intrinsic::nearbyint;
  10341. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  10342. }
  10343. case NEON::BI__builtin_neon_vrndmh_f16: {
  10344. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10345. Int = Builder.getIsFPConstrained()
  10346. ? Intrinsic::experimental_constrained_floor
  10347. : Intrinsic::floor;
  10348. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  10349. }
  10350. case NEON::BI__builtin_neon_vrndm_v:
  10351. case NEON::BI__builtin_neon_vrndmq_v: {
  10352. Int = Builder.getIsFPConstrained()
  10353. ? Intrinsic::experimental_constrained_floor
  10354. : Intrinsic::floor;
  10355. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  10356. }
  10357. case NEON::BI__builtin_neon_vrndnh_f16: {
  10358. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10359. Int = Builder.getIsFPConstrained()
  10360. ? Intrinsic::experimental_constrained_roundeven
  10361. : Intrinsic::roundeven;
  10362. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  10363. }
  10364. case NEON::BI__builtin_neon_vrndn_v:
  10365. case NEON::BI__builtin_neon_vrndnq_v: {
  10366. Int = Builder.getIsFPConstrained()
  10367. ? Intrinsic::experimental_constrained_roundeven
  10368. : Intrinsic::roundeven;
  10369. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  10370. }
  10371. case NEON::BI__builtin_neon_vrndns_f32: {
  10372. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10373. Int = Builder.getIsFPConstrained()
  10374. ? Intrinsic::experimental_constrained_roundeven
  10375. : Intrinsic::roundeven;
  10376. return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  10377. }
  10378. case NEON::BI__builtin_neon_vrndph_f16: {
  10379. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10380. Int = Builder.getIsFPConstrained()
  10381. ? Intrinsic::experimental_constrained_ceil
  10382. : Intrinsic::ceil;
  10383. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  10384. }
  10385. case NEON::BI__builtin_neon_vrndp_v:
  10386. case NEON::BI__builtin_neon_vrndpq_v: {
  10387. Int = Builder.getIsFPConstrained()
  10388. ? Intrinsic::experimental_constrained_ceil
  10389. : Intrinsic::ceil;
  10390. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  10391. }
  10392. case NEON::BI__builtin_neon_vrndxh_f16: {
  10393. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10394. Int = Builder.getIsFPConstrained()
  10395. ? Intrinsic::experimental_constrained_rint
  10396. : Intrinsic::rint;
  10397. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  10398. }
  10399. case NEON::BI__builtin_neon_vrndx_v:
  10400. case NEON::BI__builtin_neon_vrndxq_v: {
  10401. Int = Builder.getIsFPConstrained()
  10402. ? Intrinsic::experimental_constrained_rint
  10403. : Intrinsic::rint;
  10404. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  10405. }
  10406. case NEON::BI__builtin_neon_vrndh_f16: {
  10407. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10408. Int = Builder.getIsFPConstrained()
  10409. ? Intrinsic::experimental_constrained_trunc
  10410. : Intrinsic::trunc;
  10411. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  10412. }
  10413. case NEON::BI__builtin_neon_vrnd32x_f32:
  10414. case NEON::BI__builtin_neon_vrnd32xq_f32: {
  10415. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10416. Int = Intrinsic::aarch64_neon_frint32x;
  10417. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  10418. }
  10419. case NEON::BI__builtin_neon_vrnd32z_f32:
  10420. case NEON::BI__builtin_neon_vrnd32zq_f32: {
  10421. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10422. Int = Intrinsic::aarch64_neon_frint32z;
  10423. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  10424. }
  10425. case NEON::BI__builtin_neon_vrnd64x_f32:
  10426. case NEON::BI__builtin_neon_vrnd64xq_f32: {
  10427. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10428. Int = Intrinsic::aarch64_neon_frint64x;
  10429. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  10430. }
  10431. case NEON::BI__builtin_neon_vrnd64z_f32:
  10432. case NEON::BI__builtin_neon_vrnd64zq_f32: {
  10433. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10434. Int = Intrinsic::aarch64_neon_frint64z;
  10435. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  10436. }
  10437. case NEON::BI__builtin_neon_vrnd_v:
  10438. case NEON::BI__builtin_neon_vrndq_v: {
  10439. Int = Builder.getIsFPConstrained()
  10440. ? Intrinsic::experimental_constrained_trunc
  10441. : Intrinsic::trunc;
  10442. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  10443. }
  10444. case NEON::BI__builtin_neon_vcvt_f64_v:
  10445. case NEON::BI__builtin_neon_vcvtq_f64_v:
  10446. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10447. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
  10448. return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  10449. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  10450. case NEON::BI__builtin_neon_vcvt_f64_f32: {
  10451. assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
  10452. "unexpected vcvt_f64_f32 builtin");
  10453. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
  10454. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  10455. return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  10456. }
  10457. case NEON::BI__builtin_neon_vcvt_f32_f64: {
  10458. assert(Type.getEltType() == NeonTypeFlags::Float32 &&
  10459. "unexpected vcvt_f32_f64 builtin");
  10460. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
  10461. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  10462. return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  10463. }
  10464. case NEON::BI__builtin_neon_vcvt_s32_v:
  10465. case NEON::BI__builtin_neon_vcvt_u32_v:
  10466. case NEON::BI__builtin_neon_vcvt_s64_v:
  10467. case NEON::BI__builtin_neon_vcvt_u64_v:
  10468. case NEON::BI__builtin_neon_vcvt_s16_f16:
  10469. case NEON::BI__builtin_neon_vcvt_u16_f16:
  10470. case NEON::BI__builtin_neon_vcvtq_s32_v:
  10471. case NEON::BI__builtin_neon_vcvtq_u32_v:
  10472. case NEON::BI__builtin_neon_vcvtq_s64_v:
  10473. case NEON::BI__builtin_neon_vcvtq_u64_v:
  10474. case NEON::BI__builtin_neon_vcvtq_s16_f16:
  10475. case NEON::BI__builtin_neon_vcvtq_u16_f16: {
  10476. Int =
  10477. usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
  10478. llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
  10479. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
  10480. }
  10481. case NEON::BI__builtin_neon_vcvta_s16_f16:
  10482. case NEON::BI__builtin_neon_vcvta_u16_f16:
  10483. case NEON::BI__builtin_neon_vcvta_s32_v:
  10484. case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  10485. case NEON::BI__builtin_neon_vcvtaq_s32_v:
  10486. case NEON::BI__builtin_neon_vcvta_u32_v:
  10487. case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  10488. case NEON::BI__builtin_neon_vcvtaq_u32_v:
  10489. case NEON::BI__builtin_neon_vcvta_s64_v:
  10490. case NEON::BI__builtin_neon_vcvtaq_s64_v:
  10491. case NEON::BI__builtin_neon_vcvta_u64_v:
  10492. case NEON::BI__builtin_neon_vcvtaq_u64_v: {
  10493. Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
  10494. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10495. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  10496. }
  10497. case NEON::BI__builtin_neon_vcvtm_s16_f16:
  10498. case NEON::BI__builtin_neon_vcvtm_s32_v:
  10499. case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  10500. case NEON::BI__builtin_neon_vcvtmq_s32_v:
  10501. case NEON::BI__builtin_neon_vcvtm_u16_f16:
  10502. case NEON::BI__builtin_neon_vcvtm_u32_v:
  10503. case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  10504. case NEON::BI__builtin_neon_vcvtmq_u32_v:
  10505. case NEON::BI__builtin_neon_vcvtm_s64_v:
  10506. case NEON::BI__builtin_neon_vcvtmq_s64_v:
  10507. case NEON::BI__builtin_neon_vcvtm_u64_v:
  10508. case NEON::BI__builtin_neon_vcvtmq_u64_v: {
  10509. Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
  10510. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10511. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  10512. }
  10513. case NEON::BI__builtin_neon_vcvtn_s16_f16:
  10514. case NEON::BI__builtin_neon_vcvtn_s32_v:
  10515. case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  10516. case NEON::BI__builtin_neon_vcvtnq_s32_v:
  10517. case NEON::BI__builtin_neon_vcvtn_u16_f16:
  10518. case NEON::BI__builtin_neon_vcvtn_u32_v:
  10519. case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  10520. case NEON::BI__builtin_neon_vcvtnq_u32_v:
  10521. case NEON::BI__builtin_neon_vcvtn_s64_v:
  10522. case NEON::BI__builtin_neon_vcvtnq_s64_v:
  10523. case NEON::BI__builtin_neon_vcvtn_u64_v:
  10524. case NEON::BI__builtin_neon_vcvtnq_u64_v: {
  10525. Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
  10526. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10527. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  10528. }
  10529. case NEON::BI__builtin_neon_vcvtp_s16_f16:
  10530. case NEON::BI__builtin_neon_vcvtp_s32_v:
  10531. case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  10532. case NEON::BI__builtin_neon_vcvtpq_s32_v:
  10533. case NEON::BI__builtin_neon_vcvtp_u16_f16:
  10534. case NEON::BI__builtin_neon_vcvtp_u32_v:
  10535. case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  10536. case NEON::BI__builtin_neon_vcvtpq_u32_v:
  10537. case NEON::BI__builtin_neon_vcvtp_s64_v:
  10538. case NEON::BI__builtin_neon_vcvtpq_s64_v:
  10539. case NEON::BI__builtin_neon_vcvtp_u64_v:
  10540. case NEON::BI__builtin_neon_vcvtpq_u64_v: {
  10541. Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
  10542. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10543. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  10544. }
  10545. case NEON::BI__builtin_neon_vmulx_v:
  10546. case NEON::BI__builtin_neon_vmulxq_v: {
  10547. Int = Intrinsic::aarch64_neon_fmulx;
  10548. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  10549. }
  10550. case NEON::BI__builtin_neon_vmulxh_lane_f16:
  10551. case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
  10552. // vmulx_lane should be mapped to Neon scalar mulx after
  10553. // extracting the scalar element
  10554. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  10555. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  10556. Ops.pop_back();
  10557. Int = Intrinsic::aarch64_neon_fmulx;
  10558. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  10559. }
  10560. case NEON::BI__builtin_neon_vmul_lane_v:
  10561. case NEON::BI__builtin_neon_vmul_laneq_v: {
  10562. // v1f64 vmul_lane should be mapped to Neon scalar mul lane
  10563. bool Quad = false;
  10564. if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
  10565. Quad = true;
  10566. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  10567. llvm::FixedVectorType *VTy =
  10568. GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
  10569. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  10570. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  10571. Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
  10572. return Builder.CreateBitCast(Result, Ty);
  10573. }
  10574. case NEON::BI__builtin_neon_vnegd_s64:
  10575. return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  10576. case NEON::BI__builtin_neon_vnegh_f16:
  10577. return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  10578. case NEON::BI__builtin_neon_vpmaxnm_v:
  10579. case NEON::BI__builtin_neon_vpmaxnmq_v: {
  10580. Int = Intrinsic::aarch64_neon_fmaxnmp;
  10581. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  10582. }
  10583. case NEON::BI__builtin_neon_vpminnm_v:
  10584. case NEON::BI__builtin_neon_vpminnmq_v: {
  10585. Int = Intrinsic::aarch64_neon_fminnmp;
  10586. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  10587. }
  10588. case NEON::BI__builtin_neon_vsqrth_f16: {
  10589. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10590. Int = Builder.getIsFPConstrained()
  10591. ? Intrinsic::experimental_constrained_sqrt
  10592. : Intrinsic::sqrt;
  10593. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  10594. }
  10595. case NEON::BI__builtin_neon_vsqrt_v:
  10596. case NEON::BI__builtin_neon_vsqrtq_v: {
  10597. Int = Builder.getIsFPConstrained()
  10598. ? Intrinsic::experimental_constrained_sqrt
  10599. : Intrinsic::sqrt;
  10600. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10601. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  10602. }
  10603. case NEON::BI__builtin_neon_vrbit_v:
  10604. case NEON::BI__builtin_neon_vrbitq_v: {
  10605. Int = Intrinsic::bitreverse;
  10606. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  10607. }
  10608. case NEON::BI__builtin_neon_vaddv_u8:
  10609. // FIXME: These are handled by the AArch64 scalar code.
  10610. usgn = true;
  10611. [[fallthrough]];
  10612. case NEON::BI__builtin_neon_vaddv_s8: {
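// The across-vector reduction intrinsics produce an i32 result; truncate it
// back to the element type.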
  10613. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10614. Ty = Int32Ty;
  10615. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10616. llvm::Type *Tys[2] = { Ty, VTy };
  10617. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10618. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10619. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10620. }
  10621. case NEON::BI__builtin_neon_vaddv_u16:
  10622. usgn = true;
  10623. [[fallthrough]];
  10624. case NEON::BI__builtin_neon_vaddv_s16: {
  10625. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10626. Ty = Int32Ty;
  10627. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10628. llvm::Type *Tys[2] = { Ty, VTy };
  10629. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10630. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10631. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10632. }
  case NEON::BI__builtin_neon_vaddvq_u8:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminv_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
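  // Plain vld1/vst1 lower to ordinary aligned load/store instructions; the
  // alignment comes from the pointer operand (PtrOp0) rather than being
  // assumed from the vector type.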
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
                                      PtrOp0.getAlignment());
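  // The structured load builtins (vld2/vld3/vld4 and their _dup/_lane forms)
  // call the corresponding aarch64.neon.ldN* intrinsic, which returns an
  // aggregate of N vectors; that aggregate is stored through the sret pointer
  // passed in Ops[0].  For the _lane forms the operands are rotated first so
  // that the source pointer ends up last, matching the (vectors..., lane,
  // pointer) signature of the ldNlane intrinsics; the stN/stNlane builtins
  // below rotate the destination pointer to the end for the same reason.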
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
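  // vtrn/vuzp/vzip produce two result vectors.  Each half is built with a
  // single shufflevector over the two inputs and stored into the vi-th slot
  // of the sret buffer pointed to by Ops[0].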
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}
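
// BPF CO-RE (compile once, run everywhere) builtins.  Each one is lowered to
// a bpf_preserve_* / bpf_btf_type_id intrinsic call carrying
// preserve_access_index debug metadata, so the BPF backend can emit a
// relocation that is resolved against the target kernel's BTF at load time.
// A minimal usage sketch (the struct, field, and KIND names here are made up
// for illustration):
//
//   struct pkt { int len; };
//   unsigned x = __builtin_preserve_field_info(((struct pkt *)p)->len, KIND);
//
// where KIND is an integer constant selecting which piece of field info
// (offset, size, existence, ...) to materialize.  All of these builtins
// require debug info (-g); without it we report an error and, for the field
// case, fall back to the plain field address.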
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");
  // A sequence number, injected into IR builtin function calls, to prevent
  // CSE when the only difference between otherwise-identical calls may be
  // their debuginfo metadata.
  static uint32_t BuiltinSeqNum;
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
                        : EmitLValue(Arg).getPointer(*this);
    }
    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
                                  : EmitLValue(Arg).getPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;
    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
    // Build the IR for the preserve_field_info intrinsic.
    llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }
    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }
    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());
    // Find enumerator
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
    auto &InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
  return Result;
}
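
// The X86 masked builtins encode their mask as a plain integer (i8/i16/i32/
// i64), one bit per vector element.  The helper below turns that integer into
// the <N x i1> vector the IR intrinsics expect; for example (illustrative),
// an i8 mask used with a 4-element operation is bitcast to <8 x i1> and then
// shuffled down to its low 4 lanes.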
// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {
  auto *MaskTy = llvm::FixedVectorType::get(
      CGF.Builder.getInt1Ty(),
      cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(
        MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
  }
  return MaskVec;
}
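
// The masked load/store helpers below map directly onto the generic
// llvm.masked.store / llvm.masked.load intrinsics (via CreateMaskedStore and
// CreateMaskedLoad); for loads, the existing vector in Ops[1] supplies the
// pass-through value for masked-off lanes.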
static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Align Alignment) {
  // Cast the pointer to right type.
  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
                   llvm::PointerType::getUnqual(Ops[1]->getType()));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2],
      cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                Align Alignment) {
  // Cast the pointer to right type.
  llvm::Type *Ty = Ops[1]->getType();
  Value *Ptr =
      CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());

  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
}

static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
                                ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
  llvm::Type *PtrTy = ResultTy->getElementType();

  // Cast the pointer to element type.
  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
                                         llvm::PointerType::getUnqual(PtrTy));

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
}

static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops,
                                    bool IsCompress) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                 : Intrinsic::x86_avx512_mask_expand;
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
}

static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                   ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
  llvm::Type *PtrTy = ResultTy->getElementType();

  // Cast the pointer to element type.
  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
                                         llvm::PointerType::getUnqual(PtrTy));

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
}

static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
                               ArrayRef<Value *> Ops,
                               bool InvertLHS = false) {
  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);

  if (InvertLHS)
    LHS = CGF.Builder.CreateNot(LHS);

  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
                                   Ops[0]->getType());
}

static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
                                 Value *Amt, bool IsRight) {
  llvm::Type *Ty = Op0->getType();
  // The shift amount may be a scalar immediate, in which case create a splat
  // vector.  Funnel shift amounts are treated modulo the bit width, and all
  // the types involved are powers of two, so we only care about the lowest
  // log2(bitwidth) bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
    Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
  }

  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}

static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                           bool IsSigned) {
  Value *Op0 = Ops[0];
  Value *Op1 = Ops[1];
  llvm::Type *Ty = Op0->getType();
  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return llvm::Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return llvm::Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
  return Res;
}
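
// Merge-masking helper: selects lane-by-lane between the freshly computed
// result and the pass-through value, using the integer mask converted by
// getMaskVecValue.  When the mask is a compile-time all-ones constant the
// select is skipped entirely.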
static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {
  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(
      CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
                                  Value *Mask, Value *Op0, Value *Op1) {
  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = llvm::FixedVectorType::get(
      CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
                                         unsigned NumElts, Value *MaskIn) {
  if (MaskIn) {
    const auto *C = dyn_cast<Constant>(MaskIn);
    if (!C || !C->isAllOnesValue())
      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }

  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}
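
// Lowers the AVX-512 integer compare builtins.  The immediate condition code
// is encoded as: 0 -> eq, 1 -> lt, 2 -> le, 3 -> always false, 4 -> ne,
// 5 -> ge, 6 -> gt, 7 -> always true (see the switch below), with signedness
// chosen by the caller.  An optional fourth operand supplies a write-mask
// that is ANDed into the comparison result.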
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, ArrayRef<Value *> Ops) {
  assert((Ops.size() == 2 || Ops.size() == 4) &&
         "Unexpected number of arguments");
  unsigned NumElts =
      cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  Value *Cmp;

  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }

  Value *MaskIn = nullptr;
  if (Ops.size() == 4)
    MaskIn = Ops[3];

  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
}

static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
  Value *Zero = Constant::getNullValue(In->getType());
  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
}

static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                    ArrayRef<Value *> Ops, bool IsSigned) {
  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
  llvm::Type *Ty = Ops[1]->getType();

  Value *Res;
  if (Rnd != 4) {
    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
                                 : Intrinsic::x86_avx512_uitofp_round;
    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
  } else {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
  }

  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
}

// Lowers X86 FMA intrinsics to IR.
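// The 512-bit forms come in three masking flavours, distinguished by the
// builtin suffix and handled after the arithmetic is emitted: _mask merges
// into the first source operand, _maskz merges into zero, and _mask3 merges
// into the third (accumulator) operand.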
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
                             ArrayRef<Value *> Ops, unsigned BuiltinID,
                             bool IsAddSub) {
  bool Subtract = false;
  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  default: break;
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
    IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
    IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    Subtract = true;
    [[fallthrough]];
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
    break;
  }

  Value *A = Ops[0];
  Value *B = Ops[1];
  Value *C = Ops[2];

  if (Subtract)
    C = CGF.Builder.CreateFNeg(C);

  Value *Res;
  // Use the target-specific intrinsic (when we have one) if a non-default
  // rounding mode is requested, i.e. the last operand is not
  // _MM_FROUND_CUR_DIRECTION/4, or for addsub, which has no plain IR
  // equivalent; otherwise emit a generic llvm.fma.
  if (IID != Intrinsic::not_intrinsic &&
      (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
       IsAddSub)) {
    Function *Intr = CGF.CGM.getIntrinsic(IID);
    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
  } else {
    llvm::Type *Ty = A->getType();
    Function *FMA;
    if (CGF.Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
      FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
      Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
    } else {
      FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
      Res = CGF.Builder.CreateCall(FMA, {A, B, C});
    }
  }

  // Handle any required masking.
  Value *MaskFalseVal = nullptr;
  switch (BuiltinID) {
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
    MaskFalseVal = Ops[0];
    break;
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
    MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
    break;
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    MaskFalseVal = Ops[2];
    break;
  }

  if (MaskFalseVal)
    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);

  return Res;
}

static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
                                MutableArrayRef<Value *> Ops, Value *Upper,
                                bool ZeroMask = false, unsigned PTIdx = 0,
                                bool NegAcc = false) {
  unsigned Rnd = 4;
  if (Ops.size() > 4)
    Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();

  if (NegAcc)
    Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);

  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  Value *Res;
  if (Rnd != 4) {
    Intrinsic::ID IID;

    switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
    case 16:
      IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
      break;
    case 32:
      IID = Intrinsic::x86_avx512_vfmadd_f32;
      break;
    case 64:
      IID = Intrinsic::x86_avx512_vfmadd_f64;
      break;
    default:
      llvm_unreachable("Unexpected size");
    }
    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
  } else if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *FMA = CGF.CGM.getIntrinsic(
        Intrinsic::experimental_constrained_fma, Ops[0]->getType());
    Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
  } else {
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
  }
  // If we have more than 3 arguments, we need to do masking.
  if (Ops.size() > 3) {
    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
                               : Ops[PTIdx];
    // If we negated the accumulator and it is also the PassThru value, we
    // need to bypass the negate.  Conveniently, Upper should be the same
    // thing in this case.
    if (NegAcc && PTIdx == 2)
      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);

    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
  }
  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
}
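
// pmuldq/pmuludq-style widening multiplies.  The inputs are vXi32 vectors
// reinterpreted as vXi64; the low 32 bits of each 64-bit lane are then
// sign-extended (shl followed by ashr by 32) or zero-extended (mask with
// 0xffffffff) before a full 64-bit multiply, which therefore yields the
// exact 32x32 -> 64-bit product per lane.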
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
                           ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();
  // Arguments have a vXi32 type so cast to vXi64.
  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
                                  Ty->getPrimitiveSizeInBits() / 64);
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
  }

  return CGF.Builder.CreateMul(LHS, RHS);
}

// Emit a masked pternlog intrinsic. This only exists because the header has to
// use a macro and we aren't able to pass the input argument to a pternlog
// builtin and a select builtin without evaluating it twice.
static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
                             ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();

  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_128;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_512;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_128;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_256;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                          Ops.drop_back());
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
}

static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
                              llvm::Type *DstTy) {
  unsigned NumberOfElements =
      cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
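
// __builtin_cpu_is("...") lowers to a load from the __cpu_model global that
// compiler-rt/libgcc populate at program startup, followed by an integer
// compare against the vendor/type/subtype value looked up from
// X86TargetParser.def.  A rough usage sketch (any CPU name the builtin
// accepts works here):
//
//   if (__builtin_cpu_is("znver3")) { /* take a Zen 3 specific path */ }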
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  return EmitX86CpuIs(CPUStr);
}

// Convert F16 halves to floats.
  11840. static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
  11841. ArrayRef<Value *> Ops,
  11842. llvm::Type *DstTy) {
  11843. assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
  11844. "Unknown cvtph2ps intrinsic");
  11845. // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
  11846. if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
  11847. Function *F =
  11848. CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
  11849. return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
  11850. }
  11851. unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  11852. Value *Src = Ops[0];
  11853. // Extract the subvector.
  11854. if (NumDstElts !=
  11855. cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
  11856. assert(NumDstElts == 4 && "Unexpected vector size");
  11857. Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
  11858. }
  11859. // Bitcast from vXi16 to vXf16.
  11860. auto *HalfTy = llvm::FixedVectorType::get(
  11861. llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
  11862. Src = CGF.Builder.CreateBitCast(Src, HalfTy);
  11863. // Perform the fp-extension.
  11864. Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
  11865. if (Ops.size() >= 3)
  11866. Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
  11867. return Res;
  11868. }

Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
  llvm::Type *Int32Ty = Builder.getInt32Ty();

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE(ENUM, STR)                                                \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/TargetParser/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");

  // Grab the appropriate field from __cpu_model.
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
                                       CharUnits::fromQuantity(4));

  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                              llvm::ConstantInt::get(Int32Ty, Value));
}
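
// Illustrative example (enum name shown for illustration; the actual value
// comes from the X86TargetParser.def entries included above):
// __builtin_cpu_is("intel") resolves to {Index = 0, Value = VENDOR_INTEL},
// so the code above loads __cpu_model.__cpu_vendor and compares it against
// that constant.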

Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  return EmitX86CpuSupports(FeatureStr);
}

Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
}

llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
  uint32_t Features1 = Lo_32(FeaturesMask);
  uint32_t Features2 = Hi_32(FeaturesMask);

  Value *Result = Builder.getTrue();

  if (Features1 != 0) {
    // Matching the struct layout from the compiler-rt/libgcc structure that is
    // filled in:
    // unsigned int __cpu_vendor;
    // unsigned int __cpu_type;
    // unsigned int __cpu_subtype;
    // unsigned int __cpu_features[1];
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                            llvm::ArrayType::get(Int32Ty, 1));

    // Grab the global __cpu_model.
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
    cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

    // Grab the first (0th) element from the field __cpu_features off of the
    // global in the struct STy.
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
                     Builder.getInt32(0)};
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Mask = Builder.getInt32(Features1);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }

  if (Features2 != 0) {
    llvm::Constant *CpuFeatures2 =
        CGM.CreateRuntimeVariable(Int32Ty, "__cpu_features2");
    cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);

    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Mask = Builder.getInt32(Features2);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }

  return Result;
}
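
// Illustrative note: bits 0-31 of the feature mask are tested against
// __cpu_model.__cpu_features[0], while bits 32-63 are tested against the
// separate __cpu_features2 global; each check requires all of the requested
// bits to be set and is ANDed into the final result.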

Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

Value *CodeGenFunction::EmitX86CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
                                                    /*Variadic*/ false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
  uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Get features from structure in runtime library
    // struct {
    //   unsigned long long features;
    // } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
                                                CharUnits::fromQuantity(8));
    Value *Mask = Builder.getInt64(FeaturesMask);
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }
  return Result;
}

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == X86::BI__builtin_cpu_is)
    return EmitX86CpuIs(E);
  if (BuiltinID == X86::BI__builtin_cpu_supports)
    return EmitX86CpuSupports(E);
  if (BuiltinID == X86::BI__builtin_cpu_init)
    return EmitX86CpuInit();

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  SmallVector<Value*, 4> Ops;
  bool IsMaskFCmp = false;
  bool IsConjFMA = false;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    Ops.push_back(llvm::ConstantInt::get(
        getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
  }

  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang specific
  // SSE only builtin, this implements eight separate builtins to match the
  // gcc implementation.
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops);
  };

  // For the vector forms of FP comparisons, translate the builtins directly to
  // IR.
  // TODO: The builtins could be removed if the SSE header files used vector
  // extension comparisons directly (vector ordered/unordered may need
  // additional support via __builtin_isnan()).
  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
                                         bool IsSignaling) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *Cmp;
    if (IsSignaling)
      Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
    else
      Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
    return Builder.CreateBitCast(Sext, FPVecTy);
  };
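
  // Illustrative example: a packed FP equality compare lowered through
  // getVectorFCmpIR becomes an OEQ fcmp on <4 x float>; the i1 results are
  // sign-extended to <4 x i32> (all-ones or all-zeros per lane) and bitcast
  // back to <4 x float>, matching the "mask in a float register" convention
  // of the SSE/AVX compare instructions.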

  switch (BuiltinID) {
  default: return nullptr;
  case X86::BI_mm_prefetch: {
    Value *Address = Ops[0];
    ConstantInt *C = cast<ConstantInt>(Ops[1]);
    Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
    Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
    Value *Data = ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }
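  // Illustrative example for the case above (assuming the usual xmmintrin.h
  // hint encodings, e.g. _MM_HINT_T0 == 3 and _MM_HINT_ET0 == 7): a hint of 7
  // yields RW = 1 (prefetch for write) and Locality = 3, while a hint of 3
  // yields RW = 0 and Locality = 3; Data = 1 marks this as a data prefetch.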
  case X86::BI_mm_clflush: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
                              Ops[0]);
  }
  case X86::BI_mm_lfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
  }
  case X86::BI_mm_mfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
  }
  case X86::BI_mm_sfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
  }
  case X86::BI_mm_pause: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
  }
  case X86::BI__rdtsc: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
  }
  case X86::BI__builtin_ia32_rdtscp: {
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_lzcnt_u16:
  case X86::BI__builtin_ia32_lzcnt_u32:
  case X86::BI__builtin_ia32_lzcnt_u64: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  }
  case X86::BI__builtin_ia32_tzcnt_u16:
  case X86::BI__builtin_ia32_tzcnt_u32:
  case X86::BI__builtin_ia32_tzcnt_u64: {
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    // The x86 definition of "undef" is not the same as the LLVM definition
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
    // IR optimizer and backend.
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
    // value, we should use that here instead of a zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
    Index &= NumElts - 1;
    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return Builder.CreateExtractElement(Ops[0], Index);
  }
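  // Illustrative example: __builtin_ia32_vec_ext_v8hi(v, 5) becomes
  // "extractelement <8 x i16> %v, i64 5"; the "Index &= NumElts - 1" above
  // keeps a too-large immediate from turning into an invalid element index.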
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    Index &= NumElts - 1;
    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
  }
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    Address Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64:
  case X86::BI__builtin_ia32_xsetbv:
  case X86::BI_xsetbv: {
    Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME)                                           \
  case X86::BI__builtin_ia32_##NAME:                                           \
    ID = Intrinsic::x86_##NAME;                                                \
    break
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    INTRINSIC_X86_XSAVE_ID(xsave);
    INTRINSIC_X86_XSAVE_ID(xsave64);
    INTRINSIC_X86_XSAVE_ID(xrstor);
    INTRINSIC_X86_XSAVE_ID(xrstor64);
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
    INTRINSIC_X86_XSAVE_ID(xrstors);
    INTRINSIC_X86_XSAVE_ID(xrstors64);
    INTRINSIC_X86_XSAVE_ID(xsavec);
    INTRINSIC_X86_XSAVE_ID(xsavec64);
    INTRINSIC_X86_XSAVE_ID(xsaves);
    INTRINSIC_X86_XSAVE_ID(xsaves64);
    INTRINSIC_X86_XSAVE_ID(xsetbv);
    case X86::BI_xsetbv:
      ID = Intrinsic::x86_xsetbv;
      break;
    }
#undef INTRINSIC_X86_XSAVE_ID
    Value *Mhi = Builder.CreateTrunc(
        Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Ops[1] = Mhi;
    Ops.push_back(Mlo);
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
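  // Illustrative note on the case above: the xsave-family intrinsics expect
  // the 64-bit feature mask split across EDX:EAX, so the single i64 mask
  // argument is split into a high half (Mhi, replacing Ops[1]) and a low half
  // (Mlo, appended) before the intrinsic call.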
  case X86::BI__builtin_ia32_xgetbv:
  case X86::BI_xgetbv:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
    return EmitX86MaskedStore(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_storesh128_mask:
  case X86::BI__builtin_ia32_storess128_mask:
  case X86::BI__builtin_ia32_storesd128_mask:
    return EmitX86MaskedStore(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_vpopcntb_128:
  case X86::BI__builtin_ia32_vpopcntd_128:
  case X86::BI__builtin_ia32_vpopcntq_128:
  case X86::BI__builtin_ia32_vpopcntw_128:
  case X86::BI__builtin_ia32_vpopcntb_256:
  case X86::BI__builtin_ia32_vpopcntd_256:
  case X86::BI__builtin_ia32_vpopcntq_256:
  case X86::BI__builtin_ia32_vpopcntw_256:
  case X86::BI__builtin_ia32_vpopcntb_512:
  case X86::BI__builtin_ia32_vpopcntd_512:
  case X86::BI__builtin_ia32_vpopcntq_512:
  case X86::BI__builtin_ia32_vpopcntw_512: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return EmitX86ConvertToMask(*this, Ops[0]);
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
  case X86::BI__builtin_ia32_vfmaddss3:
  case X86::BI__builtin_ia32_vfmaddsd3:
  case X86::BI__builtin_ia32_vfmaddsh3_mask:
  case X86::BI__builtin_ia32_vfmaddss3_mask:
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
  case X86::BI__builtin_ia32_vfmaddss:
  case X86::BI__builtin_ia32_vfmaddsd:
    return EmitScalarFMAExpr(*this, E, Ops,
                             Constant::getNullValue(Ops[0]->getType()));
  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
                             /*NegAcc*/ true);
  case X86::BI__builtin_ia32_vfmaddph:
  case X86::BI__builtin_ia32_vfmaddps:
  case X86::BI__builtin_ia32_vfmaddpd:
  case X86::BI__builtin_ia32_vfmaddph256:
  case X86::BI__builtin_ia32_vfmaddps256:
  case X86::BI__builtin_ia32_vfmaddpd256:
  case X86::BI__builtin_ia32_vfmaddph512_mask:
  case X86::BI__builtin_ia32_vfmaddph512_maskz:
  case X86::BI__builtin_ia32_vfmaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddps512_mask:
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubph512_mask3:
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask:
    return EmitX86MaskedStore(
        *this, Ops,
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    return EmitX86MaskedLoad(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_loadsh128_mask:
  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
    return EmitX86MaskedLoad(*this, Ops, Align(1));
  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask:
    return EmitX86MaskedLoad(
        *this, Ops,
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  case X86::BI__builtin_ia32_expandloaddf128_mask:
  case X86::BI__builtin_ia32_expandloaddf256_mask:
  case X86::BI__builtin_ia32_expandloaddf512_mask:
  case X86::BI__builtin_ia32_expandloadsf128_mask:
  case X86::BI__builtin_ia32_expandloadsf256_mask:
  case X86::BI__builtin_ia32_expandloadsf512_mask:
  case X86::BI__builtin_ia32_expandloaddi128_mask:
  case X86::BI__builtin_ia32_expandloaddi256_mask:
  case X86::BI__builtin_ia32_expandloaddi512_mask:
  case X86::BI__builtin_ia32_expandloadsi128_mask:
  case X86::BI__builtin_ia32_expandloadsi256_mask:
  case X86::BI__builtin_ia32_expandloadsi512_mask:
  case X86::BI__builtin_ia32_expandloadhi128_mask:
  case X86::BI__builtin_ia32_expandloadhi256_mask:
  case X86::BI__builtin_ia32_expandloadhi512_mask:
  case X86::BI__builtin_ia32_expandloadqi128_mask:
  case X86::BI__builtin_ia32_expandloadqi256_mask:
  case X86::BI__builtin_ia32_expandloadqi512_mask:
    return EmitX86ExpandLoad(*this, Ops);
  case X86::BI__builtin_ia32_compressstoredf128_mask:
  case X86::BI__builtin_ia32_compressstoredf256_mask:
  case X86::BI__builtin_ia32_compressstoredf512_mask:
  case X86::BI__builtin_ia32_compressstoresf128_mask:
  case X86::BI__builtin_ia32_compressstoresf256_mask:
  case X86::BI__builtin_ia32_compressstoresf512_mask:
  case X86::BI__builtin_ia32_compressstoredi128_mask:
  case X86::BI__builtin_ia32_compressstoredi256_mask:
  case X86::BI__builtin_ia32_compressstoredi512_mask:
  case X86::BI__builtin_ia32_compressstoresi128_mask:
  case X86::BI__builtin_ia32_compressstoresi256_mask:
  case X86::BI__builtin_ia32_compressstoresi512_mask:
  case X86::BI__builtin_ia32_compressstorehi128_mask:
  case X86::BI__builtin_ia32_compressstorehi256_mask:
  case X86::BI__builtin_ia32_compressstorehi512_mask:
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
    return EmitX86CompressStore(*this, Ops);
  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
  case X86::BI__builtin_ia32_gather3div2df:
  case X86::BI__builtin_ia32_gather3div2di:
  case X86::BI__builtin_ia32_gather3div4df:
  case X86::BI__builtin_ia32_gather3div4di:
  case X86::BI__builtin_ia32_gather3div4sf:
  case X86::BI__builtin_ia32_gather3div4si:
  case X86::BI__builtin_ia32_gather3div8sf:
  case X86::BI__builtin_ia32_gather3div8si:
  case X86::BI__builtin_ia32_gather3siv2df:
  case X86::BI__builtin_ia32_gather3siv2di:
  case X86::BI__builtin_ia32_gather3siv4df:
  case X86::BI__builtin_ia32_gather3siv4di:
  case X86::BI__builtin_ia32_gather3siv4sf:
  case X86::BI__builtin_ia32_gather3siv4si:
  case X86::BI__builtin_ia32_gather3siv8sf:
  case X86::BI__builtin_ia32_gather3siv8si:
  case X86::BI__builtin_ia32_gathersiv8df:
  case X86::BI__builtin_ia32_gathersiv16sf:
  case X86::BI__builtin_ia32_gatherdiv8df:
  case X86::BI__builtin_ia32_gatherdiv16sf:
  case X86::BI__builtin_ia32_gathersiv8di:
  case X86::BI__builtin_ia32_gathersiv16si:
  case X86::BI__builtin_ia32_gatherdiv8di:
  case X86::BI__builtin_ia32_gatherdiv16si: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_gather3div2df:
      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
      break;
    case X86::BI__builtin_ia32_gather3div2di:
      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
      break;
    case X86::BI__builtin_ia32_gather3div4df:
      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
      break;
    case X86::BI__builtin_ia32_gather3div4di:
      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
      break;
    case X86::BI__builtin_ia32_gather3div4sf:
      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
      break;
    case X86::BI__builtin_ia32_gather3div4si:
      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
      break;
    case X86::BI__builtin_ia32_gather3div8sf:
      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
      break;
    case X86::BI__builtin_ia32_gather3div8si:
      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
      break;
    case X86::BI__builtin_ia32_gather3siv2df:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
      break;
    case X86::BI__builtin_ia32_gather3siv2di:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4df:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
      break;
    case X86::BI__builtin_ia32_gather3siv4di:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv4si:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
      break;
    case X86::BI__builtin_ia32_gather3siv8sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv8si:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
      break;
    case X86::BI__builtin_ia32_gathersiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
      break;
    case X86::BI__builtin_ia32_gathersiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
      break;
    }

    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_scattersiv8df:
  case X86::BI__builtin_ia32_scattersiv16sf:
  case X86::BI__builtin_ia32_scatterdiv8df:
  case X86::BI__builtin_ia32_scatterdiv16sf:
  case X86::BI__builtin_ia32_scattersiv8di:
  case X86::BI__builtin_ia32_scattersiv16si:
  case X86::BI__builtin_ia32_scatterdiv8di:
  case X86::BI__builtin_ia32_scatterdiv16si:
  case X86::BI__builtin_ia32_scatterdiv2df:
  case X86::BI__builtin_ia32_scatterdiv2di:
  case X86::BI__builtin_ia32_scatterdiv4df:
  case X86::BI__builtin_ia32_scatterdiv4di:
  case X86::BI__builtin_ia32_scatterdiv4sf:
  case X86::BI__builtin_ia32_scatterdiv4si:
  case X86::BI__builtin_ia32_scatterdiv8sf:
  case X86::BI__builtin_ia32_scatterdiv8si:
  case X86::BI__builtin_ia32_scattersiv2df:
  case X86::BI__builtin_ia32_scattersiv2di:
  case X86::BI__builtin_ia32_scattersiv4df:
  case X86::BI__builtin_ia32_scattersiv4di:
  case X86::BI__builtin_ia32_scattersiv4sf:
  case X86::BI__builtin_ia32_scattersiv4si:
  case X86::BI__builtin_ia32_scattersiv8sf:
  case X86::BI__builtin_ia32_scattersiv8si: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_scattersiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
      break;
    case X86::BI__builtin_ia32_scattersiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv2df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv2di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv4di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv4si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
      break;
    case X86::BI__builtin_ia32_scatterdiv8sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv8si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
      break;
    case X86::BI__builtin_ia32_scattersiv2df:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
      break;
    case X86::BI__builtin_ia32_scattersiv2di:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4df:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
      break;
    case X86::BI__builtin_ia32_scattersiv4di:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv4si:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
      break;
    case X86::BI__builtin_ia32_scattersiv8sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv8si:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
      break;
    }

    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_vextractf128_pd256:
  case X86::BI__builtin_ia32_vextractf128_ps256:
  case X86::BI__builtin_ia32_vextractf128_si256:
  case X86::BI__builtin_ia32_extract128i256:
  case X86::BI__builtin_ia32_extractf64x4_mask:
  case X86::BI__builtin_ia32_extractf32x4_mask:
  case X86::BI__builtin_ia32_extracti64x4_mask:
  case X86::BI__builtin_ia32_extracti32x4_mask:
  case X86::BI__builtin_ia32_extractf32x8_mask:
  case X86::BI__builtin_ia32_extracti32x8_mask:
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
    auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
    unsigned NumElts = DstTy->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SubVectors = SrcNumElts / NumElts;
    unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= NumElts;

    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + Index;

    Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                             "extract");

    if (Ops.size() == 4)
      Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);

    return Res;
  }
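  // Illustrative example: _mm256_extractf128_pd(v, 1) has NumElts = 2 and
  // SrcNumElts = 4, so Index becomes 2 and the shuffle mask is {2, 3},
  // i.e. the upper 128-bit half of the source.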
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512: {
    unsigned DstNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
    unsigned SubVectors = DstNumElts / SrcNumElts;
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= SrcNumElts;

    int Indices[16];
    for (unsigned i = 0; i != DstNumElts; ++i)
      Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;

    Value *Op1 = Builder.CreateShuffleVector(
        Ops[1], ArrayRef(Indices, DstNumElts), "widen");

    for (unsigned i = 0; i != DstNumElts; ++i) {
      if (i >= Index && i < (Index + SrcNumElts))
        Indices[i] = (i - Index) + DstNumElts;
      else
        Indices[i] = i;
    }

    return Builder.CreateShuffleVector(Ops[0], Op1,
                                       ArrayRef(Indices, DstNumElts), "insert");
  }
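  // Illustrative example: _mm256_insertf128_pd(a, b, 1) first widens b to a
  // <4 x double> {b0, b1, poison, poison} with mask {0, 1, 2, 3}, then
  // shuffles a against the widened b with mask {0, 1, 4, 5}, placing b into
  // the upper 128 bits of a.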
  case X86::BI__builtin_ia32_pmovqd512_mask:
  case X86::BI__builtin_ia32_pmovwb512_mask: {
    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__builtin_ia32_pmovdb512_mask:
  case X86::BI__builtin_ia32_pmovdw512_mask:
  case X86::BI__builtin_ia32_pmovqw512_mask: {
    if (const auto *C = dyn_cast<Constant>(Ops[2]))
      if (C->isAllOnesValue())
        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());

    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pmovdb512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
      break;
    case X86::BI__builtin_ia32_pmovdw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
      break;
    case X86::BI__builtin_ia32_pmovqw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
      break;
    }

    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_pblendw128:
  case X86::BI__builtin_ia32_blendpd:
  case X86::BI__builtin_ia32_blendps:
  case X86::BI__builtin_ia32_blendpd256:
  case X86::BI__builtin_ia32_blendps256:
  case X86::BI__builtin_ia32_pblendw256:
  case X86::BI__builtin_ia32_pblendd128:
  case X86::BI__builtin_ia32_pblendd256: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    int Indices[16];
    // If there are more than 8 elements, the immediate is used twice so make
    // sure we handle that.
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;

    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "blend");
  }
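  // Illustrative example: _mm_blend_epi16(a, b, 0x0F) has NumElts = 8, so the
  // mask becomes {8, 9, 10, 11, 4, 5, 6, 7}: the low four lanes come from b
  // (immediate bit set) and the high four from a.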
  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i) {
        Indices[l + i] = l + (Imm & 3);
        Imm >>= 2;
      }
      for (unsigned i = 4; i != 8; ++i)
        Indices[l + i] = l + i;
    }

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "pshuflw");
  }
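  // Illustrative example: __builtin_ia32_pshuflw(v, 0x1B) selects
  // {3, 2, 1, 0} from the low four words of each 128-bit lane and leaves
  // words 4-7 in place, i.e. indices {3, 2, 1, 0, 4, 5, 6, 7} for the
  // 128-bit form.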
  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + i;
      for (unsigned i = 4; i != 8; ++i) {
        Indices[l + i] = l + 4 + (Imm & 3);
        Imm >>= 2;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "pshufhw");
  }
  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilpd512:
  case X86::BI__builtin_ia32_vpermilps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;

    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[i + l] = (Imm % NumLaneElts) + l;
        Imm /= NumLaneElts;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "permil");
  }
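  // Illustrative example: __builtin_ia32_pshufd(v, 0x1B) on <4 x i32> walks
  // the immediate two bits at a time, giving indices {3, 2, 1, 0} (a full
  // element reversal); the vpermilps/vpermilpd forms use the same per-lane
  // scheme.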
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;

    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Index = Imm % NumLaneElts;
        Imm /= NumLaneElts;
        if (i >= (NumLaneElts / 2))
          Index += NumElts;
        Indices[l + i] = l + Index;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "shufp");
  }
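  // Illustrative example: __builtin_ia32_shufps(a, b, 0x44) gives indices
  // {0, 1, 4, 5}: the low two result lanes are picked from a and the high two
  // from b, because "i >= NumLaneElts / 2" switches the source by adding
  // NumElts to the index.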
  case X86::BI__builtin_ia32_permdi256:
  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi512:
  case X86::BI__builtin_ia32_permdf512: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();

    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
    int Indices[8];
    for (unsigned l = 0; l != NumElts; l += 4)
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);

    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
                                       "perm");
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;

    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    assert(NumElts % 16 == 0);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    int Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }

    return Builder.CreateShuffleVector(Ops[1], Ops[0],
                                       ArrayRef(Indices, NumElts), "palignr");
  }
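  // Illustrative example: a 128-bit palignr with a shift of 4 produces
  // indices {4, ..., 15, 16, 17, 18, 19} over the concatenation
  // (Ops[1], Ops[0]): it drops the low four bytes of Ops[1] and shifts in the
  // low four bytes of Ops[0], matching the "shift right across a concatenated
  // pair" semantics.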
  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;

    // Mask the shift amount to width of a vector.
    ShiftVal &= NumElts - 1;

    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;

    return Builder.CreateShuffleVector(Ops[1], Ops[0],
                                       ArrayRef(Indices, NumElts), "valign");
  }
  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_i64x2: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
    unsigned NumLaneElts = NumElts / NumLanes;

    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      unsigned Index = (Imm % NumLanes) * NumLaneElts;
      Imm /= NumLanes; // Discard the bits we just used.
      if (l >= (NumElts / 2))
        Index += NumElts; // Switch to other source.
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[l + i] = Index + i;
      }
    }

    return Builder.CreateShuffleVector(Ops[0], Ops[1],
                                       ArrayRef(Indices, NumElts), "shuf");
  }
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();

    // This takes a very simple approach since there are two lanes and a
    // shuffle can have 2 inputs. So we reserve the first input for the first
    // lane and the second input for the second lane. This may result in
    // duplicate sources, but this can be dealt with in the backend.

    Value *OutOps[2];
    int Indices[8];
    for (unsigned l = 0; l != 2; ++l) {
      // Determine the source for this lane.
      if (Imm & (1 << ((l * 4) + 3)))
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
      else if (Imm & (1 << ((l * 4) + 1)))
        OutOps[l] = Ops[1];
      else
        OutOps[l] = Ops[0];

      for (unsigned i = 0; i != NumElts/2; ++i) {
        // Start with ith element of the source for this lane.
        unsigned Idx = (l * NumElts) + i;
        // If bit 0 of the immediate half is set, switch to the high half of
        // the source.
        if (Imm & (1 << (l * 4)))
          Idx += NumElts/2;
        Indices[(l * (NumElts/2)) + i] = Idx;
      }
    }

    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
                                       ArrayRef(Indices, NumElts), "vperm");
  }
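  // Illustrative example: an immediate of 0x31 selects the high half of
  // Ops[0] for the low result lane (low nibble 1) and the high half of
  // Ops[1] for the high result lane (high nibble 3); bit 3 of a nibble would
  // instead zero that lane via ConstantAggregateZero.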
  13035. case X86::BI__builtin_ia32_pslldqi128_byteshift:
  13036. case X86::BI__builtin_ia32_pslldqi256_byteshift:
  13037. case X86::BI__builtin_ia32_pslldqi512_byteshift: {
  13038. unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
  13039. auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
  13040. // Builtin type is vXi64 so multiply by 8 to get bytes.
  13041. unsigned NumElts = ResultType->getNumElements() * 8;
  13042. // If pslldq is shifting the vector more than 15 bytes, emit zero.
  13043. if (ShiftVal >= 16)
  13044. return llvm::Constant::getNullValue(ResultType);
  13045. int Indices[64];
  13046. // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
  13047. for (unsigned l = 0; l != NumElts; l += 16) {
  13048. for (unsigned i = 0; i != 16; ++i) {
  13049. unsigned Idx = NumElts + i - ShiftVal;
  13050. if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
  13051. Indices[l + i] = Idx + l;
  13052. }
  13053. }
  13054. auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
  13055. Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
  13056. Value *Zero = llvm::Constant::getNullValue(VecTy);
  13057. Value *SV = Builder.CreateShuffleVector(
  13058. Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
  13059. return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
  13060. }
  13061. case X86::BI__builtin_ia32_psrldqi128_byteshift:
  13062. case X86::BI__builtin_ia32_psrldqi256_byteshift:
  13063. case X86::BI__builtin_ia32_psrldqi512_byteshift: {
  13064. unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
  13065. auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
  13066. // Builtin type is vXi64 so multiply by 8 to get bytes.
  13067. unsigned NumElts = ResultType->getNumElements() * 8;
  13068. // If psrldq is shifting the vector more than 15 bytes, emit zero.
  13069. if (ShiftVal >= 16)
  13070. return llvm::Constant::getNullValue(ResultType);
  13071. int Indices[64];
  13072. // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
  13073. for (unsigned l = 0; l != NumElts; l += 16) {
  13074. for (unsigned i = 0; i != 16; ++i) {
  13075. unsigned Idx = i + ShiftVal;
  13076. if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
  13077. Indices[l + i] = Idx + l;
  13078. }
  13079. }
  13080. auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
  13081. Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
  13082. Value *Zero = llvm::Constant::getNullValue(VecTy);
  13083. Value *SV = Builder.CreateShuffleVector(
  13084. Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
  13085. return Builder.CreateBitCast(SV, ResultType, "cast");
  13086. }
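  // AVX-512 mask-register shifts: kshiftl/kshiftr are lowered as shuffles of
  // the vXi1 mask value against an all-zero mask, shifting zeros in from the
  // low (kshiftl) or high (kshiftr) end.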
  case X86::BI__builtin_ia32_kshiftliqi:
  case X86::BI__builtin_ia32_kshiftlihi:
  case X86::BI__builtin_ia32_kshiftlisi:
  case X86::BI__builtin_ia32_kshiftlidi: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = NumElts + i - ShiftVal;
    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kshiftriqi:
  case X86::BI__builtin_ia32_kshiftrihi:
  case X86::BI__builtin_ia32_kshiftrisi:
  case X86::BI__builtin_ia32_kshiftridi: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;
    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64:
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
    Value *Ptr = Ops[0];
    Value *Src = Ops[1];
    // Extract the 0'th element of the source vector.
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
        BuiltinID == X86::BI__builtin_ia32_movntss)
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(llvm::Align(1));
    return SI;
  }
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
  case X86::BI__builtin_ia32_vprotb:
  case X86::BI__builtin_ia32_vprotw:
  case X86::BI__builtin_ia32_vprotd:
  case X86::BI__builtin_ia32_vprotq:
  case X86::BI__builtin_ia32_vprotbi:
  case X86::BI__builtin_ia32_vprotwi:
  case X86::BI__builtin_ia32_vprotdi:
  case X86::BI__builtin_ia32_vprotqi:
  case X86::BI__builtin_ia32_prold128:
  case X86::BI__builtin_ia32_prold256:
  case X86::BI__builtin_ia32_prold512:
  case X86::BI__builtin_ia32_prolq128:
  case X86::BI__builtin_ia32_prolq256:
  case X86::BI__builtin_ia32_prolq512:
  case X86::BI__builtin_ia32_prolvd128:
  case X86::BI__builtin_ia32_prolvd256:
  case X86::BI__builtin_ia32_prolvd512:
  case X86::BI__builtin_ia32_prolvq128:
  case X86::BI__builtin_ia32_prolvq256:
  case X86::BI__builtin_ia32_prolvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
  case X86::BI__builtin_ia32_prord128:
  case X86::BI__builtin_ia32_prord256:
  case X86::BI__builtin_ia32_prord512:
  case X86::BI__builtin_ia32_prorq128:
  case X86::BI__builtin_ia32_prorq256:
  case X86::BI__builtin_ia32_prorq512:
  case X86::BI__builtin_ia32_prorvd128:
  case X86::BI__builtin_ia32_prorvd256:
  case X86::BI__builtin_ia32_prorvd512:
  case X86::BI__builtin_ia32_prorvq128:
  case X86::BI__builtin_ia32_prorvq256:
  case X86::BI__builtin_ia32_prorvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectpbf_128:
  case X86::BI__builtin_ia32_selectpbf_256:
  case X86::BI__builtin_ia32_selectpbf_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  case X86::BI__builtin_ia32_selectsh_128:
  case X86::BI__builtin_ia32_selectsbf_128:
  case X86::BI__builtin_ia32_selectss_128:
  case X86::BI__builtin_ia32_selectsd_128: {
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
  }
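  // AVX-512 integer compares: bits [2:0] of the immediate pick the predicate;
  // the signed and unsigned variants differ only in the flag passed to
  // EmitX86MaskedCompare.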
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }
  case X86::BI__builtin_ia32_vpcomb:
  case X86::BI__builtin_ia32_vpcomw:
  case X86::BI__builtin_ia32_vpcomd:
  case X86::BI__builtin_ia32_vpcomq:
    return EmitX86vpcom(*this, Ops, true);
  case X86::BI__builtin_ia32_vpcomub:
  case X86::BI__builtin_ia32_vpcomuw:
  case X86::BI__builtin_ia32_vpcomud:
  case X86::BI__builtin_ia32_vpcomuq:
    return EmitX86vpcom(*this, Ops, false);
  case X86::BI__builtin_ia32_kortestcqi:
  case X86::BI__builtin_ia32_kortestchi:
  case X86::BI__builtin_ia32_kortestcsi:
  case X86::BI__builtin_ia32_kortestcdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_kortestzqi:
  case X86::BI__builtin_ia32_kortestzhi:
  case X86::BI__builtin_ia32_kortestzsi:
  case X86::BI__builtin_ia32_kortestzdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_ktestcqi:
  case X86::BI__builtin_ia32_ktestzqi:
  case X86::BI__builtin_ia32_ktestchi:
  case X86::BI__builtin_ia32_ktestzhi:
  case X86::BI__builtin_ia32_ktestcsi:
  case X86::BI__builtin_ia32_ktestzsi:
  case X86::BI__builtin_ia32_ktestcdi:
  case X86::BI__builtin_ia32_ktestzdi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_ktestcqi:
      IID = Intrinsic::x86_avx512_ktestc_b;
      break;
    case X86::BI__builtin_ia32_ktestzqi:
      IID = Intrinsic::x86_avx512_ktestz_b;
      break;
    case X86::BI__builtin_ia32_ktestchi:
      IID = Intrinsic::x86_avx512_ktestc_w;
      break;
    case X86::BI__builtin_ia32_ktestzhi:
      IID = Intrinsic::x86_avx512_ktestz_w;
      break;
    case X86::BI__builtin_ia32_ktestcsi:
      IID = Intrinsic::x86_avx512_ktestc_d;
      break;
    case X86::BI__builtin_ia32_ktestzsi:
      IID = Intrinsic::x86_avx512_ktestz_d;
      break;
    case X86::BI__builtin_ia32_ktestcdi:
      IID = Intrinsic::x86_avx512_ktestc_q;
      break;
    case X86::BI__builtin_ia32_ktestzdi:
      IID = Intrinsic::x86_avx512_ktestz_q;
      break;
    }
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, {LHS, RHS});
  }
  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_kaddqi:
      IID = Intrinsic::x86_avx512_kadd_b;
      break;
    case X86::BI__builtin_ia32_kaddhi:
      IID = Intrinsic::x86_avx512_kadd_w;
      break;
    case X86::BI__builtin_ia32_kaddsi:
      IID = Intrinsic::x86_avx512_kadd_d;
      break;
    case X86::BI__builtin_ia32_kadddi:
      IID = Intrinsic::x86_avx512_kadd_q;
      break;
    }
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
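  // Remaining mask-register logic ops (kand/kandn/kor/kxnor/kxor) map onto
  // plain bitwise IR operations on the vXi1 mask via EmitX86MaskLogic.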
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Builder.CreateNot(Res),
                                 Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq: {
    // Bitcast to vXi1 type and then back to integer. This gets the mask
    // register type into the IR, but might be optimized out depending on
    // what's around it.
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
  case X86::BI__builtin_ia32_kunpckhi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands are swapped to match the intrinsic definition.
    Value *Res =
        Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_vplzcntd_128:
  case X86::BI__builtin_ia32_vplzcntd_256:
  case X86::BI__builtin_ia32_vplzcntd_512:
  case X86::BI__builtin_ia32_vplzcntq_128:
  case X86::BI__builtin_ia32_vplzcntq_256:
  case X86::BI__builtin_ia32_vplzcntq_512: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  }
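  // Scalar sqrt: extract element 0, emit either llvm.sqrt or the constrained
  // sqrt intrinsic depending on the FP environment, and reinsert the result.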
  case X86::BI__builtin_ia32_sqrtss:
  case X86::BI__builtin_ia32_sqrtsd: {
    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                           A->getType());
      A = Builder.CreateConstrainedFPCall(F, {A});
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, {A});
    }
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtsh_round_mask:
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
    // otherwise keep the intrinsic.
    if (CC != 4) {
      Intrinsic::ID IID;
      switch (BuiltinID) {
      default:
        llvm_unreachable("Unsupported intrinsic!");
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
        break;
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
        break;
      case X86::BI__builtin_ia32_sqrtss_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
        break;
      }
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    }
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                           A->getType());
      A = Builder.CreateConstrainedFPCall(F, A);
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, A);
    }
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtpd256:
  case X86::BI__builtin_ia32_sqrtpd:
  case X86::BI__builtin_ia32_sqrtps256:
  case X86::BI__builtin_ia32_sqrtps:
  case X86::BI__builtin_ia32_sqrtph256:
  case X86::BI__builtin_ia32_sqrtph:
  case X86::BI__builtin_ia32_sqrtph512:
  case X86::BI__builtin_ia32_sqrtps512:
  case X86::BI__builtin_ia32_sqrtpd512: {
    if (Ops.size() == 2) {
      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
      // otherwise keep the intrinsic.
      if (CC != 4) {
        Intrinsic::ID IID;
        switch (BuiltinID) {
        default:
          llvm_unreachable("Unsupported intrinsic!");
        case X86::BI__builtin_ia32_sqrtph512:
          IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
          break;
        case X86::BI__builtin_ia32_sqrtps512:
          IID = Intrinsic::x86_avx512_sqrt_ps_512;
          break;
        case X86::BI__builtin_ia32_sqrtpd512:
          IID = Intrinsic::x86_avx512_sqrt_pd_512;
          break;
        }
        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
      }
    }
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                     Ops[0]->getType());
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
      return Builder.CreateCall(F, Ops[0]);
    }
  }
  case X86::BI__builtin_ia32_pmuludq128:
  case X86::BI__builtin_ia32_pmuludq256:
  case X86::BI__builtin_ia32_pmuludq512:
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
  case X86::BI__builtin_ia32_pmuldq128:
  case X86::BI__builtin_ia32_pmuldq256:
  case X86::BI__builtin_ia32_pmuldq512:
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
  case X86::BI__builtin_ia32_vpshrdd128:
  case X86::BI__builtin_ia32_vpshrdd256:
  case X86::BI__builtin_ia32_vpshrdd512:
  case X86::BI__builtin_ia32_vpshrdq128:
  case X86::BI__builtin_ia32_vpshrdq256:
  case X86::BI__builtin_ia32_vpshrdq512:
  case X86::BI__builtin_ia32_vpshrdw128:
  case X86::BI__builtin_ia32_vpshrdw256:
  case X86::BI__builtin_ia32_vpshrdw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
  case X86::BI__builtin_ia32_vpshldvd128:
  case X86::BI__builtin_ia32_vpshldvd256:
  case X86::BI__builtin_ia32_vpshldvd512:
  case X86::BI__builtin_ia32_vpshldvq128:
  case X86::BI__builtin_ia32_vpshldvq256:
  case X86::BI__builtin_ia32_vpshldvq512:
  case X86::BI__builtin_ia32_vpshldvw128:
  case X86::BI__builtin_ia32_vpshldvw256:
  case X86::BI__builtin_ia32_vpshldvw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
  case X86::BI__builtin_ia32_vpshrdvd128:
  case X86::BI__builtin_ia32_vpshrdvd256:
  case X86::BI__builtin_ia32_vpshrdvd512:
  case X86::BI__builtin_ia32_vpshrdvq128:
  case X86::BI__builtin_ia32_vpshrdvq256:
  case X86::BI__builtin_ia32_vpshrdvq512:
  case X86::BI__builtin_ia32_vpshrdvw128:
  case X86::BI__builtin_ia32_vpshrdvw256:
  case X86::BI__builtin_ia32_vpshrdvw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
  // Reductions
  case X86::BI__builtin_ia32_reduce_fadd_pd512:
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmul_pd512:
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
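  // rdrand/rdseed intrinsics return {random value, success flag}: store the
  // value through the pointer operand and return the flag.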
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  case X86::BI__builtin_ia32_addcarryx_u32:
  case X86::BI__builtin_ia32_addcarryx_u64:
  case X86::BI__builtin_ia32_subborrow_u32:
  case X86::BI__builtin_ia32_subborrow_u64: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_addcarryx_u32:
      IID = Intrinsic::x86_addcarry_32;
      break;
    case X86::BI__builtin_ia32_addcarryx_u64:
      IID = Intrinsic::x86_addcarry_64;
      break;
    case X86::BI__builtin_ia32_subborrow_u32:
      IID = Intrinsic::x86_subborrow_32;
      break;
    case X86::BI__builtin_ia32_subborrow_u64:
      IID = Intrinsic::x86_subborrow_64;
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
                                     { Ops[0], Ops[1], Ops[2] });
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
                                      Ops[3]);
    return Builder.CreateExtractValue(Call, 0);
  }
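  // fpclass: classify each element with the target intrinsic, then combine
  // the i1 results with the incoming mask in EmitX86MaskedCompareResult.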
  case X86::BI__builtin_ia32_fpclassps128_mask:
  case X86::BI__builtin_ia32_fpclassps256_mask:
  case X86::BI__builtin_ia32_fpclassps512_mask:
  case X86::BI__builtin_ia32_fpclassph128_mask:
  case X86::BI__builtin_ia32_fpclassph256_mask:
  case X86::BI__builtin_ia32_fpclassph512_mask:
  case X86::BI__builtin_ia32_fpclasspd128_mask:
  case X86::BI__builtin_ia32_fpclasspd256_mask:
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_fpclassph128_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
      break;
    case X86::BI__builtin_ia32_fpclassph256_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
      break;
    case X86::BI__builtin_ia32_fpclassph512_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
      break;
    case X86::BI__builtin_ia32_fpclassps128_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
      break;
    case X86::BI__builtin_ia32_fpclassps256_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
      break;
    case X86::BI__builtin_ia32_fpclassps512_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
      break;
    case X86::BI__builtin_ia32_fpclasspd128_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
      break;
    case X86::BI__builtin_ia32_fpclasspd256_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
      break;
    case X86::BI__builtin_ia32_fpclasspd512_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
      break;
    }
    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
  }
  case X86::BI__builtin_ia32_vp2intersect_q_512:
  case X86::BI__builtin_ia32_vp2intersect_q_256:
  case X86::BI__builtin_ia32_vp2intersect_q_128:
  case X86::BI__builtin_ia32_vp2intersect_d_512:
  case X86::BI__builtin_ia32_vp2intersect_d_256:
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vp2intersect_q_512:
      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_256:
      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_128:
      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_512:
      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_256:
      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_128:
      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
    Value *Result = Builder.CreateExtractValue(Call, 0);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    Builder.CreateDefaultAlignedStore(Result, Ops[2]);
    Result = Builder.CreateExtractValue(Call, 1);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
  }
  case X86::BI__builtin_ia32_vpmultishiftqb128:
  case X86::BI__builtin_ia32_vpmultishiftqb256:
  case X86::BI__builtin_ia32_vpmultishiftqb512: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpmultishiftqb128:
      ID = Intrinsic::x86_avx512_pmultishift_qb_128;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb256:
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb512:
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
      break;
    }
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }
    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
  }
  // packed comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpph128_mask:
  case X86::BI__builtin_ia32_cmpph256_mask:
  case X86::BI__builtin_ia32_cmpph512_mask:
  case X86::BI__builtin_ia32_cmpps128_mask:
  case X86::BI__builtin_ia32_cmpps256_mask:
  case X86::BI__builtin_ia32_cmpps512_mask:
  case X86::BI__builtin_ia32_cmppd128_mask:
  case X86::BI__builtin_ia32_cmppd256_mask:
  case X86::BI__builtin_ia32_cmppd512_mask:
    IsMaskFCmp = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {
    // Lower vector comparisons to fcmp instructions, while ignoring the
    // requested signaling behaviour and rounding mode.
    // This is only possible if fp-model is not strict and FENV_ACCESS is off.
    // The third argument is the comparison condition, an integer in the
    // range [0, 31].
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
    // Lowering to IR fcmp instruction.
    // Ignoring requested signaling behaviour,
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
    FCmpInst::Predicate Pred;
    bool IsSignaling;
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
    // behavior is inverted. We'll handle that after the switch.
    switch (CC & 0xf) {
    case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
    case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
    case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
    case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
    case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
    case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
    case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
    case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
    case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
    case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
    case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
    case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
    case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
    case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
    case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
    default: llvm_unreachable("Unhandled CC");
    }
    // Invert the signalling behavior for 16-31.
    if (CC & 0x10)
      IsSignaling = !IsSignaling;
    // If the predicate is true or false and we're using constrained intrinsics,
    // we don't have a compare intrinsic we can use. Just use the legacy X86
    // specific intrinsic.
    // If the intrinsic is mask enabled and we're using constrained intrinsics,
    // use the legacy X86 specific intrinsic.
    if (Builder.getIsFPConstrained() &&
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
         IsMaskFCmp)) {
      Intrinsic::ID IID;
      switch (BuiltinID) {
      default: llvm_unreachable("Unexpected builtin");
      case X86::BI__builtin_ia32_cmpps:
        IID = Intrinsic::x86_sse_cmp_ps;
        break;
      case X86::BI__builtin_ia32_cmpps256:
        IID = Intrinsic::x86_avx_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd:
        IID = Intrinsic::x86_sse2_cmp_pd;
        break;
      case X86::BI__builtin_ia32_cmppd256:
        IID = Intrinsic::x86_avx_cmp_pd_256;
        break;
      case X86::BI__builtin_ia32_cmpps512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
        break;
      case X86::BI__builtin_ia32_cmppd512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
        break;
      case X86::BI__builtin_ia32_cmpps128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
        break;
      case X86::BI__builtin_ia32_cmpps256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
        break;
      case X86::BI__builtin_ia32_cmppd256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
        break;
      }
      Function *Intr = CGM.getIntrinsic(IID);
      if (IsMaskFCmp) {
        unsigned NumElts =
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
        Value *Cmp = Builder.CreateCall(Intr, Ops);
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
      }
      return Builder.CreateCall(Intr, Ops);
    }
    // Builtins without the _mask suffix return a vector of integers
    // of the same width as the input vectors.
    if (IsMaskFCmp) {
      // We ignore SAE if strict FP is disabled. We only keep precise
      // exception behavior under strict FP.
      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
      // object will be required.
      unsigned NumElts =
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
      Value *Cmp;
      if (IsSignaling)
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
      else
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
    }
    return getVectorFCmpIR(Pred, IsSignaling);
  }
  // SSE scalar comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
  // f16c half2float intrinsics
  case X86::BI__builtin_ia32_vcvtph2ps:
  case X86::BI__builtin_ia32_vcvtph2ps256:
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
  }
  // AVX512 bf16 intrinsics
  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
    Ops[2] = getMaskVecValue(
        *this, Ops[2],
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  }
  case X86::BI__builtin_ia32_cvtsbf162ss_32:
    return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
      break;
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
      break;
    }
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__cpuid:
  case X86::BI__cpuidex: {
    Value *FuncId = EmitScalarExpr(E->getArg(1));
    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
                           ? EmitScalarExpr(E->getArg(2))
                           : llvm::ConstantInt::get(Int32Ty, 0);
    llvm::StructType *CpuidRetTy =
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
    StringRef Asm, Constraints;
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
      Asm = "cpuid";
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
    } else {
      // x86-64 uses %rbx as the base register, so preserve it.
      Asm = "xchgq %rbx, ${1:q}\n"
            "cpuid\n"
            "xchgq %rbx, ${1:q}";
      Constraints = "={ax},=r,={cx},={dx},0,2";
    }
    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
                                               /*hasSideEffects=*/false);
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
    Value *Store = nullptr;
    for (unsigned i = 0; i < 4; i++) {
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
    }
    // Return the last store instruction to signal that we have emitted the
    // intrinsic.
    return Store;
  }
  case X86::BI__emul:
  case X86::BI__emulu: {
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }
  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI__shiftleft128:
  case X86::BI__shiftright128: {
    llvm::Function *F = CGM.getIntrinsic(
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
        Int64Ty);
    // Flip low/high ops and zero-extend amount to matching type.
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
    std::swap(Ops[0], Ops[1]);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }
  case X86::BI_AddressOfReturnAddress: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate "rep stosb"
    // instruction, but it will create a memset that won't be optimized away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
  }
  case X86::BI__ud2:
    // llvm.trap makes a ud2a instruction on x86.
    return EmitTrapCall(Intrinsic::trap);
  case X86::BI__int2c: {
    // This syscall signals a driver assertion failure in x86 NT kernels.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    llvm::CallInst *CI = Builder.CreateCall(IA);
    CI->setAttributes(NoReturnAttr);
    return CI;
  }
  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
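  // KeyLocker encodekey builtins: the intrinsic result carries an i32 in
  // element 0 plus the key handle in 16-byte chunks; the chunks are stored
  // through the last pointer operand and the i32 is returned.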
  case X86::BI__builtin_ia32_encodekey128_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
    for (int i = 0; i < 3; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_encodekey256_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;
    Value *Call =
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
    for (int i = 0; i < 4; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_aesenc128kl_u8:
  case X86::BI__builtin_ia32_aesdec128kl_u8:
  case X86::BI__builtin_ia32_aesenc256kl_u8:
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_aesenc128kl_u8:
      IID = Intrinsic::x86_aesenc128kl;
      BlockName = "aesenc128kl";
      break;
    case X86::BI__builtin_ia32_aesdec128kl_u8:
      IID = Intrinsic::x86_aesdec128kl;
      BlockName = "aesdec128kl";
      break;
    case X86::BI__builtin_ia32_aesenc256kl_u8:
      IID = Intrinsic::x86_aesenc256kl;
      BlockName = "aesenc256kl";
      break;
    case X86::BI__builtin_ia32_aesdec256kl_u8:
      IID = Intrinsic::x86_aesdec256kl;
      BlockName = "aesdec256kl";
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Value *Out = Builder.CreateExtractValue(Call, 1);
    Builder.CreateCondBr(Succ, NoError, Error);
    Builder.SetInsertPoint(NoError);
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
    Builder.CreateBr(End);
    Builder.SetInsertPoint(Error);
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
    Builder.CreateBr(End);
    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }
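  // Wide KeyLocker AES builtins operate on eight 128-bit blocks at once; on
  // failure (low bit of the returned status is 0) all eight outputs are
  // zeroed.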
  14228. case X86::BI__builtin_ia32_aesencwide128kl_u8:
  14229. case X86::BI__builtin_ia32_aesdecwide128kl_u8:
  14230. case X86::BI__builtin_ia32_aesencwide256kl_u8:
  14231. case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
  14232. Intrinsic::ID IID;
  14233. StringRef BlockName;
  14234. switch (BuiltinID) {
  14235. case X86::BI__builtin_ia32_aesencwide128kl_u8:
  14236. IID = Intrinsic::x86_aesencwide128kl;
  14237. BlockName = "aesencwide128kl";
  14238. break;
  14239. case X86::BI__builtin_ia32_aesdecwide128kl_u8:
  14240. IID = Intrinsic::x86_aesdecwide128kl;
  14241. BlockName = "aesdecwide128kl";
  14242. break;
  14243. case X86::BI__builtin_ia32_aesencwide256kl_u8:
  14244. IID = Intrinsic::x86_aesencwide256kl;
  14245. BlockName = "aesencwide256kl";
  14246. break;
  14247. case X86::BI__builtin_ia32_aesdecwide256kl_u8:
  14248. IID = Intrinsic::x86_aesdecwide256kl;
  14249. BlockName = "aesdecwide256kl";
  14250. break;
  14251. }
  14252. llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
  14253. Value *InOps[9];
  14254. InOps[0] = Ops[2];
  14255. for (int i = 0; i != 8; ++i) {
  14256. Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
  14257. InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
  14258. }
  14259. Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
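    // The wide forms take the key handle (Ops[2]) plus eight 128-bit blocks
    // loaded from Ops[1]. On success the eight result blocks are stored to
    // Ops[0]; on failure zeros are stored instead. The success flag is
    // returned either way.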
  14260. BasicBlock *NoError =
  14261. createBasicBlock(BlockName + "_no_error", this->CurFn);
  14262. BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
  14263. BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
  14264. Value *Ret = Builder.CreateExtractValue(Call, 0);
  14265. Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
  14266. Builder.CreateCondBr(Succ, NoError, Error);
  14267. Builder.SetInsertPoint(NoError);
  14268. for (int i = 0; i != 8; ++i) {
  14269. Value *Extract = Builder.CreateExtractValue(Call, i + 1);
  14270. Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
  14271. Builder.CreateAlignedStore(Extract, Ptr, Align(16));
  14272. }
  14273. Builder.CreateBr(End);
  14274. Builder.SetInsertPoint(Error);
  14275. for (int i = 0; i != 8; ++i) {
  14276. Value *Out = Builder.CreateExtractValue(Call, i + 1);
  14277. Constant *Zero = llvm::Constant::getNullValue(Out->getType());
  14278. Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
  14279. Builder.CreateAlignedStore(Zero, Ptr, Align(16));
  14280. }
  14281. Builder.CreateBr(End);
  14282. Builder.SetInsertPoint(End);
  14283. return Builder.CreateExtractValue(Call, 0);
  14284. }
  14285. case X86::BI__builtin_ia32_vfcmaddcph512_mask:
  14286. IsConjFMA = true;
  14287. [[fallthrough]];
  14288. case X86::BI__builtin_ia32_vfmaddcph512_mask: {
  14289. Intrinsic::ID IID = IsConjFMA
  14290. ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
  14291. : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
  14292. Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  14293. return EmitX86Select(*this, Ops[3], Call, Ops[0]);
  14294. }
  14295. case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
  14296. IsConjFMA = true;
  14297. [[fallthrough]];
  14298. case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
  14299. Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
  14300. : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
  14301. Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  14302. Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
  14303. return EmitX86Select(*this, And, Call, Ops[0]);
  14304. }
  14305. case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
  14306. IsConjFMA = true;
  14307. [[fallthrough]];
  14308. case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
  14309. Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
  14310. : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
  14311. Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
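    // Blend the scalar result back into the third operand: keep element 0 of
    // the intrinsic result and take the remaining elements from Ops[2]
    // (shuffle indices {0, 5, 6, 7}).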
  14312. static constexpr int Mask[] = {0, 5, 6, 7};
  14313. return Builder.CreateShuffleVector(Call, Ops[2], Mask);
  14314. }
  14315. case X86::BI__builtin_ia32_prefetchi:
  14316. return Builder.CreateCall(
  14317. CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
  14318. {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
  14319. llvm::ConstantInt::get(Int32Ty, 0)});
  14320. }
  14321. }
  14322. Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
  14323. const CallExpr *E) {
  14324. // Do not emit the builtin arguments in the arguments of a function call,
  14325. // because the evaluation order of function arguments is not specified in C++.
  14326. // This is important when testing to ensure the arguments are emitted in the
  // same order every time. E.g.:
  14328. // Instead of:
  14329. // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
  14330. // EmitScalarExpr(E->getArg(1)), "swdiv");
  14331. // Use:
  14332. // Value *Op0 = EmitScalarExpr(E->getArg(0));
  14333. // Value *Op1 = EmitScalarExpr(E->getArg(1));
  14334. // return Builder.CreateFDiv(Op0, Op1, "swdiv")
  14335. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  14336. switch (BuiltinID) {
  14337. default: return nullptr;
  14338. // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  14339. // call __builtin_readcyclecounter.
  14340. case PPC::BI__builtin_ppc_get_timebase:
  14341. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
  14342. // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  14343. case PPC::BI__builtin_altivec_lvx:
  14344. case PPC::BI__builtin_altivec_lvxl:
  14345. case PPC::BI__builtin_altivec_lvebx:
  14346. case PPC::BI__builtin_altivec_lvehx:
  14347. case PPC::BI__builtin_altivec_lvewx:
  14348. case PPC::BI__builtin_altivec_lvsl:
  14349. case PPC::BI__builtin_altivec_lvsr:
  14350. case PPC::BI__builtin_vsx_lxvd2x:
  14351. case PPC::BI__builtin_vsx_lxvw4x:
  14352. case PPC::BI__builtin_vsx_lxvd2x_be:
  14353. case PPC::BI__builtin_vsx_lxvw4x_be:
  14354. case PPC::BI__builtin_vsx_lxvl:
  14355. case PPC::BI__builtin_vsx_lxvll:
  14356. {
  14357. SmallVector<Value *, 2> Ops;
  14358. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  14359. Ops.push_back(EmitScalarExpr(E->getArg(1)));
    if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
        BuiltinID == PPC::BI__builtin_vsx_lxvll) {
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    } else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
      Ops.pop_back();
    }
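    // For the length-based loads (lxvl/lxvll) the pointer and length are
    // passed straight through; for the other forms the effective address is
    // base + offset, formed above as a byte GEP (e.g. vec_ld(off, ptr) turns
    // into lvx on ptr + off).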
  14368. switch (BuiltinID) {
  14369. default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
  14370. case PPC::BI__builtin_altivec_lvx:
  14371. ID = Intrinsic::ppc_altivec_lvx;
  14372. break;
  14373. case PPC::BI__builtin_altivec_lvxl:
  14374. ID = Intrinsic::ppc_altivec_lvxl;
  14375. break;
  14376. case PPC::BI__builtin_altivec_lvebx:
  14377. ID = Intrinsic::ppc_altivec_lvebx;
  14378. break;
  14379. case PPC::BI__builtin_altivec_lvehx:
  14380. ID = Intrinsic::ppc_altivec_lvehx;
  14381. break;
  14382. case PPC::BI__builtin_altivec_lvewx:
  14383. ID = Intrinsic::ppc_altivec_lvewx;
  14384. break;
  14385. case PPC::BI__builtin_altivec_lvsl:
  14386. ID = Intrinsic::ppc_altivec_lvsl;
  14387. break;
  14388. case PPC::BI__builtin_altivec_lvsr:
  14389. ID = Intrinsic::ppc_altivec_lvsr;
  14390. break;
  14391. case PPC::BI__builtin_vsx_lxvd2x:
  14392. ID = Intrinsic::ppc_vsx_lxvd2x;
  14393. break;
  14394. case PPC::BI__builtin_vsx_lxvw4x:
  14395. ID = Intrinsic::ppc_vsx_lxvw4x;
  14396. break;
  14397. case PPC::BI__builtin_vsx_lxvd2x_be:
  14398. ID = Intrinsic::ppc_vsx_lxvd2x_be;
  14399. break;
  14400. case PPC::BI__builtin_vsx_lxvw4x_be:
  14401. ID = Intrinsic::ppc_vsx_lxvw4x_be;
  14402. break;
  14403. case PPC::BI__builtin_vsx_lxvl:
  14404. ID = Intrinsic::ppc_vsx_lxvl;
  14405. break;
  14406. case PPC::BI__builtin_vsx_lxvll:
  14407. ID = Intrinsic::ppc_vsx_lxvll;
  14408. break;
  14409. }
  14410. llvm::Function *F = CGM.getIntrinsic(ID);
  14411. return Builder.CreateCall(F, Ops, "");
  14412. }
  14413. // vec_st, vec_xst_be
  14414. case PPC::BI__builtin_altivec_stvx:
  14415. case PPC::BI__builtin_altivec_stvxl:
  14416. case PPC::BI__builtin_altivec_stvebx:
  14417. case PPC::BI__builtin_altivec_stvehx:
  14418. case PPC::BI__builtin_altivec_stvewx:
  14419. case PPC::BI__builtin_vsx_stxvd2x:
  14420. case PPC::BI__builtin_vsx_stxvw4x:
  14421. case PPC::BI__builtin_vsx_stxvd2x_be:
  14422. case PPC::BI__builtin_vsx_stxvw4x_be:
  14423. case PPC::BI__builtin_vsx_stxvl:
  14424. case PPC::BI__builtin_vsx_stxvll:
  14425. {
  14426. SmallVector<Value *, 3> Ops;
  14427. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  14428. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  14429. Ops.push_back(EmitScalarExpr(E->getArg(2)));
    if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
        BuiltinID == PPC::BI__builtin_vsx_stxvll) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
    } else {
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
      Ops.pop_back();
    }
  14438. switch (BuiltinID) {
  14439. default: llvm_unreachable("Unsupported st intrinsic!");
  14440. case PPC::BI__builtin_altivec_stvx:
  14441. ID = Intrinsic::ppc_altivec_stvx;
  14442. break;
  14443. case PPC::BI__builtin_altivec_stvxl:
  14444. ID = Intrinsic::ppc_altivec_stvxl;
  14445. break;
  14446. case PPC::BI__builtin_altivec_stvebx:
  14447. ID = Intrinsic::ppc_altivec_stvebx;
  14448. break;
  14449. case PPC::BI__builtin_altivec_stvehx:
  14450. ID = Intrinsic::ppc_altivec_stvehx;
  14451. break;
  14452. case PPC::BI__builtin_altivec_stvewx:
  14453. ID = Intrinsic::ppc_altivec_stvewx;
  14454. break;
  14455. case PPC::BI__builtin_vsx_stxvd2x:
  14456. ID = Intrinsic::ppc_vsx_stxvd2x;
  14457. break;
  14458. case PPC::BI__builtin_vsx_stxvw4x:
  14459. ID = Intrinsic::ppc_vsx_stxvw4x;
  14460. break;
  14461. case PPC::BI__builtin_vsx_stxvd2x_be:
  14462. ID = Intrinsic::ppc_vsx_stxvd2x_be;
  14463. break;
  14464. case PPC::BI__builtin_vsx_stxvw4x_be:
  14465. ID = Intrinsic::ppc_vsx_stxvw4x_be;
  14466. break;
  14467. case PPC::BI__builtin_vsx_stxvl:
  14468. ID = Intrinsic::ppc_vsx_stxvl;
  14469. break;
  14470. case PPC::BI__builtin_vsx_stxvll:
  14471. ID = Intrinsic::ppc_vsx_stxvll;
  14472. break;
  14473. }
  14474. llvm::Function *F = CGM.getIntrinsic(ID);
  14475. return Builder.CreateCall(F, Ops, "");
  14476. }
  14477. case PPC::BI__builtin_vsx_ldrmb: {
  14478. // Essentially boils down to performing an unaligned VMX load sequence so
  14479. // as to avoid crossing a page boundary and then shuffling the elements
  14480. // into the right side of the vector register.
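    // A rough sketch of what gets emitted for NumBytes < 16 (the exact IR
    // depends on endianness and NumBytes):
    //   lo   = lvx(ptr)                  ; vector containing the first byte
    //   hi   = lvx(ptr + NumBytes - 1)   ; vector containing the last byte
    //   mask = lvsr(ptr) on LE, lvsl(ptr) on BE
    //   all  = vperm(hi, lo, mask) on LE, vperm(lo, hi, mask) on BE
    //   res  = shuffle/vperm of 'all' with zero so only NumBytes bytes remain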
  14481. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14482. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14483. int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
  14484. llvm::Type *ResTy = ConvertType(E->getType());
  14485. bool IsLE = getTarget().isLittleEndian();
  14486. // If the user wants the entire vector, just load the entire vector.
  14487. if (NumBytes == 16) {
  14488. Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo());
  14489. Value *LD =
  14490. Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
  14491. if (!IsLE)
  14492. return LD;
  14493. // Reverse the bytes on LE.
  14494. SmallVector<int, 16> RevMask;
  14495. for (int Idx = 0; Idx < 16; Idx++)
  14496. RevMask.push_back(15 - Idx);
  14497. return Builder.CreateShuffleVector(LD, LD, RevMask);
  14498. }
  14499. llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
  14500. llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
  14501. : Intrinsic::ppc_altivec_lvsl);
  14502. llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
  14503. Value *HiMem = Builder.CreateGEP(
  14504. Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
  14505. Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
  14506. Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
  14507. Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
  14508. Op0 = IsLE ? HiLd : LoLd;
  14509. Op1 = IsLE ? LoLd : HiLd;
  14510. Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
  14511. Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
  14512. if (IsLE) {
  14513. SmallVector<int, 16> Consts;
  14514. for (int Idx = 0; Idx < 16; Idx++) {
  14515. int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
  14516. : 16 - (NumBytes - Idx);
  14517. Consts.push_back(Val);
  14518. }
  14519. return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
  14520. Zero, Consts);
  14521. }
  14522. SmallVector<Constant *, 16> Consts;
  14523. for (int Idx = 0; Idx < 16; Idx++)
  14524. Consts.push_back(Builder.getInt8(NumBytes + Idx));
  14525. Value *Mask2 = ConstantVector::get(Consts);
  14526. return Builder.CreateBitCast(
  14527. Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
  14528. }
  14529. case PPC::BI__builtin_vsx_strmb: {
  14530. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14531. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14532. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14533. int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
  14534. bool IsLE = getTarget().isLittleEndian();
  14535. auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
      // When storing the whole vector, simply store it on BE; on LE, reverse
      // the bytes first and then store.
  14538. if (Width == 16) {
  14539. Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo());
  14540. Value *StVec = Op2;
  14541. if (IsLE) {
  14542. SmallVector<int, 16> RevMask;
  14543. for (int Idx = 0; Idx < 16; Idx++)
  14544. RevMask.push_back(15 - Idx);
  14545. StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
  14546. }
  14547. return Builder.CreateStore(
  14548. StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1)));
  14549. }
  14550. auto *ConvTy = Int64Ty;
  14551. unsigned NumElts = 0;
  14552. switch (Width) {
  14553. default:
  14554. llvm_unreachable("width for stores must be a power of 2");
  14555. case 8:
  14556. ConvTy = Int64Ty;
  14557. NumElts = 2;
  14558. break;
  14559. case 4:
  14560. ConvTy = Int32Ty;
  14561. NumElts = 4;
  14562. break;
  14563. case 2:
  14564. ConvTy = Int16Ty;
  14565. NumElts = 8;
  14566. break;
  14567. case 1:
  14568. ConvTy = Int8Ty;
  14569. NumElts = 16;
  14570. break;
  14571. }
  14572. Value *Vec = Builder.CreateBitCast(
  14573. Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
  14574. Value *Ptr =
  14575. Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
  14576. Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
  14577. Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
  14578. if (IsLE && Width > 1) {
  14579. Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
  14580. Elt = Builder.CreateCall(F, Elt);
  14581. }
  14582. return Builder.CreateStore(
  14583. Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1)));
  14584. };
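    // Store NumBytes by splitting it into power-of-two chunks from largest to
    // smallest; e.g. NumBytes == 11 becomes an 8-byte, a 2-byte and a 1-byte
    // store.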
  14585. unsigned Stored = 0;
  14586. unsigned RemainingBytes = NumBytes;
  14587. Value *Result;
  14588. if (NumBytes == 16)
  14589. return StoreSubVec(16, 0, 0);
  14590. if (NumBytes >= 8) {
  14591. Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
  14592. RemainingBytes -= 8;
  14593. Stored += 8;
  14594. }
  14595. if (RemainingBytes >= 4) {
  14596. Result = StoreSubVec(4, NumBytes - Stored - 4,
  14597. IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
  14598. RemainingBytes -= 4;
  14599. Stored += 4;
  14600. }
  14601. if (RemainingBytes >= 2) {
  14602. Result = StoreSubVec(2, NumBytes - Stored - 2,
  14603. IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
  14604. RemainingBytes -= 2;
  14605. Stored += 2;
  14606. }
  14607. if (RemainingBytes)
  14608. Result =
  14609. StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
  14610. return Result;
  14611. }
  14612. // Square root
  14613. case PPC::BI__builtin_vsx_xvsqrtsp:
  14614. case PPC::BI__builtin_vsx_xvsqrtdp: {
  14615. llvm::Type *ResultType = ConvertType(E->getType());
  14616. Value *X = EmitScalarExpr(E->getArg(0));
  14617. if (Builder.getIsFPConstrained()) {
  14618. llvm::Function *F = CGM.getIntrinsic(
  14619. Intrinsic::experimental_constrained_sqrt, ResultType);
  14620. return Builder.CreateConstrainedFPCall(F, X);
  14621. } else {
  14622. llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  14623. return Builder.CreateCall(F, X);
  14624. }
  14625. }
  14626. // Count leading zeros
  14627. case PPC::BI__builtin_altivec_vclzb:
  14628. case PPC::BI__builtin_altivec_vclzh:
  14629. case PPC::BI__builtin_altivec_vclzw:
  14630. case PPC::BI__builtin_altivec_vclzd: {
  14631. llvm::Type *ResultType = ConvertType(E->getType());
  14632. Value *X = EmitScalarExpr(E->getArg(0));
  14633. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  14634. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
  14635. return Builder.CreateCall(F, {X, Undef});
  14636. }
  14637. case PPC::BI__builtin_altivec_vctzb:
  14638. case PPC::BI__builtin_altivec_vctzh:
  14639. case PPC::BI__builtin_altivec_vctzw:
  14640. case PPC::BI__builtin_altivec_vctzd: {
  14641. llvm::Type *ResultType = ConvertType(E->getType());
  14642. Value *X = EmitScalarExpr(E->getArg(0));
  14643. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  14644. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
  14645. return Builder.CreateCall(F, {X, Undef});
  14646. }
  14647. case PPC::BI__builtin_altivec_vinsd:
  14648. case PPC::BI__builtin_altivec_vinsw:
  14649. case PPC::BI__builtin_altivec_vinsd_elt:
  14650. case PPC::BI__builtin_altivec_vinsw_elt: {
  14651. llvm::Type *ResultType = ConvertType(E->getType());
  14652. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14653. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14654. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14655. bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
  14656. BuiltinID == PPC::BI__builtin_altivec_vinsd);
  14657. bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
  14658. BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
  14659. // The third argument must be a compile time constant.
  14660. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
  14661. assert(ArgCI &&
  14662. "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
    // The valid range for the third argument depends on the input type and
    // the builtin called.
  14665. int ValidMaxValue = 0;
  14666. if (IsUnaligned)
  14667. ValidMaxValue = (Is32bit) ? 12 : 8;
  14668. else
  14669. ValidMaxValue = (Is32bit) ? 3 : 1;
  14670. // Get value of third argument.
  14671. int64_t ConstArg = ArgCI->getSExtValue();
  14672. // Compose range checking error message.
  14673. std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
  14674. RangeErrMsg += " number " + llvm::to_string(ConstArg);
  14675. RangeErrMsg += " is outside of the valid range [0, ";
  14676. RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
  14677. // Issue error if third argument is not within the valid range.
  14678. if (ConstArg < 0 || ConstArg > ValidMaxValue)
  14679. CGM.Error(E->getExprLoc(), RangeErrMsg);
    // The input to vec_replace_elt is an element index; convert it to a byte
    // index.
  14681. if (!IsUnaligned) {
  14682. ConstArg *= Is32bit ? 4 : 8;
    // Fix the constant according to endianness.
  14684. if (getTarget().isLittleEndian())
  14685. ConstArg = (Is32bit ? 12 : 8) - ConstArg;
  14686. }
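    // For example, replacing element 1 of a vector of 32-bit ints gives a
    // byte index of 4, which on little endian becomes 12 - 4 = 8.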
  14687. ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
  14688. Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
  14689. // Casting input to vector int as per intrinsic definition.
  14690. Op0 =
  14691. Is32bit
  14692. ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
  14693. : Builder.CreateBitCast(Op0,
  14694. llvm::FixedVectorType::get(Int64Ty, 2));
  14695. return Builder.CreateBitCast(
  14696. Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
  14697. }
  14698. case PPC::BI__builtin_altivec_vpopcntb:
  14699. case PPC::BI__builtin_altivec_vpopcnth:
  14700. case PPC::BI__builtin_altivec_vpopcntw:
  14701. case PPC::BI__builtin_altivec_vpopcntd: {
  14702. llvm::Type *ResultType = ConvertType(E->getType());
  14703. Value *X = EmitScalarExpr(E->getArg(0));
  14704. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  14705. return Builder.CreateCall(F, X);
  14706. }
  14707. case PPC::BI__builtin_altivec_vadduqm:
  14708. case PPC::BI__builtin_altivec_vsubuqm: {
  14709. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14710. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14711. llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
  14712. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
  14713. Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
  14714. if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
  14715. return Builder.CreateAdd(Op0, Op1, "vadduqm");
  14716. else
  14717. return Builder.CreateSub(Op0, Op1, "vsubuqm");
  14718. }
  14719. case PPC::BI__builtin_altivec_vaddcuq_c:
  14720. case PPC::BI__builtin_altivec_vsubcuq_c: {
  14721. SmallVector<Value *, 2> Ops;
  14722. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14723. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14724. llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
  14725. llvm::IntegerType::get(getLLVMContext(), 128), 1);
  14726. Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
  14727. Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
  14728. ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
  14729. ? Intrinsic::ppc_altivec_vaddcuq
  14730. : Intrinsic::ppc_altivec_vsubcuq;
  14731. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  14732. }
  14733. case PPC::BI__builtin_altivec_vaddeuqm_c:
  14734. case PPC::BI__builtin_altivec_vaddecuq_c:
  14735. case PPC::BI__builtin_altivec_vsubeuqm_c:
  14736. case PPC::BI__builtin_altivec_vsubecuq_c: {
  14737. SmallVector<Value *, 3> Ops;
  14738. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14739. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14740. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14741. llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
  14742. llvm::IntegerType::get(getLLVMContext(), 128), 1);
  14743. Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
  14744. Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
  14745. Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
  14746. switch (BuiltinID) {
  14747. default:
  14748. llvm_unreachable("Unsupported intrinsic!");
  14749. case PPC::BI__builtin_altivec_vaddeuqm_c:
  14750. ID = Intrinsic::ppc_altivec_vaddeuqm;
  14751. break;
  14752. case PPC::BI__builtin_altivec_vaddecuq_c:
  14753. ID = Intrinsic::ppc_altivec_vaddecuq;
  14754. break;
  14755. case PPC::BI__builtin_altivec_vsubeuqm_c:
  14756. ID = Intrinsic::ppc_altivec_vsubeuqm;
  14757. break;
  14758. case PPC::BI__builtin_altivec_vsubecuq_c:
  14759. ID = Intrinsic::ppc_altivec_vsubecuq;
  14760. break;
  14761. }
  14762. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  14763. }
  14764. // Rotate and insert under mask operation.
  14765. // __rldimi(rs, is, shift, mask)
  14766. // (rotl64(rs, shift) & mask) | (is & ~mask)
  14767. // __rlwimi(rs, is, shift, mask)
  14768. // (rotl(rs, shift) & mask) | (is & ~mask)
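  // For example, __rlwimi(rs, is, 8, 0x00FFFF00) yields
  // (rotl(rs, 8) & 0x00FFFF00) | (is & ~0x00FFFF00).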
  14769. case PPC::BI__builtin_ppc_rldimi:
  14770. case PPC::BI__builtin_ppc_rlwimi: {
  14771. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14772. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14773. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14774. Value *Op3 = EmitScalarExpr(E->getArg(3));
  14775. llvm::Type *Ty = Op0->getType();
  14776. Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
  14777. if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
  14778. Op2 = Builder.CreateZExt(Op2, Int64Ty);
  14779. Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
  14780. Value *X = Builder.CreateAnd(Shift, Op3);
  14781. Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
  14782. return Builder.CreateOr(X, Y);
  14783. }
  14784. // Rotate and insert under mask operation.
  14785. // __rlwnm(rs, shift, mask)
  14786. // rotl(rs, shift) & mask
  14787. case PPC::BI__builtin_ppc_rlwnm: {
  14788. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14789. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14790. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14791. llvm::Type *Ty = Op0->getType();
  14792. Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
  14793. Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
  14794. return Builder.CreateAnd(Shift, Op2);
  14795. }
  14796. case PPC::BI__builtin_ppc_poppar4:
  14797. case PPC::BI__builtin_ppc_poppar8: {
  14798. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14799. llvm::Type *ArgType = Op0->getType();
  14800. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  14801. Value *Tmp = Builder.CreateCall(F, Op0);
  14802. llvm::Type *ResultType = ConvertType(E->getType());
  14803. Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
  14804. if (Result->getType() != ResultType)
  14805. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  14806. "cast");
  14807. return Result;
  14808. }
  14809. case PPC::BI__builtin_ppc_cmpb: {
  14810. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14811. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14812. if (getTarget().getTriple().isPPC64()) {
  14813. Function *F =
  14814. CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
  14815. return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
  14816. }
    // For 32-bit targets, emit the code below:
  14818. // %conv = trunc i64 %a to i32
  14819. // %conv1 = trunc i64 %b to i32
  14820. // %shr = lshr i64 %a, 32
  14821. // %conv2 = trunc i64 %shr to i32
  14822. // %shr3 = lshr i64 %b, 32
  14823. // %conv4 = trunc i64 %shr3 to i32
  14824. // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
  14825. // %conv5 = zext i32 %0 to i64
  14826. // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
  14827. // %conv614 = zext i32 %1 to i64
  14828. // %shl = shl nuw i64 %conv614, 32
  14829. // %or = or i64 %shl, %conv5
  14830. // ret i64 %or
  14831. Function *F =
  14832. CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
  14833. Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
  14834. Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
  14835. Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
  14836. Value *ArgOneHi =
  14837. Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
  14838. Value *ArgTwoHi =
  14839. Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
  14840. Value *ResLo = Builder.CreateZExt(
  14841. Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
  14842. Value *ResHiShift = Builder.CreateZExt(
  14843. Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
  14844. Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
  14845. return Builder.CreateOr(ResLo, ResHi);
  14846. }
  14847. // Copy sign
  14848. case PPC::BI__builtin_vsx_xvcpsgnsp:
  14849. case PPC::BI__builtin_vsx_xvcpsgndp: {
  14850. llvm::Type *ResultType = ConvertType(E->getType());
  14851. Value *X = EmitScalarExpr(E->getArg(0));
  14852. Value *Y = EmitScalarExpr(E->getArg(1));
  14853. ID = Intrinsic::copysign;
  14854. llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
  14855. return Builder.CreateCall(F, {X, Y});
  14856. }
  14857. // Rounding/truncation
  14858. case PPC::BI__builtin_vsx_xvrspip:
  14859. case PPC::BI__builtin_vsx_xvrdpip:
  14860. case PPC::BI__builtin_vsx_xvrdpim:
  14861. case PPC::BI__builtin_vsx_xvrspim:
  14862. case PPC::BI__builtin_vsx_xvrdpi:
  14863. case PPC::BI__builtin_vsx_xvrspi:
  14864. case PPC::BI__builtin_vsx_xvrdpic:
  14865. case PPC::BI__builtin_vsx_xvrspic:
  14866. case PPC::BI__builtin_vsx_xvrdpiz:
  14867. case PPC::BI__builtin_vsx_xvrspiz: {
  14868. llvm::Type *ResultType = ConvertType(E->getType());
  14869. Value *X = EmitScalarExpr(E->getArg(0));
  14870. if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
  14871. BuiltinID == PPC::BI__builtin_vsx_xvrspim)
  14872. ID = Builder.getIsFPConstrained()
  14873. ? Intrinsic::experimental_constrained_floor
  14874. : Intrinsic::floor;
  14875. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
  14876. BuiltinID == PPC::BI__builtin_vsx_xvrspi)
  14877. ID = Builder.getIsFPConstrained()
  14878. ? Intrinsic::experimental_constrained_round
  14879. : Intrinsic::round;
  14880. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
  14881. BuiltinID == PPC::BI__builtin_vsx_xvrspic)
  14882. ID = Builder.getIsFPConstrained()
  14883. ? Intrinsic::experimental_constrained_rint
  14884. : Intrinsic::rint;
  14885. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
  14886. BuiltinID == PPC::BI__builtin_vsx_xvrspip)
  14887. ID = Builder.getIsFPConstrained()
  14888. ? Intrinsic::experimental_constrained_ceil
  14889. : Intrinsic::ceil;
  14890. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
  14891. BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
  14892. ID = Builder.getIsFPConstrained()
  14893. ? Intrinsic::experimental_constrained_trunc
  14894. : Intrinsic::trunc;
  14895. llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
  14896. return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
  14897. : Builder.CreateCall(F, X);
  14898. }
  14899. // Absolute value
  14900. case PPC::BI__builtin_vsx_xvabsdp:
  14901. case PPC::BI__builtin_vsx_xvabssp: {
  14902. llvm::Type *ResultType = ConvertType(E->getType());
  14903. Value *X = EmitScalarExpr(E->getArg(0));
  14904. llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  14905. return Builder.CreateCall(F, X);
  14906. }
  14907. // Fastmath by default
  14908. case PPC::BI__builtin_ppc_recipdivf:
  14909. case PPC::BI__builtin_ppc_recipdivd:
  14910. case PPC::BI__builtin_ppc_rsqrtf:
  14911. case PPC::BI__builtin_ppc_rsqrtd: {
  14912. FastMathFlags FMF = Builder.getFastMathFlags();
  14913. Builder.getFastMathFlags().setFast();
  14914. llvm::Type *ResultType = ConvertType(E->getType());
  14915. Value *X = EmitScalarExpr(E->getArg(0));
  14916. if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
  14917. BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
  14918. Value *Y = EmitScalarExpr(E->getArg(1));
  14919. Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
  14920. Builder.getFastMathFlags() &= (FMF);
  14921. return FDiv;
  14922. }
  14923. auto *One = ConstantFP::get(ResultType, 1.0);
  14924. llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  14925. Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
  14926. Builder.getFastMathFlags() &= (FMF);
  14927. return FDiv;
  14928. }
  14929. case PPC::BI__builtin_ppc_alignx: {
  14930. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14931. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14932. ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
  14933. if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
  14934. AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
  14935. llvm::Value::MaximumAlignment);
  14936. emitAlignmentAssumption(Op1, E->getArg(1),
  14937. /*The expr loc is sufficient.*/ SourceLocation(),
  14938. AlignmentCI, nullptr);
  14939. return Op1;
  14940. }
  14941. case PPC::BI__builtin_ppc_rdlam: {
  14942. Value *Op0 = EmitScalarExpr(E->getArg(0));
  14943. Value *Op1 = EmitScalarExpr(E->getArg(1));
  14944. Value *Op2 = EmitScalarExpr(E->getArg(2));
  14945. llvm::Type *Ty = Op0->getType();
  14946. Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
  14947. Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
  14948. Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
  14949. return Builder.CreateAnd(Rotate, Op2);
  14950. }
  14951. case PPC::BI__builtin_ppc_load2r: {
  14952. Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
  14953. Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  14954. Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
  14955. return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
  14956. }
  14957. // FMA variations
  14958. case PPC::BI__builtin_ppc_fnmsub:
  14959. case PPC::BI__builtin_ppc_fnmsubs:
  14960. case PPC::BI__builtin_vsx_xvmaddadp:
  14961. case PPC::BI__builtin_vsx_xvmaddasp:
  14962. case PPC::BI__builtin_vsx_xvnmaddadp:
  14963. case PPC::BI__builtin_vsx_xvnmaddasp:
  14964. case PPC::BI__builtin_vsx_xvmsubadp:
  14965. case PPC::BI__builtin_vsx_xvmsubasp:
  14966. case PPC::BI__builtin_vsx_xvnmsubadp:
  14967. case PPC::BI__builtin_vsx_xvnmsubasp: {
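    // These variants all reduce to llvm.fma with the appropriate negations:
    //   xvmadd         ->  fma(X, Y, Z)
    //   xvnmadd        -> -fma(X, Y, Z)
    //   xvmsub         ->  fma(X, Y, -Z)
    //   fnmsub/xvnmsub -> -fma(X, Y, -Z)  (via ppc.fnmsub when not
    //                                      FP-constrained)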
  14968. llvm::Type *ResultType = ConvertType(E->getType());
  14969. Value *X = EmitScalarExpr(E->getArg(0));
  14970. Value *Y = EmitScalarExpr(E->getArg(1));
  14971. Value *Z = EmitScalarExpr(E->getArg(2));
  14972. llvm::Function *F;
  14973. if (Builder.getIsFPConstrained())
  14974. F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  14975. else
  14976. F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  14977. switch (BuiltinID) {
  14978. case PPC::BI__builtin_vsx_xvmaddadp:
  14979. case PPC::BI__builtin_vsx_xvmaddasp:
  14980. if (Builder.getIsFPConstrained())
  14981. return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
  14982. else
  14983. return Builder.CreateCall(F, {X, Y, Z});
  14984. case PPC::BI__builtin_vsx_xvnmaddadp:
  14985. case PPC::BI__builtin_vsx_xvnmaddasp:
  14986. if (Builder.getIsFPConstrained())
  14987. return Builder.CreateFNeg(
  14988. Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
  14989. else
  14990. return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
  14991. case PPC::BI__builtin_vsx_xvmsubadp:
  14992. case PPC::BI__builtin_vsx_xvmsubasp:
  14993. if (Builder.getIsFPConstrained())
  14994. return Builder.CreateConstrainedFPCall(
  14995. F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  14996. else
  14997. return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  14998. case PPC::BI__builtin_ppc_fnmsub:
  14999. case PPC::BI__builtin_ppc_fnmsubs:
  15000. case PPC::BI__builtin_vsx_xvnmsubadp:
  15001. case PPC::BI__builtin_vsx_xvnmsubasp:
  15002. if (Builder.getIsFPConstrained())
  15003. return Builder.CreateFNeg(
  15004. Builder.CreateConstrainedFPCall(
  15005. F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
  15006. "neg");
  15007. else
  15008. return Builder.CreateCall(
  15009. CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
  15010. }
  15011. llvm_unreachable("Unknown FMA operation");
  15012. return nullptr; // Suppress no-return warning
  15013. }
  15014. case PPC::BI__builtin_vsx_insertword: {
  15015. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15016. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15017. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15018. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
    // The third argument is a compile-time constant int. It must be clamped
    // to the range [0, 12].
  15021. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
  15022. assert(ArgCI &&
  15023. "Third arg to xxinsertw intrinsic must be constant integer");
  15024. const int64_t MaxIndex = 12;
  15025. int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
  15028. // word from the first argument, and inserts it in the second argument. The
  15029. // instruction extracts the word from its second input register and inserts
  15030. // it into its first input register, so swap the first and second arguments.
  15031. std::swap(Op0, Op1);
  15032. // Need to cast the second argument from a vector of unsigned int to a
  15033. // vector of long long.
  15034. Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
  15035. if (getTarget().isLittleEndian()) {
  15036. // Reverse the double words in the vector we will extract from.
  15037. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
  15038. Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
  15039. // Reverse the index.
  15040. Index = MaxIndex - Index;
  15041. }
  15042. // Intrinsic expects the first arg to be a vector of int.
  15043. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
  15044. Op2 = ConstantInt::getSigned(Int32Ty, Index);
  15045. return Builder.CreateCall(F, {Op0, Op1, Op2});
  15046. }
  15047. case PPC::BI__builtin_vsx_extractuword: {
  15048. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15049. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15050. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
  15051. // Intrinsic expects the first argument to be a vector of doublewords.
  15052. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
  15053. // The second argument is a compile time constant int that needs to
  15054. // be clamped to the range [0, 12].
  15055. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
  15056. assert(ArgCI &&
  15057. "Second Arg to xxextractuw intrinsic must be a constant integer!");
  15058. const int64_t MaxIndex = 12;
  15059. int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
  15060. if (getTarget().isLittleEndian()) {
  15061. // Reverse the index.
  15062. Index = MaxIndex - Index;
  15063. Op1 = ConstantInt::getSigned(Int32Ty, Index);
  15064. // Emit the call, then reverse the double words of the results vector.
  15065. Value *Call = Builder.CreateCall(F, {Op0, Op1});
  15066. Value *ShuffleCall =
  15067. Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
  15068. return ShuffleCall;
  15069. } else {
  15070. Op1 = ConstantInt::getSigned(Int32Ty, Index);
  15071. return Builder.CreateCall(F, {Op0, Op1});
  15072. }
  15073. }
  15074. case PPC::BI__builtin_vsx_xxpermdi: {
  15075. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15076. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15077. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15078. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
  15079. assert(ArgCI && "Third arg must be constant integer!");
  15080. unsigned Index = ArgCI->getZExtValue();
  15081. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
  15082. Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
  15083. // Account for endianness by treating this as just a shuffle. So we use the
  15084. // same indices for both LE and BE in order to produce expected results in
  15085. // both cases.
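    // For example, Index == 1 selects doubleword 0 of Op0 and doubleword 1 of
    // Op1 (shuffle indices {0, 3}).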
  15086. int ElemIdx0 = (Index & 2) >> 1;
  15087. int ElemIdx1 = 2 + (Index & 1);
  15088. int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
  15089. Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
  15090. QualType BIRetType = E->getType();
  15091. auto RetTy = ConvertType(BIRetType);
  15092. return Builder.CreateBitCast(ShuffleCall, RetTy);
  15093. }
  15094. case PPC::BI__builtin_vsx_xxsldwi: {
  15095. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15096. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15097. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15098. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
  15099. assert(ArgCI && "Third argument must be a compile time constant");
  15100. unsigned Index = ArgCI->getZExtValue() & 0x3;
  15101. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
  15102. Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
  15103. // Create a shuffle mask
  15104. int ElemIdx0;
  15105. int ElemIdx1;
  15106. int ElemIdx2;
  15107. int ElemIdx3;
  15108. if (getTarget().isLittleEndian()) {
  15109. // Little endian element N comes from element 8+N-Index of the
  15110. // concatenated wide vector (of course, using modulo arithmetic on
  15111. // the total number of elements).
  15112. ElemIdx0 = (8 - Index) % 8;
  15113. ElemIdx1 = (9 - Index) % 8;
  15114. ElemIdx2 = (10 - Index) % 8;
  15115. ElemIdx3 = (11 - Index) % 8;
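      // For example, Index == 1 gives the mask {7, 0, 1, 2}.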
  15116. } else {
  15117. // Big endian ElemIdx<N> = Index + N
  15118. ElemIdx0 = Index;
  15119. ElemIdx1 = Index + 1;
  15120. ElemIdx2 = Index + 2;
  15121. ElemIdx3 = Index + 3;
  15122. }
  15123. int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
  15124. Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
  15125. QualType BIRetType = E->getType();
  15126. auto RetTy = ConvertType(BIRetType);
  15127. return Builder.CreateBitCast(ShuffleCall, RetTy);
  15128. }
  15129. case PPC::BI__builtin_pack_vector_int128: {
  15130. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15131. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15132. bool isLittleEndian = getTarget().isLittleEndian();
  15133. Value *PoisonValue =
  15134. llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
  15135. Value *Res = Builder.CreateInsertElement(
  15136. PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
  15137. Res = Builder.CreateInsertElement(Res, Op1,
  15138. (uint64_t)(isLittleEndian ? 0 : 1));
  15139. return Builder.CreateBitCast(Res, ConvertType(E->getType()));
  15140. }
  15141. case PPC::BI__builtin_unpack_vector_int128: {
  15142. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15143. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15144. ConstantInt *Index = cast<ConstantInt>(Op1);
  15145. Value *Unpacked = Builder.CreateBitCast(
  15146. Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
  15147. if (getTarget().isLittleEndian())
  15148. Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
  15149. return Builder.CreateExtractElement(Unpacked, Index);
  15150. }
  15151. case PPC::BI__builtin_ppc_sthcx: {
  15152. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
  15153. Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  15154. Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
  15155. return Builder.CreateCall(F, {Op0, Op1});
  15156. }
  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
  // Some of the MMA instructions accumulate their result into an existing
  // accumulator whereas the others generate a new accumulator. So we need
  // custom code generation to expand a builtin call into a load of the
  // accumulator through the pointer (if the corresponding instruction
  // accumulates its result), followed by the call to the intrinsic and a
  // store of the result.
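  // As a rough example (not tied to any one builtin): a call to an
  // accumulating builtin such as __builtin_mma_xvf32gerpp(&acc, a, b) expands
  // to a load of the accumulator through the pointer, a call to the
  // corresponding llvm.ppc.mma intrinsic with the loaded accumulator and the
  // vector operands, and a 64-byte-aligned store of the returned accumulator
  // back through the pointer.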
  15163. #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
  15164. case PPC::BI__builtin_##Name:
  15165. #include "clang/Basic/BuiltinsPPC.def"
  15166. {
  15167. SmallVector<Value *, 4> Ops;
  15168. for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
  15169. if (E->getArg(i)->getType()->isArrayType())
  15170. Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
  15171. else
  15172. Ops.push_back(EmitScalarExpr(E->getArg(i)));
    // The first argument of these builtins is a pointer used to store their
    // result. However, the LLVM intrinsics return their result in multiple
    // return values. So, here we emit code extracting these values from the
    // intrinsic results and storing them using that pointer.
  15177. if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
  15178. BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
  15179. BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
  15180. unsigned NumVecs = 2;
  15181. auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
  15182. if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
  15183. NumVecs = 4;
  15184. Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
  15185. }
  15186. llvm::Function *F = CGM.getIntrinsic(Intrinsic);
  15187. Address Addr = EmitPointerWithAlignment(E->getArg(1));
  15188. Value *Vec = Builder.CreateLoad(Addr);
  15189. Value *Call = Builder.CreateCall(F, {Vec});
  15190. llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  15191. Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
      for (unsigned i = 0; i < NumVecs; i++) {
        Value *Vec = Builder.CreateExtractValue(Call, i);
        llvm::ConstantInt *Index = llvm::ConstantInt::get(IntTy, i);
        Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
        Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
      }
  15198. return Call;
  15199. }
  15200. if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
  15201. BuiltinID == PPC::BI__builtin_mma_build_acc) {
      // Reverse the order of the operands for LE, so the same builtin call
      // can be used on both LE and BE without the need for the programmer to
      // swap operands. The operands are reversed starting from the second
      // argument; the first operand is the pointer to the pair/accumulator
      // that is being built.
  15208. if (getTarget().isLittleEndian())
  15209. std::reverse(Ops.begin() + 1, Ops.end());
  15210. }
  15211. bool Accumulate;
  15212. switch (BuiltinID) {
  15213. #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
  15214. case PPC::BI__builtin_##Name: \
  15215. ID = Intrinsic::ppc_##Intr; \
  15216. Accumulate = Acc; \
  15217. break;
  15218. #include "clang/Basic/BuiltinsPPC.def"
  15219. }
  15220. if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
  15221. BuiltinID == PPC::BI__builtin_vsx_stxvp ||
  15222. BuiltinID == PPC::BI__builtin_mma_lxvp ||
  15223. BuiltinID == PPC::BI__builtin_mma_stxvp) {
  15224. if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
  15225. BuiltinID == PPC::BI__builtin_mma_lxvp) {
  15226. Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
  15227. Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
  15228. } else {
  15229. Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
  15230. Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
  15231. }
  15232. Ops.pop_back();
  15233. llvm::Function *F = CGM.getIntrinsic(ID);
  15234. return Builder.CreateCall(F, Ops, "");
  15235. }
  15236. SmallVector<Value*, 4> CallOps;
  15237. if (Accumulate) {
  15238. Address Addr = EmitPointerWithAlignment(E->getArg(0));
  15239. Value *Acc = Builder.CreateLoad(Addr);
  15240. CallOps.push_back(Acc);
  15241. }
    for (unsigned i = 1; i < Ops.size(); i++)
      CallOps.push_back(Ops[i]);
  15244. llvm::Function *F = CGM.getIntrinsic(ID);
  15245. Value *Call = Builder.CreateCall(F, CallOps);
  15246. return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
  15247. }
  15248. case PPC::BI__builtin_ppc_compare_and_swap:
  15249. case PPC::BI__builtin_ppc_compare_and_swaplp: {
  15250. Address Addr = EmitPointerWithAlignment(E->getArg(0));
  15251. Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
  15252. Value *OldVal = Builder.CreateLoad(OldValAddr);
  15253. QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
  15254. LValue LV = MakeAddrLValue(Addr, AtomicTy);
  15255. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15256. auto Pair = EmitAtomicCompareExchange(
  15257. LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
  15258. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
    // Unlike C11's atomic_compare_exchange, according to
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
    // > In either case, the contents of the memory location specified by addr
    // > are copied into the memory location specified by old_val_addr.
    // But it does not specify whether the store to OldValAddr is atomic or
    // which ordering to use. Following XL's codegen, treat it as a normal
    // store.
  15266. Value *LoadedVal = Pair.first.getScalarVal();
  15267. Builder.CreateStore(LoadedVal, OldValAddr);
  15268. return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
  15269. }
  15270. case PPC::BI__builtin_ppc_fetch_and_add:
  15271. case PPC::BI__builtin_ppc_fetch_and_addlp: {
  15272. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  15273. llvm::AtomicOrdering::Monotonic);
  15274. }
  15275. case PPC::BI__builtin_ppc_fetch_and_and:
  15276. case PPC::BI__builtin_ppc_fetch_and_andlp: {
  15277. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  15278. llvm::AtomicOrdering::Monotonic);
  15279. }
  15280. case PPC::BI__builtin_ppc_fetch_and_or:
  15281. case PPC::BI__builtin_ppc_fetch_and_orlp: {
  15282. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  15283. llvm::AtomicOrdering::Monotonic);
  15284. }
  15285. case PPC::BI__builtin_ppc_fetch_and_swap:
  15286. case PPC::BI__builtin_ppc_fetch_and_swaplp: {
  15287. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  15288. llvm::AtomicOrdering::Monotonic);
  15289. }
  15290. case PPC::BI__builtin_ppc_ldarx:
  15291. case PPC::BI__builtin_ppc_lwarx:
  15292. case PPC::BI__builtin_ppc_lharx:
  15293. case PPC::BI__builtin_ppc_lbarx:
  15294. return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
  15295. case PPC::BI__builtin_ppc_mfspr: {
  15296. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15297. llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
  15298. ? Int32Ty
  15299. : Int64Ty;
  15300. Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
  15301. return Builder.CreateCall(F, {Op0});
  15302. }
  15303. case PPC::BI__builtin_ppc_mtspr: {
  15304. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15305. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15306. llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
  15307. ? Int32Ty
  15308. : Int64Ty;
  15309. Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
  15310. return Builder.CreateCall(F, {Op0, Op1});
  15311. }
  15312. case PPC::BI__builtin_ppc_popcntb: {
  15313. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  15314. llvm::Type *ArgType = ArgValue->getType();
  15315. Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
  15316. return Builder.CreateCall(F, {ArgValue}, "popcntb");
  15317. }
  15318. case PPC::BI__builtin_ppc_mtfsf: {
  15319. // The builtin takes a uint32 that needs to be cast to an
  15320. // f64 to be passed to the intrinsic.
  15321. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15322. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15323. Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
  15324. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
  15325. return Builder.CreateCall(F, {Op0, Cast}, "");
  15326. }
  15327. case PPC::BI__builtin_ppc_swdiv_nochk:
  15328. case PPC::BI__builtin_ppc_swdivs_nochk: {
  15329. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15330. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15331. FastMathFlags FMF = Builder.getFastMathFlags();
  15332. Builder.getFastMathFlags().setFast();
  15333. Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
  15334. Builder.getFastMathFlags() &= (FMF);
  15335. return FDiv;
  15336. }
  15337. case PPC::BI__builtin_ppc_fric:
  15338. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15339. *this, E, Intrinsic::rint,
  15340. Intrinsic::experimental_constrained_rint))
  15341. .getScalarVal();
  15342. case PPC::BI__builtin_ppc_frim:
  15343. case PPC::BI__builtin_ppc_frims:
  15344. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15345. *this, E, Intrinsic::floor,
  15346. Intrinsic::experimental_constrained_floor))
  15347. .getScalarVal();
  15348. case PPC::BI__builtin_ppc_frin:
  15349. case PPC::BI__builtin_ppc_frins:
  15350. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15351. *this, E, Intrinsic::round,
  15352. Intrinsic::experimental_constrained_round))
  15353. .getScalarVal();
  15354. case PPC::BI__builtin_ppc_frip:
  15355. case PPC::BI__builtin_ppc_frips:
  15356. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15357. *this, E, Intrinsic::ceil,
  15358. Intrinsic::experimental_constrained_ceil))
  15359. .getScalarVal();
  15360. case PPC::BI__builtin_ppc_friz:
  15361. case PPC::BI__builtin_ppc_frizs:
  15362. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15363. *this, E, Intrinsic::trunc,
  15364. Intrinsic::experimental_constrained_trunc))
  15365. .getScalarVal();
  15366. case PPC::BI__builtin_ppc_fsqrt:
  15367. case PPC::BI__builtin_ppc_fsqrts:
  15368. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  15369. *this, E, Intrinsic::sqrt,
  15370. Intrinsic::experimental_constrained_sqrt))
  15371. .getScalarVal();
  15372. case PPC::BI__builtin_ppc_test_data_class: {
  15373. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15374. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15375. return Builder.CreateCall(
  15376. CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
  15377. {Op0, Op1}, "test_data_class");
  15378. }
  15379. case PPC::BI__builtin_ppc_maxfe: {
  15380. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15381. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15382. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15383. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15384. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
  15385. {Op0, Op1, Op2, Op3});
  15386. }
  15387. case PPC::BI__builtin_ppc_maxfl: {
  15388. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15389. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15390. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15391. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15392. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
  15393. {Op0, Op1, Op2, Op3});
  15394. }
  15395. case PPC::BI__builtin_ppc_maxfs: {
  15396. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15397. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15398. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15399. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15400. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
  15401. {Op0, Op1, Op2, Op3});
  15402. }
  15403. case PPC::BI__builtin_ppc_minfe: {
  15404. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15405. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15406. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15407. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15408. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
  15409. {Op0, Op1, Op2, Op3});
  15410. }
  15411. case PPC::BI__builtin_ppc_minfl: {
  15412. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15413. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15414. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15415. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15416. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
  15417. {Op0, Op1, Op2, Op3});
  15418. }
  15419. case PPC::BI__builtin_ppc_minfs: {
  15420. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15421. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15422. Value *Op2 = EmitScalarExpr(E->getArg(2));
  15423. Value *Op3 = EmitScalarExpr(E->getArg(3));
  15424. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
  15425. {Op0, Op1, Op2, Op3});
  15426. }
  15427. case PPC::BI__builtin_ppc_swdiv:
  15428. case PPC::BI__builtin_ppc_swdivs: {
  15429. Value *Op0 = EmitScalarExpr(E->getArg(0));
  15430. Value *Op1 = EmitScalarExpr(E->getArg(1));
  15431. return Builder.CreateFDiv(Op0, Op1, "swdiv");
  15432. }
  15433. }
  15434. }
  15435. namespace {
// If \p E is not a null pointer, insert an address space cast to match the
// return type of \p E if necessary.
  15438. Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
  15439. const CallExpr *E = nullptr) {
  15440. auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  15441. auto *Call = CGF.Builder.CreateCall(F);
  15442. Call->addRetAttr(
  15443. Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
  15444. Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
  15445. if (!E)
  15446. return Call;
  15447. QualType BuiltinRetType = E->getType();
  15448. auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  15449. if (RetTy == Call->getType())
  15450. return Call;
  15451. return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
  15452. }
  15453. Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
  15454. auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
  15455. auto *Call = CGF.Builder.CreateCall(F);
  15456. Call->addRetAttr(
  15457. Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
  15458. Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
  15459. return Call;
  15460. }
  15461. // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
  15462. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
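  // The work-group sizes are 16-bit fields. For code object V5 they are read
  // from the implicit kernarg segment at byte offsets 12, 14 and 16 for x, y
  // and z; otherwise from the hsa_kernel_dispatch_packet_t
  // workgroup_size_x/y/z fields at byte offsets 4, 6 and 8.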
  15463. bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion ==
  15464. clang::TargetOptions::COV_5;
  15465. Constant *Offset;
  15466. Value *DP;
  15467. if (IsCOV_5) {
  15468. // Indexing the implicit kernarg segment.
  15469. Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2);
  15470. DP = EmitAMDGPUImplicitArgPtr(CGF);
  15471. } else {
  15472. // Indexing the HSA kernel_dispatch_packet struct.
  15473. Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2);
  15474. DP = EmitAMDGPUDispatchPtr(CGF);
  15475. }
  15476. auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  15477. auto *DstTy =
  15478. CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  15479. auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  15480. auto *LD = CGF.Builder.CreateLoad(
  15481. Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2)));
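// The workgroup size is non-zero, bounded by the target's maximum workgroup
// size, and fixed for the lifetime of the kernel, so attach range, noundef,
// and invariant.load metadata to the load below.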
  15482. llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  15483. llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
  15484. APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  15485. LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
  15486. LD->setMetadata(llvm::LLVMContext::MD_noundef,
  15487. llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  15488. LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
  15489. llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  15490. return LD;
  15491. }
15492. // \p Index is 0, 1, or 2 for the x, y, and z dimensions, respectively.
  15493. Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  15494. const unsigned XOffset = 12;
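// grid_size_x lives at byte offset 12 of the HSA kernel_dispatch_packet; the
// y and z fields follow at 4-byte strides.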
  15495. auto *DP = EmitAMDGPUDispatchPtr(CGF);
  15496. // Indexing the HSA kernel_dispatch_packet struct.
  15497. auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  15498. auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  15499. auto *DstTy =
  15500. CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  15501. auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  15502. auto *LD = CGF.Builder.CreateLoad(
  15503. Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
  15504. LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
  15505. llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  15506. return LD;
  15507. }
  15508. } // namespace
15509. // Processes the memory ordering and memory scope arguments of various
15510. // amdgcn builtins.
15511. // \p Order takes a C++11-compatible memory-ordering specifier and converts
15512. // it into LLVM's memory ordering specifier using the atomic C ABI, writing
15513. // the result to \p AO. \p Scope takes a const char * and converts it into an
15514. // AMDGCN-specific SyncScope ID, writing it to \p SSID.
  15515. void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
  15516. llvm::AtomicOrdering &AO,
  15517. llvm::SyncScope::ID &SSID) {
  15518. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  15519. // Map C11/C++11 memory ordering to LLVM memory ordering
  15520. assert(llvm::isValidAtomicOrderingCABI(ord));
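// Note that 'consume' is strengthened to 'acquire'; LLVM has no separate
// consume ordering.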
  15521. switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
  15522. case llvm::AtomicOrderingCABI::acquire:
  15523. case llvm::AtomicOrderingCABI::consume:
  15524. AO = llvm::AtomicOrdering::Acquire;
  15525. break;
  15526. case llvm::AtomicOrderingCABI::release:
  15527. AO = llvm::AtomicOrdering::Release;
  15528. break;
  15529. case llvm::AtomicOrderingCABI::acq_rel:
  15530. AO = llvm::AtomicOrdering::AcquireRelease;
  15531. break;
  15532. case llvm::AtomicOrderingCABI::seq_cst:
  15533. AO = llvm::AtomicOrdering::SequentiallyConsistent;
  15534. break;
  15535. case llvm::AtomicOrderingCABI::relaxed:
  15536. AO = llvm::AtomicOrdering::Monotonic;
  15537. break;
  15538. }
  15539. StringRef scp;
  15540. llvm::getConstantStringInfo(Scope, scp);
  15541. SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
  15542. }
  15543. Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
  15544. const CallExpr *E) {
  15545. llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
  15546. llvm::SyncScope::ID SSID;
  15547. switch (BuiltinID) {
  15548. case AMDGPU::BI__builtin_amdgcn_div_scale:
  15549. case AMDGPU::BI__builtin_amdgcn_div_scalef: {
15550. // Translate from the intrinsic's struct return to the builtin's out
15551. // argument.
  15552. Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
  15553. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  15554. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  15555. llvm::Value *Z = EmitScalarExpr(E->getArg(2));
  15556. llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
  15557. X->getType());
  15558. llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
  15559. llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
  15560. llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
  15561. llvm::Type *RealFlagType = FlagOutPtr.getElementType();
  15562. llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
  15563. Builder.CreateStore(FlagExt, FlagOutPtr);
  15564. return Result;
  15565. }
  15566. case AMDGPU::BI__builtin_amdgcn_div_fmas:
  15567. case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
  15568. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15569. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15570. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15571. llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
  15572. llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
  15573. Src0->getType());
  15574. llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
  15575. return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  15576. }
  15577. case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
  15578. return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  15579. case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
  15580. return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
  15581. case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  15582. case AMDGPU::BI__builtin_amdgcn_update_dpp: {
  15583. llvm::SmallVector<llvm::Value *, 6> Args;
  15584. for (unsigned I = 0; I != E->getNumArgs(); ++I)
  15585. Args.push_back(EmitScalarExpr(E->getArg(I)));
  15586. assert(Args.size() == 5 || Args.size() == 6);
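// __builtin_amdgcn_mov_dpp has no 'old' operand, so synthesize a poison value
// for it; both builtins then lower to llvm.amdgcn.update.dpp.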
  15587. if (Args.size() == 5)
  15588. Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
  15589. Function *F =
  15590. CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
  15591. return Builder.CreateCall(F, Args);
  15592. }
  15593. case AMDGPU::BI__builtin_amdgcn_div_fixup:
  15594. case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  15595. case AMDGPU::BI__builtin_amdgcn_div_fixuph:
  15596. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  15597. case AMDGPU::BI__builtin_amdgcn_trig_preop:
  15598. case AMDGPU::BI__builtin_amdgcn_trig_preopf:
  15599. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  15600. case AMDGPU::BI__builtin_amdgcn_rcp:
  15601. case AMDGPU::BI__builtin_amdgcn_rcpf:
  15602. case AMDGPU::BI__builtin_amdgcn_rcph:
  15603. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  15604. case AMDGPU::BI__builtin_amdgcn_sqrt:
  15605. case AMDGPU::BI__builtin_amdgcn_sqrtf:
  15606. case AMDGPU::BI__builtin_amdgcn_sqrth:
  15607. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
  15608. case AMDGPU::BI__builtin_amdgcn_rsq:
  15609. case AMDGPU::BI__builtin_amdgcn_rsqf:
  15610. case AMDGPU::BI__builtin_amdgcn_rsqh:
  15611. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  15612. case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  15613. case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
  15614. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  15615. case AMDGPU::BI__builtin_amdgcn_sinf:
  15616. case AMDGPU::BI__builtin_amdgcn_sinh:
  15617. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  15618. case AMDGPU::BI__builtin_amdgcn_cosf:
  15619. case AMDGPU::BI__builtin_amdgcn_cosh:
  15620. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  15621. case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
  15622. return EmitAMDGPUDispatchPtr(*this, E);
  15623. case AMDGPU::BI__builtin_amdgcn_log_clampf:
  15624. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  15625. case AMDGPU::BI__builtin_amdgcn_ldexp:
  15626. case AMDGPU::BI__builtin_amdgcn_ldexpf:
  15627. case AMDGPU::BI__builtin_amdgcn_ldexph:
  15628. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  15629. case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  15630. case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  15631. case AMDGPU::BI__builtin_amdgcn_frexp_manth:
  15632. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  15633. case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  15634. case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
  15635. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15636. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  15637. { Builder.getInt32Ty(), Src0->getType() });
  15638. return Builder.CreateCall(F, Src0);
  15639. }
  15640. case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
  15641. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15642. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  15643. { Builder.getInt16Ty(), Src0->getType() });
  15644. return Builder.CreateCall(F, Src0);
  15645. }
  15646. case AMDGPU::BI__builtin_amdgcn_fract:
  15647. case AMDGPU::BI__builtin_amdgcn_fractf:
  15648. case AMDGPU::BI__builtin_amdgcn_fracth:
  15649. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  15650. case AMDGPU::BI__builtin_amdgcn_lerp:
  15651. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  15652. case AMDGPU::BI__builtin_amdgcn_ubfe:
  15653. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
  15654. case AMDGPU::BI__builtin_amdgcn_sbfe:
  15655. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
  15656. case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  15657. case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
  15658. llvm::Type *ResultType = ConvertType(E->getType());
  15659. llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  15660. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
  15661. return Builder.CreateCall(F, { Src });
  15662. }
  15663. case AMDGPU::BI__builtin_amdgcn_uicmp:
  15664. case AMDGPU::BI__builtin_amdgcn_uicmpl:
  15665. case AMDGPU::BI__builtin_amdgcn_sicmp:
  15666. case AMDGPU::BI__builtin_amdgcn_sicmpl: {
  15667. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15668. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15669. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15670. // FIXME-GFX10: How should 32 bit mask be handled?
  15671. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
  15672. { Builder.getInt64Ty(), Src0->getType() });
  15673. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15674. }
  15675. case AMDGPU::BI__builtin_amdgcn_fcmp:
  15676. case AMDGPU::BI__builtin_amdgcn_fcmpf: {
  15677. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15678. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15679. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15680. // FIXME-GFX10: How should 32 bit mask be handled?
  15681. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
  15682. { Builder.getInt64Ty(), Src0->getType() });
  15683. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15684. }
  15685. case AMDGPU::BI__builtin_amdgcn_class:
  15686. case AMDGPU::BI__builtin_amdgcn_classf:
  15687. case AMDGPU::BI__builtin_amdgcn_classh:
  15688. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  15689. case AMDGPU::BI__builtin_amdgcn_fmed3f:
  15690. case AMDGPU::BI__builtin_amdgcn_fmed3h:
  15691. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
  15692. case AMDGPU::BI__builtin_amdgcn_ds_append:
  15693. case AMDGPU::BI__builtin_amdgcn_ds_consume: {
  15694. Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
  15695. Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
  15696. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15697. Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
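// The trailing i1 operand selects GDS; these builtins always operate on LDS,
// so pass false.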
  15698. return Builder.CreateCall(F, { Src0, Builder.getFalse() });
  15699. }
  15700. case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  15701. case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  15702. case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
  15703. Intrinsic::ID Intrin;
  15704. switch (BuiltinID) {
  15705. case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  15706. Intrin = Intrinsic::amdgcn_ds_fadd;
  15707. break;
  15708. case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  15709. Intrin = Intrinsic::amdgcn_ds_fmin;
  15710. break;
  15711. case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
  15712. Intrin = Intrinsic::amdgcn_ds_fmax;
  15713. break;
  15714. }
  15715. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15716. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15717. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15718. llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
  15719. llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
  15720. llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
  15721. llvm::FunctionType *FTy = F->getFunctionType();
  15722. llvm::Type *PTy = FTy->getParamType(0);
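// The builtin's pointer argument may be in a different address space than the
// one the intrinsic expects, so cast it to the intrinsic's parameter type.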
  15723. Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
  15724. return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
  15725. }
  15726. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  15727. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  15728. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  15729. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  15730. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  15731. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  15732. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  15733. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
  15734. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
  15735. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
  15736. Intrinsic::ID IID;
  15737. llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
  15738. switch (BuiltinID) {
  15739. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  15740. ArgTy = llvm::Type::getFloatTy(getLLVMContext());
  15741. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15742. break;
  15743. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  15744. ArgTy = llvm::FixedVectorType::get(
  15745. llvm::Type::getHalfTy(getLLVMContext()), 2);
  15746. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15747. break;
  15748. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  15749. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15750. break;
  15751. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  15752. IID = Intrinsic::amdgcn_global_atomic_fmin;
  15753. break;
  15754. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  15755. IID = Intrinsic::amdgcn_global_atomic_fmax;
  15756. break;
  15757. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  15758. IID = Intrinsic::amdgcn_flat_atomic_fadd;
  15759. break;
  15760. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  15761. IID = Intrinsic::amdgcn_flat_atomic_fmin;
  15762. break;
  15763. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
  15764. IID = Intrinsic::amdgcn_flat_atomic_fmax;
  15765. break;
  15766. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
  15767. ArgTy = llvm::Type::getFloatTy(getLLVMContext());
  15768. IID = Intrinsic::amdgcn_flat_atomic_fadd;
  15769. break;
  15770. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
  15771. ArgTy = llvm::FixedVectorType::get(
  15772. llvm::Type::getHalfTy(getLLVMContext()), 2);
  15773. IID = Intrinsic::amdgcn_flat_atomic_fadd;
  15774. break;
  15775. }
  15776. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  15777. llvm::Value *Val = EmitScalarExpr(E->getArg(1));
  15778. llvm::Function *F =
  15779. CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
  15780. return Builder.CreateCall(F, {Addr, Val});
  15781. }
  15782. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
  15783. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
  15784. Intrinsic::ID IID;
  15785. switch (BuiltinID) {
  15786. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
  15787. IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
  15788. break;
  15789. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
  15790. IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
  15791. break;
  15792. }
  15793. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  15794. llvm::Value *Val = EmitScalarExpr(E->getArg(1));
  15795. llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
  15796. return Builder.CreateCall(F, {Addr, Val});
  15797. }
  15798. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  15799. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
  15800. Intrinsic::ID IID;
  15801. llvm::Type *ArgTy;
  15802. switch (BuiltinID) {
  15803. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
  15804. ArgTy = llvm::Type::getFloatTy(getLLVMContext());
  15805. IID = Intrinsic::amdgcn_ds_fadd;
  15806. break;
  15807. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  15808. ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
  15809. IID = Intrinsic::amdgcn_ds_fadd;
  15810. break;
  15811. }
  15812. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  15813. llvm::Value *Val = EmitScalarExpr(E->getArg(1));
  15814. llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
  15815. llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
  15816. llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
  15817. llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
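// Besides the pointer and value, llvm.amdgcn.ds.fadd takes ordering, scope,
// and volatile operands; pass zeros (and false) for the defaults here.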
  15818. llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
  15819. return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
  15820. }
  15821. case AMDGPU::BI__builtin_amdgcn_read_exec: {
  15822. CallInst *CI = cast<CallInst>(
  15823. EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
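// Mark the read convergent so it cannot be moved across control flow that
// might change the set of active lanes.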
  15824. CI->setConvergent();
  15825. return CI;
  15826. }
  15827. case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
  15828. case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
  15829. StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
  15830. "exec_lo" : "exec_hi";
  15831. CallInst *CI = cast<CallInst>(
  15832. EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
  15833. CI->setConvergent();
  15834. return CI;
  15835. }
  15836. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  15837. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  15838. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  15839. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
  15840. llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
  15841. llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
  15842. llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
  15843. llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
  15844. llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
  15845. llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
  15846. // The builtins take these arguments as vec4 where the last element is
  15847. // ignored. The intrinsic takes them as vec3.
  15848. RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
  15849. ArrayRef<int>{0, 1, 2});
  15850. RayDir =
  15851. Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
  15852. RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
  15853. ArrayRef<int>{0, 1, 2});
  15854. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
  15855. {NodePtr->getType(), RayDir->getType()});
  15856. return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
  15857. RayInverseDir, TextureDescr});
  15858. }
  15859. case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
  15860. SmallVector<Value *, 4> Args;
  15861. for (int i = 0, e = E->getNumArgs(); i != e; ++i)
  15862. Args.push_back(EmitScalarExpr(E->getArg(i)));
  15863. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
  15864. Value *Call = Builder.CreateCall(F, Args);
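// The intrinsic returns a two-element struct; repack it into the vector type
// the builtin is declared to return.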
  15865. Value *Rtn = Builder.CreateExtractValue(Call, 0);
  15866. Value *A = Builder.CreateExtractValue(Call, 1);
  15867. llvm::Type *RetTy = ConvertType(E->getType());
  15868. Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
  15869. (uint64_t)0);
  15870. return Builder.CreateInsertElement(I0, A, 1);
  15871. }
  15872. case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
  15873. case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
  15874. case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
  15875. case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
  15876. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
  15877. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
  15878. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
  15879. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
  15880. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
  15881. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
  15882. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
  15883. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
  15884. // These operations perform a matrix multiplication and accumulation of
  15885. // the form:
  15886. // D = A * B + C
  15887. // The return type always matches the type of matrix C.
  15888. unsigned ArgForMatchingRetType;
  15889. unsigned BuiltinWMMAOp;
  15890. switch (BuiltinID) {
  15891. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
  15892. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
  15893. ArgForMatchingRetType = 2;
  15894. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
  15895. break;
  15896. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
  15897. case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
  15898. ArgForMatchingRetType = 2;
  15899. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
  15900. break;
  15901. case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
  15902. case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
  15903. ArgForMatchingRetType = 2;
  15904. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
  15905. break;
  15906. case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
  15907. case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
  15908. ArgForMatchingRetType = 2;
  15909. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
  15910. break;
  15911. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
  15912. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
  15913. ArgForMatchingRetType = 4;
  15914. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
  15915. break;
  15916. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
  15917. case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
  15918. ArgForMatchingRetType = 4;
  15919. BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
  15920. break;
  15921. }
  15922. SmallVector<Value *, 6> Args;
  15923. for (int i = 0, e = E->getNumArgs(); i != e; ++i)
  15924. Args.push_back(EmitScalarExpr(E->getArg(i)));
  15925. Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
  15926. {Args[ArgForMatchingRetType]->getType()});
  15927. return Builder.CreateCall(F, Args);
  15928. }
  15929. // amdgcn workitem
  15930. case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
  15931. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  15932. case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
  15933. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  15934. case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
  15935. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
  15936. // amdgcn workgroup size
  15937. case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
  15938. return EmitAMDGPUWorkGroupSize(*this, 0);
  15939. case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
  15940. return EmitAMDGPUWorkGroupSize(*this, 1);
  15941. case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
  15942. return EmitAMDGPUWorkGroupSize(*this, 2);
  15943. // amdgcn grid size
  15944. case AMDGPU::BI__builtin_amdgcn_grid_size_x:
  15945. return EmitAMDGPUGridSize(*this, 0);
  15946. case AMDGPU::BI__builtin_amdgcn_grid_size_y:
  15947. return EmitAMDGPUGridSize(*this, 1);
  15948. case AMDGPU::BI__builtin_amdgcn_grid_size_z:
  15949. return EmitAMDGPUGridSize(*this, 2);
  15950. // r600 intrinsics
  15951. case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  15952. case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
  15953. return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  15954. case AMDGPU::BI__builtin_r600_read_tidig_x:
  15955. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  15956. case AMDGPU::BI__builtin_r600_read_tidig_y:
  15957. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  15958. case AMDGPU::BI__builtin_r600_read_tidig_z:
  15959. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  15960. case AMDGPU::BI__builtin_amdgcn_alignbit: {
  15961. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15962. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15963. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15964. Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
  15965. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15966. }
  15967. case AMDGPU::BI__builtin_amdgcn_fence: {
  15968. ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
  15969. EmitScalarExpr(E->getArg(1)), AO, SSID);
  15970. return Builder.CreateFence(AO, SSID);
  15971. }
  15972. case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  15973. case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  15974. case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  15975. case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
  15976. unsigned BuiltinAtomicOp;
  15977. llvm::Type *ResultType = ConvertType(E->getType());
  15978. switch (BuiltinID) {
  15979. case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  15980. case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  15981. BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
  15982. break;
  15983. case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  15984. case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
  15985. BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
  15986. break;
  15987. }
  15988. Value *Ptr = EmitScalarExpr(E->getArg(0));
  15989. Value *Val = EmitScalarExpr(E->getArg(1));
  15990. llvm::Function *F =
  15991. CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
  15992. ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
  15993. EmitScalarExpr(E->getArg(3)), AO, SSID);
15994. // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect the ordering and
15995. // scope as unsigned values.
  15996. Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
  15997. Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
  15998. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  15999. bool Volatile =
  16000. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  16001. Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
  16002. return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
  16003. }
  16004. case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
  16005. case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
  16006. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  16007. llvm::Type *ResultType = ConvertType(E->getType());
16008. // s_sendmsg_rtn is mangled using the return type only.
  16009. Function *F =
  16010. CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
  16011. return Builder.CreateCall(F, {Arg});
  16012. }
  16013. default:
  16014. return nullptr;
  16015. }
  16016. }
  16017. /// Handle a SystemZ function in which the final argument is a pointer
  16018. /// to an int that receives the post-instruction CC value. At the LLVM level
  16019. /// this is represented as a function that returns a {result, cc} pair.
  16020. static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
  16021. unsigned IntrinsicID,
  16022. const CallExpr *E) {
  16023. unsigned NumArgs = E->getNumArgs() - 1;
  16024. SmallVector<Value *, 8> Args(NumArgs);
  16025. for (unsigned I = 0; I < NumArgs; ++I)
  16026. Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  16027. Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  16028. Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  16029. Value *Call = CGF.Builder.CreateCall(F, Args);
  16030. Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  16031. CGF.Builder.CreateStore(CC, CCPtr);
  16032. return CGF.Builder.CreateExtractValue(Call, 0);
  16033. }
  16034. Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
  16035. const CallExpr *E) {
  16036. switch (BuiltinID) {
  16037. case SystemZ::BI__builtin_tbegin: {
  16038. Value *TDB = EmitScalarExpr(E->getArg(0));
  16039. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  16040. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
  16041. return Builder.CreateCall(F, {TDB, Control});
  16042. }
  16043. case SystemZ::BI__builtin_tbegin_nofloat: {
  16044. Value *TDB = EmitScalarExpr(E->getArg(0));
  16045. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  16046. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
  16047. return Builder.CreateCall(F, {TDB, Control});
  16048. }
  16049. case SystemZ::BI__builtin_tbeginc: {
  16050. Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
  16051. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
  16052. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
  16053. return Builder.CreateCall(F, {TDB, Control});
  16054. }
  16055. case SystemZ::BI__builtin_tabort: {
  16056. Value *Data = EmitScalarExpr(E->getArg(0));
  16057. Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
  16058. return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  16059. }
  16060. case SystemZ::BI__builtin_non_tx_store: {
  16061. Value *Address = EmitScalarExpr(E->getArg(0));
  16062. Value *Data = EmitScalarExpr(E->getArg(1));
  16063. Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
  16064. return Builder.CreateCall(F, {Data, Address});
  16065. }
16066. // Vector builtins. Note that most vector builtins are mapped automatically
16067. // to target-specific LLVM intrinsics. The ones handled specially here can
16068. // be represented via standard LLVM IR, which is preferable because it enables
16069. // common LLVM optimizations.
  16070. case SystemZ::BI__builtin_s390_vpopctb:
  16071. case SystemZ::BI__builtin_s390_vpopcth:
  16072. case SystemZ::BI__builtin_s390_vpopctf:
  16073. case SystemZ::BI__builtin_s390_vpopctg: {
  16074. llvm::Type *ResultType = ConvertType(E->getType());
  16075. Value *X = EmitScalarExpr(E->getArg(0));
  16076. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  16077. return Builder.CreateCall(F, X);
  16078. }
  16079. case SystemZ::BI__builtin_s390_vclzb:
  16080. case SystemZ::BI__builtin_s390_vclzh:
  16081. case SystemZ::BI__builtin_s390_vclzf:
  16082. case SystemZ::BI__builtin_s390_vclzg: {
  16083. llvm::Type *ResultType = ConvertType(E->getType());
  16084. Value *X = EmitScalarExpr(E->getArg(0));
  16085. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  16086. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
  16087. return Builder.CreateCall(F, {X, Undef});
  16088. }
  16089. case SystemZ::BI__builtin_s390_vctzb:
  16090. case SystemZ::BI__builtin_s390_vctzh:
  16091. case SystemZ::BI__builtin_s390_vctzf:
  16092. case SystemZ::BI__builtin_s390_vctzg: {
  16093. llvm::Type *ResultType = ConvertType(E->getType());
  16094. Value *X = EmitScalarExpr(E->getArg(0));
  16095. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  16096. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
  16097. return Builder.CreateCall(F, {X, Undef});
  16098. }
  16099. case SystemZ::BI__builtin_s390_vfsqsb:
  16100. case SystemZ::BI__builtin_s390_vfsqdb: {
  16101. llvm::Type *ResultType = ConvertType(E->getType());
  16102. Value *X = EmitScalarExpr(E->getArg(0));
  16103. if (Builder.getIsFPConstrained()) {
  16104. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
  16105. return Builder.CreateConstrainedFPCall(F, { X });
  16106. } else {
  16107. Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  16108. return Builder.CreateCall(F, X);
  16109. }
  16110. }
  16111. case SystemZ::BI__builtin_s390_vfmasb:
  16112. case SystemZ::BI__builtin_s390_vfmadb: {
  16113. llvm::Type *ResultType = ConvertType(E->getType());
  16114. Value *X = EmitScalarExpr(E->getArg(0));
  16115. Value *Y = EmitScalarExpr(E->getArg(1));
  16116. Value *Z = EmitScalarExpr(E->getArg(2));
  16117. if (Builder.getIsFPConstrained()) {
  16118. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  16119. return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
  16120. } else {
  16121. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  16122. return Builder.CreateCall(F, {X, Y, Z});
  16123. }
  16124. }
  16125. case SystemZ::BI__builtin_s390_vfmssb:
  16126. case SystemZ::BI__builtin_s390_vfmsdb: {
  16127. llvm::Type *ResultType = ConvertType(E->getType());
  16128. Value *X = EmitScalarExpr(E->getArg(0));
  16129. Value *Y = EmitScalarExpr(E->getArg(1));
  16130. Value *Z = EmitScalarExpr(E->getArg(2));
  16131. if (Builder.getIsFPConstrained()) {
  16132. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  16133. return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  16134. } else {
  16135. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  16136. return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  16137. }
  16138. }
  16139. case SystemZ::BI__builtin_s390_vfnmasb:
  16140. case SystemZ::BI__builtin_s390_vfnmadb: {
  16141. llvm::Type *ResultType = ConvertType(E->getType());
  16142. Value *X = EmitScalarExpr(E->getArg(0));
  16143. Value *Y = EmitScalarExpr(E->getArg(1));
  16144. Value *Z = EmitScalarExpr(E->getArg(2));
  16145. if (Builder.getIsFPConstrained()) {
  16146. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  16147. return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
  16148. } else {
  16149. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  16150. return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
  16151. }
  16152. }
  16153. case SystemZ::BI__builtin_s390_vfnmssb:
  16154. case SystemZ::BI__builtin_s390_vfnmsdb: {
  16155. llvm::Type *ResultType = ConvertType(E->getType());
  16156. Value *X = EmitScalarExpr(E->getArg(0));
  16157. Value *Y = EmitScalarExpr(E->getArg(1));
  16158. Value *Z = EmitScalarExpr(E->getArg(2));
  16159. if (Builder.getIsFPConstrained()) {
  16160. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  16161. Value *NegZ = Builder.CreateFNeg(Z, "sub");
  16162. return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
  16163. } else {
  16164. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  16165. Value *NegZ = Builder.CreateFNeg(Z, "neg");
  16166. return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
  16167. }
  16168. }
  16169. case SystemZ::BI__builtin_s390_vflpsb:
  16170. case SystemZ::BI__builtin_s390_vflpdb: {
  16171. llvm::Type *ResultType = ConvertType(E->getType());
  16172. Value *X = EmitScalarExpr(E->getArg(0));
  16173. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  16174. return Builder.CreateCall(F, X);
  16175. }
  16176. case SystemZ::BI__builtin_s390_vflnsb:
  16177. case SystemZ::BI__builtin_s390_vflndb: {
  16178. llvm::Type *ResultType = ConvertType(E->getType());
  16179. Value *X = EmitScalarExpr(E->getArg(0));
  16180. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  16181. return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
  16182. }
  16183. case SystemZ::BI__builtin_s390_vfisb:
  16184. case SystemZ::BI__builtin_s390_vfidb: {
  16185. llvm::Type *ResultType = ConvertType(E->getType());
  16186. Value *X = EmitScalarExpr(E->getArg(0));
  16187. // Constant-fold the M4 and M5 mask arguments.
  16188. llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
  16189. llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
16190. // Check whether this instance can be represented via an LLVM standard
16191. // intrinsic. We only support some combinations of M4 and M5.
  16192. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  16193. Intrinsic::ID CI;
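// M4 bit 4 suppresses the IEEE-inexact exception; M5 selects the rounding
// mode: 0 = current mode, 1 = to nearest with ties away from zero,
// 5 = toward zero, 6 = toward +infinity, 7 = toward -infinity.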
  16194. switch (M4.getZExtValue()) {
  16195. default: break;
  16196. case 0: // IEEE-inexact exception allowed
  16197. switch (M5.getZExtValue()) {
  16198. default: break;
  16199. case 0: ID = Intrinsic::rint;
  16200. CI = Intrinsic::experimental_constrained_rint; break;
  16201. }
  16202. break;
  16203. case 4: // IEEE-inexact exception suppressed
  16204. switch (M5.getZExtValue()) {
  16205. default: break;
  16206. case 0: ID = Intrinsic::nearbyint;
  16207. CI = Intrinsic::experimental_constrained_nearbyint; break;
  16208. case 1: ID = Intrinsic::round;
  16209. CI = Intrinsic::experimental_constrained_round; break;
  16210. case 5: ID = Intrinsic::trunc;
  16211. CI = Intrinsic::experimental_constrained_trunc; break;
  16212. case 6: ID = Intrinsic::ceil;
  16213. CI = Intrinsic::experimental_constrained_ceil; break;
  16214. case 7: ID = Intrinsic::floor;
  16215. CI = Intrinsic::experimental_constrained_floor; break;
  16216. }
  16217. break;
  16218. }
  16219. if (ID != Intrinsic::not_intrinsic) {
  16220. if (Builder.getIsFPConstrained()) {
  16221. Function *F = CGM.getIntrinsic(CI, ResultType);
  16222. return Builder.CreateConstrainedFPCall(F, X);
  16223. } else {
  16224. Function *F = CGM.getIntrinsic(ID, ResultType);
  16225. return Builder.CreateCall(F, X);
  16226. }
  16227. }
  16228. switch (BuiltinID) { // FIXME: constrained version?
  16229. case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
  16230. case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
  16231. default: llvm_unreachable("Unknown BuiltinID");
  16232. }
  16233. Function *F = CGM.getIntrinsic(ID);
  16234. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  16235. Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
  16236. return Builder.CreateCall(F, {X, M4Value, M5Value});
  16237. }
  16238. case SystemZ::BI__builtin_s390_vfmaxsb:
  16239. case SystemZ::BI__builtin_s390_vfmaxdb: {
  16240. llvm::Type *ResultType = ConvertType(E->getType());
  16241. Value *X = EmitScalarExpr(E->getArg(0));
  16242. Value *Y = EmitScalarExpr(E->getArg(1));
  16243. // Constant-fold the M4 mask argument.
  16244. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
16245. // Check whether this instance can be represented via an LLVM standard
16246. // intrinsic. We only support some values of M4.
  16247. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  16248. Intrinsic::ID CI;
  16249. switch (M4.getZExtValue()) {
  16250. default: break;
  16251. case 4: ID = Intrinsic::maxnum;
  16252. CI = Intrinsic::experimental_constrained_maxnum; break;
  16253. }
  16254. if (ID != Intrinsic::not_intrinsic) {
  16255. if (Builder.getIsFPConstrained()) {
  16256. Function *F = CGM.getIntrinsic(CI, ResultType);
  16257. return Builder.CreateConstrainedFPCall(F, {X, Y});
  16258. } else {
  16259. Function *F = CGM.getIntrinsic(ID, ResultType);
  16260. return Builder.CreateCall(F, {X, Y});
  16261. }
  16262. }
  16263. switch (BuiltinID) {
  16264. case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
  16265. case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
  16266. default: llvm_unreachable("Unknown BuiltinID");
  16267. }
  16268. Function *F = CGM.getIntrinsic(ID);
  16269. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  16270. return Builder.CreateCall(F, {X, Y, M4Value});
  16271. }
  16272. case SystemZ::BI__builtin_s390_vfminsb:
  16273. case SystemZ::BI__builtin_s390_vfmindb: {
  16274. llvm::Type *ResultType = ConvertType(E->getType());
  16275. Value *X = EmitScalarExpr(E->getArg(0));
  16276. Value *Y = EmitScalarExpr(E->getArg(1));
  16277. // Constant-fold the M4 mask argument.
  16278. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
16279. // Check whether this instance can be represented via an LLVM standard
16280. // intrinsic. We only support some values of M4.
  16281. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  16282. Intrinsic::ID CI;
  16283. switch (M4.getZExtValue()) {
  16284. default: break;
  16285. case 4: ID = Intrinsic::minnum;
  16286. CI = Intrinsic::experimental_constrained_minnum; break;
  16287. }
  16288. if (ID != Intrinsic::not_intrinsic) {
  16289. if (Builder.getIsFPConstrained()) {
  16290. Function *F = CGM.getIntrinsic(CI, ResultType);
  16291. return Builder.CreateConstrainedFPCall(F, {X, Y});
  16292. } else {
  16293. Function *F = CGM.getIntrinsic(ID, ResultType);
  16294. return Builder.CreateCall(F, {X, Y});
  16295. }
  16296. }
  16297. switch (BuiltinID) {
  16298. case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
  16299. case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
  16300. default: llvm_unreachable("Unknown BuiltinID");
  16301. }
  16302. Function *F = CGM.getIntrinsic(ID);
  16303. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  16304. return Builder.CreateCall(F, {X, Y, M4Value});
  16305. }
  16306. case SystemZ::BI__builtin_s390_vlbrh:
  16307. case SystemZ::BI__builtin_s390_vlbrf:
  16308. case SystemZ::BI__builtin_s390_vlbrg: {
  16309. llvm::Type *ResultType = ConvertType(E->getType());
  16310. Value *X = EmitScalarExpr(E->getArg(0));
  16311. Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
  16312. return Builder.CreateCall(F, X);
  16313. }
  16314. // Vector intrinsics that output the post-instruction CC value.
  16315. #define INTRINSIC_WITH_CC(NAME) \
  16316. case SystemZ::BI__builtin_##NAME: \
  16317. return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
  16318. INTRINSIC_WITH_CC(s390_vpkshs);
  16319. INTRINSIC_WITH_CC(s390_vpksfs);
  16320. INTRINSIC_WITH_CC(s390_vpksgs);
  16321. INTRINSIC_WITH_CC(s390_vpklshs);
  16322. INTRINSIC_WITH_CC(s390_vpklsfs);
  16323. INTRINSIC_WITH_CC(s390_vpklsgs);
  16324. INTRINSIC_WITH_CC(s390_vceqbs);
  16325. INTRINSIC_WITH_CC(s390_vceqhs);
  16326. INTRINSIC_WITH_CC(s390_vceqfs);
  16327. INTRINSIC_WITH_CC(s390_vceqgs);
  16328. INTRINSIC_WITH_CC(s390_vchbs);
  16329. INTRINSIC_WITH_CC(s390_vchhs);
  16330. INTRINSIC_WITH_CC(s390_vchfs);
  16331. INTRINSIC_WITH_CC(s390_vchgs);
  16332. INTRINSIC_WITH_CC(s390_vchlbs);
  16333. INTRINSIC_WITH_CC(s390_vchlhs);
  16334. INTRINSIC_WITH_CC(s390_vchlfs);
  16335. INTRINSIC_WITH_CC(s390_vchlgs);
  16336. INTRINSIC_WITH_CC(s390_vfaebs);
  16337. INTRINSIC_WITH_CC(s390_vfaehs);
  16338. INTRINSIC_WITH_CC(s390_vfaefs);
  16339. INTRINSIC_WITH_CC(s390_vfaezbs);
  16340. INTRINSIC_WITH_CC(s390_vfaezhs);
  16341. INTRINSIC_WITH_CC(s390_vfaezfs);
  16342. INTRINSIC_WITH_CC(s390_vfeebs);
  16343. INTRINSIC_WITH_CC(s390_vfeehs);
  16344. INTRINSIC_WITH_CC(s390_vfeefs);
  16345. INTRINSIC_WITH_CC(s390_vfeezbs);
  16346. INTRINSIC_WITH_CC(s390_vfeezhs);
  16347. INTRINSIC_WITH_CC(s390_vfeezfs);
  16348. INTRINSIC_WITH_CC(s390_vfenebs);
  16349. INTRINSIC_WITH_CC(s390_vfenehs);
  16350. INTRINSIC_WITH_CC(s390_vfenefs);
  16351. INTRINSIC_WITH_CC(s390_vfenezbs);
  16352. INTRINSIC_WITH_CC(s390_vfenezhs);
  16353. INTRINSIC_WITH_CC(s390_vfenezfs);
  16354. INTRINSIC_WITH_CC(s390_vistrbs);
  16355. INTRINSIC_WITH_CC(s390_vistrhs);
  16356. INTRINSIC_WITH_CC(s390_vistrfs);
  16357. INTRINSIC_WITH_CC(s390_vstrcbs);
  16358. INTRINSIC_WITH_CC(s390_vstrchs);
  16359. INTRINSIC_WITH_CC(s390_vstrcfs);
  16360. INTRINSIC_WITH_CC(s390_vstrczbs);
  16361. INTRINSIC_WITH_CC(s390_vstrczhs);
  16362. INTRINSIC_WITH_CC(s390_vstrczfs);
  16363. INTRINSIC_WITH_CC(s390_vfcesbs);
  16364. INTRINSIC_WITH_CC(s390_vfcedbs);
  16365. INTRINSIC_WITH_CC(s390_vfchsbs);
  16366. INTRINSIC_WITH_CC(s390_vfchdbs);
  16367. INTRINSIC_WITH_CC(s390_vfchesbs);
  16368. INTRINSIC_WITH_CC(s390_vfchedbs);
  16369. INTRINSIC_WITH_CC(s390_vftcisb);
  16370. INTRINSIC_WITH_CC(s390_vftcidb);
  16371. INTRINSIC_WITH_CC(s390_vstrsb);
  16372. INTRINSIC_WITH_CC(s390_vstrsh);
  16373. INTRINSIC_WITH_CC(s390_vstrsf);
  16374. INTRINSIC_WITH_CC(s390_vstrszb);
  16375. INTRINSIC_WITH_CC(s390_vstrszh);
  16376. INTRINSIC_WITH_CC(s390_vstrszf);
  16377. #undef INTRINSIC_WITH_CC
  16378. default:
  16379. return nullptr;
  16380. }
  16381. }
  16382. namespace {
16383. // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
  16384. struct NVPTXMmaLdstInfo {
  16385. unsigned NumResults; // Number of elements to load/store
16386. // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
  16387. unsigned IID_col;
  16388. unsigned IID_row;
  16389. };
  16390. #define MMA_INTR(geom_op_type, layout) \
  16391. Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
  16392. #define MMA_LDST(n, geom_op_type) \
  16393. { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
  16394. static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
  16395. switch (BuiltinID) {
  16396. // FP MMA loads
  16397. case NVPTX::BI__hmma_m16n16k16_ld_a:
  16398. return MMA_LDST(8, m16n16k16_load_a_f16);
  16399. case NVPTX::BI__hmma_m16n16k16_ld_b:
  16400. return MMA_LDST(8, m16n16k16_load_b_f16);
  16401. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  16402. return MMA_LDST(4, m16n16k16_load_c_f16);
  16403. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  16404. return MMA_LDST(8, m16n16k16_load_c_f32);
  16405. case NVPTX::BI__hmma_m32n8k16_ld_a:
  16406. return MMA_LDST(8, m32n8k16_load_a_f16);
  16407. case NVPTX::BI__hmma_m32n8k16_ld_b:
  16408. return MMA_LDST(8, m32n8k16_load_b_f16);
  16409. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  16410. return MMA_LDST(4, m32n8k16_load_c_f16);
  16411. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  16412. return MMA_LDST(8, m32n8k16_load_c_f32);
  16413. case NVPTX::BI__hmma_m8n32k16_ld_a:
  16414. return MMA_LDST(8, m8n32k16_load_a_f16);
  16415. case NVPTX::BI__hmma_m8n32k16_ld_b:
  16416. return MMA_LDST(8, m8n32k16_load_b_f16);
  16417. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  16418. return MMA_LDST(4, m8n32k16_load_c_f16);
  16419. case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  16420. return MMA_LDST(8, m8n32k16_load_c_f32);
  16421. // Integer MMA loads
  16422. case NVPTX::BI__imma_m16n16k16_ld_a_s8:
  16423. return MMA_LDST(2, m16n16k16_load_a_s8);
  16424. case NVPTX::BI__imma_m16n16k16_ld_a_u8:
  16425. return MMA_LDST(2, m16n16k16_load_a_u8);
  16426. case NVPTX::BI__imma_m16n16k16_ld_b_s8:
  16427. return MMA_LDST(2, m16n16k16_load_b_s8);
  16428. case NVPTX::BI__imma_m16n16k16_ld_b_u8:
  16429. return MMA_LDST(2, m16n16k16_load_b_u8);
  16430. case NVPTX::BI__imma_m16n16k16_ld_c:
  16431. return MMA_LDST(8, m16n16k16_load_c_s32);
  16432. case NVPTX::BI__imma_m32n8k16_ld_a_s8:
  16433. return MMA_LDST(4, m32n8k16_load_a_s8);
  16434. case NVPTX::BI__imma_m32n8k16_ld_a_u8:
  16435. return MMA_LDST(4, m32n8k16_load_a_u8);
  16436. case NVPTX::BI__imma_m32n8k16_ld_b_s8:
  16437. return MMA_LDST(1, m32n8k16_load_b_s8);
  16438. case NVPTX::BI__imma_m32n8k16_ld_b_u8:
  16439. return MMA_LDST(1, m32n8k16_load_b_u8);
  16440. case NVPTX::BI__imma_m32n8k16_ld_c:
  16441. return MMA_LDST(8, m32n8k16_load_c_s32);
  16442. case NVPTX::BI__imma_m8n32k16_ld_a_s8:
  16443. return MMA_LDST(1, m8n32k16_load_a_s8);
  16444. case NVPTX::BI__imma_m8n32k16_ld_a_u8:
  16445. return MMA_LDST(1, m8n32k16_load_a_u8);
  16446. case NVPTX::BI__imma_m8n32k16_ld_b_s8:
  16447. return MMA_LDST(4, m8n32k16_load_b_s8);
  16448. case NVPTX::BI__imma_m8n32k16_ld_b_u8:
  16449. return MMA_LDST(4, m8n32k16_load_b_u8);
  16450. case NVPTX::BI__imma_m8n32k16_ld_c:
  16451. return MMA_LDST(8, m8n32k16_load_c_s32);
  16452. // Sub-integer MMA loads.
  16453. // Only row/col layout is supported by A/B fragments.
  16454. case NVPTX::BI__imma_m8n8k32_ld_a_s4:
  16455. return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
  16456. case NVPTX::BI__imma_m8n8k32_ld_a_u4:
  16457. return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
  16458. case NVPTX::BI__imma_m8n8k32_ld_b_s4:
  16459. return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
  16460. case NVPTX::BI__imma_m8n8k32_ld_b_u4:
  16461. return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
  16462. case NVPTX::BI__imma_m8n8k32_ld_c:
  16463. return MMA_LDST(2, m8n8k32_load_c_s32);
  16464. case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
  16465. return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
  16466. case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
  16467. return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
  16468. case NVPTX::BI__bmma_m8n8k128_ld_c:
  16469. return MMA_LDST(2, m8n8k128_load_c_s32);
  16470. // Double MMA loads
  16471. case NVPTX::BI__dmma_m8n8k4_ld_a:
  16472. return MMA_LDST(1, m8n8k4_load_a_f64);
  16473. case NVPTX::BI__dmma_m8n8k4_ld_b:
  16474. return MMA_LDST(1, m8n8k4_load_b_f64);
  16475. case NVPTX::BI__dmma_m8n8k4_ld_c:
  16476. return MMA_LDST(2, m8n8k4_load_c_f64);
  16477. // Alternate float MMA loads
  16478. case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  16479. return MMA_LDST(4, m16n16k16_load_a_bf16);
  16480. case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  16481. return MMA_LDST(4, m16n16k16_load_b_bf16);
  16482. case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  16483. return MMA_LDST(2, m8n32k16_load_a_bf16);
  16484. case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  16485. return MMA_LDST(8, m8n32k16_load_b_bf16);
  16486. case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  16487. return MMA_LDST(8, m32n8k16_load_a_bf16);
  16488. case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  16489. return MMA_LDST(2, m32n8k16_load_b_bf16);
  16490. case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  16491. return MMA_LDST(4, m16n16k8_load_a_tf32);
  16492. case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  16493. return MMA_LDST(4, m16n16k8_load_b_tf32);
  16494. case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
  16495. return MMA_LDST(8, m16n16k8_load_c_f32);
16496. // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
16497. // PTX and LLVM IR, where stores always use fragment D, NVCC builtins always
16498. // use fragment C for both loads and stores.
  16499. // FP MMA stores.
  16500. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  16501. return MMA_LDST(4, m16n16k16_store_d_f16);
  16502. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  16503. return MMA_LDST(8, m16n16k16_store_d_f32);
  16504. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  16505. return MMA_LDST(4, m32n8k16_store_d_f16);
  16506. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  16507. return MMA_LDST(8, m32n8k16_store_d_f32);
  16508. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  16509. return MMA_LDST(4, m8n32k16_store_d_f16);
  16510. case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  16511. return MMA_LDST(8, m8n32k16_store_d_f32);
  16512. // Integer and sub-integer MMA stores.
  16513. // Another naming quirk. Unlike other MMA builtins that use PTX types in the
  16514. // name, integer loads/stores use LLVM's i32.
  16515. case NVPTX::BI__imma_m16n16k16_st_c_i32:
  16516. return MMA_LDST(8, m16n16k16_store_d_s32);
  16517. case NVPTX::BI__imma_m32n8k16_st_c_i32:
  16518. return MMA_LDST(8, m32n8k16_store_d_s32);
  16519. case NVPTX::BI__imma_m8n32k16_st_c_i32:
  16520. return MMA_LDST(8, m8n32k16_store_d_s32);
  16521. case NVPTX::BI__imma_m8n8k32_st_c_i32:
  16522. return MMA_LDST(2, m8n8k32_store_d_s32);
  16523. case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  16524. return MMA_LDST(2, m8n8k128_store_d_s32);
  16525. // Double MMA store
  16526. case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  16527. return MMA_LDST(2, m8n8k4_store_d_f64);
  16528. // Alternate float MMA store
  16529. case NVPTX::BI__mma_m16n16k8_st_c_f32:
  16530. return MMA_LDST(8, m16n16k8_store_d_f32);
  16531. default:
  16532. llvm_unreachable("Unknown MMA builtin");
  16533. }
  16534. }
  16535. #undef MMA_LDST
  16536. #undef MMA_INTR
  16537. struct NVPTXMmaInfo {
  16538. unsigned NumEltsA;
  16539. unsigned NumEltsB;
  16540. unsigned NumEltsC;
  16541. unsigned NumEltsD;
  16542. // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  16543. // over 'col' for layout. The index of non-satf variants is expected to match
  16544. // the undocumented layout constants used by CUDA's mma.hpp.
  16545. std::array<unsigned, 8> Variants;
  16546. unsigned getMMAIntrinsic(int Layout, bool Satf) {
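// Layouts are encoded as row_row = 0, row_col = 1, col_row = 2, col_col = 3;
// the .satfinite variants occupy the next four slots.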
  16547. unsigned Index = Layout + 4 * Satf;
  16548. if (Index >= Variants.size())
  16549. return 0;
  16550. return Variants[Index];
  16551. }
  16552. };
16553. // Returns the intrinsic matching the given Layout and Satf for valid
16554. // combinations, and 0 otherwise.
  16555. static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
  16556. // clang-format off
  16557. #define MMA_VARIANTS(geom, type) \
  16558. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
  16559. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
  16560. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
  16561. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
  16562. #define MMA_SATF_VARIANTS(geom, type) \
  16563. MMA_VARIANTS(geom, type), \
  16564. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
  16565. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
  16566. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
  16567. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
  16568. // Sub-integer MMA only supports row.col layout.
  16569. #define MMA_VARIANTS_I4(geom, type) \
  16570. 0, \
  16571. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
  16572. 0, \
  16573. 0, \
  16574. 0, \
  16575. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
  16576. 0, \
  16577. 0
  16578. // b1 MMA does not support .satfinite.
  16579. #define MMA_VARIANTS_B1_XOR(geom, type) \
  16580. 0, \
  16581. Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
  16582. 0, \
  16583. 0, \
  16584. 0, \
  16585. 0, \
  16586. 0, \
  16587. 0
  16588. #define MMA_VARIANTS_B1_AND(geom, type) \
  16589. 0, \
  16590. Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
  16591. 0, \
  16592. 0, \
  16593. 0, \
  16594. 0, \
  16595. 0, \
  16596. 0
  16597. // clang-format on
  16598. switch (BuiltinID) {
  16599. // FP MMA
16600. // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
16601. // the NumEltsN fields of the return value are ordered as A,B,C,D.
  16602. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  16603. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
  16604. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  16605. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
  16606. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  16607. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
  16608. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  16609. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
  16610. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  16611. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
  16612. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  16613. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
  16614. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  16615. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
  16616. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  16617. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
  16618. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  16619. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
  16620. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  16621. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
  16622. case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  16623. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
  16624. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  16625. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
  16626. // Integer MMA
  16627. case NVPTX::BI__imma_m16n16k16_mma_s8:
  16628. return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
  16629. case NVPTX::BI__imma_m16n16k16_mma_u8:
  16630. return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
  16631. case NVPTX::BI__imma_m32n8k16_mma_s8:
  16632. return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
  16633. case NVPTX::BI__imma_m32n8k16_mma_u8:
  16634. return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
  16635. case NVPTX::BI__imma_m8n32k16_mma_s8:
  16636. return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
  16637. case NVPTX::BI__imma_m8n32k16_mma_u8:
  16638. return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
  16639. // Sub-integer MMA
  16640. case NVPTX::BI__imma_m8n8k32_mma_s4:
  16641. return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
  16642. case NVPTX::BI__imma_m8n8k32_mma_u4:
  16643. return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
  16644. case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  16645. return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
  16646. case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  16647. return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
  16648. // Double MMA
  16649. case NVPTX::BI__dmma_m8n8k4_mma_f64:
  16650. return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
  16651. // Alternate FP MMA
  16652. case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  16653. return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
  16654. case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  16655. return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
  16656. case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  16657. return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
  16658. case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
  16659. return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
  16660. default:
  16661. llvm_unreachable("Unexpected builtin ID.");
  16662. }
  16663. #undef MMA_VARIANTS
  16664. #undef MMA_SATF_VARIANTS
  16665. #undef MMA_VARIANTS_I4
  16666. #undef MMA_VARIANTS_B1_AND
  16667. #undef MMA_VARIANTS_B1_XOR
  16668. }
  16669. } // namespace
  16670. Value *
  16671. CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
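// Helper for the __nvvm_ldg_* builtins: emits the given ldg intrinsic with the
// pointer operand and its natural alignment (as an i32 constant) as arguments.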
  16672. auto MakeLdg = [&](unsigned IntrinsicID) {
  16673. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16674. QualType ArgType = E->getArg(0)->getType();
  16675. clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(ArgType);
  16676. llvm::Type *ElemTy = ConvertTypeForMem(ArgType->getPointeeType());
  16677. return Builder.CreateCall(
  16678. CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
  16679. {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  16680. };
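// Helper for the scoped (cta/sys) atomic builtins: emits the given intrinsic
// with the pointer and value operands.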
  16681. auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
  16682. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16683. llvm::Type *ElemTy =
  16684. ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
  16685. return Builder.CreateCall(
  16686. CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
  16687. {Ptr, EmitScalarExpr(E->getArg(1))});
  16688. };
  16689. switch (BuiltinID) {
  16690. case NVPTX::BI__nvvm_atom_add_gen_i:
  16691. case NVPTX::BI__nvvm_atom_add_gen_l:
  16692. case NVPTX::BI__nvvm_atom_add_gen_ll:
  16693. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
  16694. case NVPTX::BI__nvvm_atom_sub_gen_i:
  16695. case NVPTX::BI__nvvm_atom_sub_gen_l:
  16696. case NVPTX::BI__nvvm_atom_sub_gen_ll:
  16697. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
  16698. case NVPTX::BI__nvvm_atom_and_gen_i:
  16699. case NVPTX::BI__nvvm_atom_and_gen_l:
  16700. case NVPTX::BI__nvvm_atom_and_gen_ll:
  16701. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
  16702. case NVPTX::BI__nvvm_atom_or_gen_i:
  16703. case NVPTX::BI__nvvm_atom_or_gen_l:
  16704. case NVPTX::BI__nvvm_atom_or_gen_ll:
  16705. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
  16706. case NVPTX::BI__nvvm_atom_xor_gen_i:
  16707. case NVPTX::BI__nvvm_atom_xor_gen_l:
  16708. case NVPTX::BI__nvvm_atom_xor_gen_ll:
  16709. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
  16710. case NVPTX::BI__nvvm_atom_xchg_gen_i:
  16711. case NVPTX::BI__nvvm_atom_xchg_gen_l:
  16712. case NVPTX::BI__nvvm_atom_xchg_gen_ll:
  16713. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
  16714. case NVPTX::BI__nvvm_atom_max_gen_i:
  16715. case NVPTX::BI__nvvm_atom_max_gen_l:
  16716. case NVPTX::BI__nvvm_atom_max_gen_ll:
  16717. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
  16718. case NVPTX::BI__nvvm_atom_max_gen_ui:
  16719. case NVPTX::BI__nvvm_atom_max_gen_ul:
  16720. case NVPTX::BI__nvvm_atom_max_gen_ull:
  16721. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
  16722. case NVPTX::BI__nvvm_atom_min_gen_i:
  16723. case NVPTX::BI__nvvm_atom_min_gen_l:
  16724. case NVPTX::BI__nvvm_atom_min_gen_ll:
  16725. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
  16726. case NVPTX::BI__nvvm_atom_min_gen_ui:
  16727. case NVPTX::BI__nvvm_atom_min_gen_ul:
  16728. case NVPTX::BI__nvvm_atom_min_gen_ull:
  16729. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
  16730. case NVPTX::BI__nvvm_atom_cas_gen_i:
  16731. case NVPTX::BI__nvvm_atom_cas_gen_l:
  16732. case NVPTX::BI__nvvm_atom_cas_gen_ll:
  16733. // __nvvm_atom_cas_gen_* should return the old value rather than the
  16734. // success flag.
  16735. return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
  16736. case NVPTX::BI__nvvm_atom_add_gen_f:
  16737. case NVPTX::BI__nvvm_atom_add_gen_d: {
  16738. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16739. Value *Val = EmitScalarExpr(E->getArg(1));
  16740. return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
  16741. AtomicOrdering::SequentiallyConsistent);
  16742. }
  16743. case NVPTX::BI__nvvm_atom_inc_gen_ui: {
  16744. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16745. Value *Val = EmitScalarExpr(E->getArg(1));
  16746. Function *FnALI32 =
  16747. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
  16748. return Builder.CreateCall(FnALI32, {Ptr, Val});
  16749. }
  16750. case NVPTX::BI__nvvm_atom_dec_gen_ui: {
  16751. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16752. Value *Val = EmitScalarExpr(E->getArg(1));
  16753. Function *FnALD32 =
  16754. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
  16755. return Builder.CreateCall(FnALD32, {Ptr, Val});
  16756. }
  16757. case NVPTX::BI__nvvm_ldg_c:
  16758. case NVPTX::BI__nvvm_ldg_c2:
  16759. case NVPTX::BI__nvvm_ldg_c4:
  16760. case NVPTX::BI__nvvm_ldg_s:
  16761. case NVPTX::BI__nvvm_ldg_s2:
  16762. case NVPTX::BI__nvvm_ldg_s4:
  16763. case NVPTX::BI__nvvm_ldg_i:
  16764. case NVPTX::BI__nvvm_ldg_i2:
  16765. case NVPTX::BI__nvvm_ldg_i4:
  16766. case NVPTX::BI__nvvm_ldg_l:
  16767. case NVPTX::BI__nvvm_ldg_ll:
  16768. case NVPTX::BI__nvvm_ldg_ll2:
  16769. case NVPTX::BI__nvvm_ldg_uc:
  16770. case NVPTX::BI__nvvm_ldg_uc2:
  16771. case NVPTX::BI__nvvm_ldg_uc4:
  16772. case NVPTX::BI__nvvm_ldg_us:
  16773. case NVPTX::BI__nvvm_ldg_us2:
  16774. case NVPTX::BI__nvvm_ldg_us4:
  16775. case NVPTX::BI__nvvm_ldg_ui:
  16776. case NVPTX::BI__nvvm_ldg_ui2:
  16777. case NVPTX::BI__nvvm_ldg_ui4:
  16778. case NVPTX::BI__nvvm_ldg_ul:
  16779. case NVPTX::BI__nvvm_ldg_ull:
  16780. case NVPTX::BI__nvvm_ldg_ull2:
  16781. // PTX Interoperability section 2.2: "For a vector with an even number of
  16782. // elements, its alignment is set to number of elements times the alignment
  16783. // of its member: n*alignof(t)."
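// For example, a 4-element vector of 4-byte ints gets 4 * 4 = 16-byte alignment.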
  16784. return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  16785. case NVPTX::BI__nvvm_ldg_f:
  16786. case NVPTX::BI__nvvm_ldg_f2:
  16787. case NVPTX::BI__nvvm_ldg_f4:
  16788. case NVPTX::BI__nvvm_ldg_d:
  16789. case NVPTX::BI__nvvm_ldg_d2:
  16790. return MakeLdg(Intrinsic::nvvm_ldg_global_f);
  16791. case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  16792. case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  16793. case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
  16794. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
  16795. case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  16796. case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  16797. case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
  16798. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
  16799. case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  16800. case NVPTX::BI__nvvm_atom_cta_add_gen_d:
  16801. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
  16802. case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  16803. case NVPTX::BI__nvvm_atom_sys_add_gen_d:
  16804. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
  16805. case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  16806. case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  16807. case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
  16808. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
  16809. case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  16810. case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  16811. case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
  16812. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
  16813. case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  16814. case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  16815. case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  16816. case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  16817. case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  16818. case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
  16819. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
  16820. case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  16821. case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  16822. case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  16823. case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  16824. case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  16825. case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
  16826. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
  16827. case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  16828. case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  16829. case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  16830. case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  16831. case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  16832. case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
  16833. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
  16834. case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  16835. case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  16836. case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  16837. case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  16838. case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  16839. case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
  16840. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
  16841. case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
  16842. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
  16843. case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
  16844. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
  16845. case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
  16846. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
  16847. case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
  16848. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
  16849. case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  16850. case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  16851. case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
  16852. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
  16853. case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  16854. case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  16855. case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
  16856. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
  16857. case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  16858. case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  16859. case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
  16860. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
  16861. case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  16862. case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  16863. case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
  16864. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
  16865. case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  16866. case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  16867. case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
  16868. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
  16869. case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  16870. case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  16871. case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
  16872. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  16873. case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  16874. case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  16875. case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
  16876. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16877. llvm::Type *ElemTy =
  16878. ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
  16879. return Builder.CreateCall(
  16880. CGM.getIntrinsic(
  16881. Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
  16882. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  16883. }
  16884. case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  16885. case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  16886. case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
  16887. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16888. llvm::Type *ElemTy =
  16889. ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
  16890. return Builder.CreateCall(
  16891. CGM.getIntrinsic(
  16892. Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
  16893. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  16894. }
  16895. case NVPTX::BI__nvvm_match_all_sync_i32p:
  16896. case NVPTX::BI__nvvm_match_all_sync_i64p: {
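// The match.all.sync intrinsics return a {value, predicate} pair; store the
// predicate through the pointer argument and return the matched value.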
  16897. Value *Mask = EmitScalarExpr(E->getArg(0));
  16898. Value *Val = EmitScalarExpr(E->getArg(1));
  16899. Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
  16900. Value *ResultPair = Builder.CreateCall(
  16901. CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
  16902. ? Intrinsic::nvvm_match_all_sync_i32p
  16903. : Intrinsic::nvvm_match_all_sync_i64p),
  16904. {Mask, Val});
  16905. Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
  16906. PredOutPtr.getElementType());
  16907. Builder.CreateStore(Pred, PredOutPtr);
  16908. return Builder.CreateExtractValue(ResultPair, 0);
  16909. }
  16910. // FP MMA loads
  16911. case NVPTX::BI__hmma_m16n16k16_ld_a:
  16912. case NVPTX::BI__hmma_m16n16k16_ld_b:
  16913. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  16914. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  16915. case NVPTX::BI__hmma_m32n8k16_ld_a:
  16916. case NVPTX::BI__hmma_m32n8k16_ld_b:
  16917. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  16918. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  16919. case NVPTX::BI__hmma_m8n32k16_ld_a:
  16920. case NVPTX::BI__hmma_m8n32k16_ld_b:
  16921. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  16922. case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  16923. // Integer MMA loads.
  16924. case NVPTX::BI__imma_m16n16k16_ld_a_s8:
  16925. case NVPTX::BI__imma_m16n16k16_ld_a_u8:
  16926. case NVPTX::BI__imma_m16n16k16_ld_b_s8:
  16927. case NVPTX::BI__imma_m16n16k16_ld_b_u8:
  16928. case NVPTX::BI__imma_m16n16k16_ld_c:
  16929. case NVPTX::BI__imma_m32n8k16_ld_a_s8:
  16930. case NVPTX::BI__imma_m32n8k16_ld_a_u8:
  16931. case NVPTX::BI__imma_m32n8k16_ld_b_s8:
  16932. case NVPTX::BI__imma_m32n8k16_ld_b_u8:
  16933. case NVPTX::BI__imma_m32n8k16_ld_c:
  16934. case NVPTX::BI__imma_m8n32k16_ld_a_s8:
  16935. case NVPTX::BI__imma_m8n32k16_ld_a_u8:
  16936. case NVPTX::BI__imma_m8n32k16_ld_b_s8:
  16937. case NVPTX::BI__imma_m8n32k16_ld_b_u8:
  16938. case NVPTX::BI__imma_m8n32k16_ld_c:
  16939. // Sub-integer MMA loads.
  16940. case NVPTX::BI__imma_m8n8k32_ld_a_s4:
  16941. case NVPTX::BI__imma_m8n8k32_ld_a_u4:
  16942. case NVPTX::BI__imma_m8n8k32_ld_b_s4:
  16943. case NVPTX::BI__imma_m8n8k32_ld_b_u4:
  16944. case NVPTX::BI__imma_m8n8k32_ld_c:
  16945. case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
  16946. case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
  16947. case NVPTX::BI__bmma_m8n8k128_ld_c:
  16948. // Double MMA loads.
  16949. case NVPTX::BI__dmma_m8n8k4_ld_a:
  16950. case NVPTX::BI__dmma_m8n8k4_ld_b:
  16951. case NVPTX::BI__dmma_m8n8k4_ld_c:
  16952. // Alternate float MMA loads.
  16953. case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  16954. case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  16955. case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  16956. case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  16957. case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  16958. case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  16959. case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  16960. case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  16961. case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
  16962. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  16963. Value *Src = EmitScalarExpr(E->getArg(1));
  16964. Value *Ldm = EmitScalarExpr(E->getArg(2));
  16965. std::optional<llvm::APSInt> isColMajorArg =
  16966. E->getArg(3)->getIntegerConstantExpr(getContext());
  16967. if (!isColMajorArg)
  16968. return nullptr;
  16969. bool isColMajor = isColMajorArg->getSExtValue();
  16970. NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
  16971. unsigned IID = isColMajor ? II.IID_col : II.IID_row;
  16972. if (IID == 0)
  16973. return nullptr;
  16974. Value *Result =
  16975. Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
  16976. // Save returned values.
  16977. assert(II.NumResults);
  16978. if (II.NumResults == 1) {
  16979. Builder.CreateAlignedStore(Result, Dst.getPointer(),
  16980. CharUnits::fromQuantity(4));
  16981. } else {
  16982. for (unsigned i = 0; i < II.NumResults; ++i) {
  16983. Builder.CreateAlignedStore(
  16984. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
  16985. Dst.getElementType()),
  16986. Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
  16987. llvm::ConstantInt::get(IntTy, i)),
  16988. CharUnits::fromQuantity(4));
  16989. }
  16990. }
  16991. return Result;
  16992. }
  16993. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  16994. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  16995. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  16996. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  16997. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  16998. case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  16999. case NVPTX::BI__imma_m16n16k16_st_c_i32:
  17000. case NVPTX::BI__imma_m32n8k16_st_c_i32:
  17001. case NVPTX::BI__imma_m8n32k16_st_c_i32:
  17002. case NVPTX::BI__imma_m8n8k32_st_c_i32:
  17003. case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  17004. case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  17005. case NVPTX::BI__mma_m16n16k8_st_c_f32: {
  17006. Value *Dst = EmitScalarExpr(E->getArg(0));
  17007. Address Src = EmitPointerWithAlignment(E->getArg(1));
  17008. Value *Ldm = EmitScalarExpr(E->getArg(2));
  17009. std::optional<llvm::APSInt> isColMajorArg =
  17010. E->getArg(3)->getIntegerConstantExpr(getContext());
  17011. if (!isColMajorArg)
  17012. return nullptr;
  17013. bool isColMajor = isColMajorArg->getSExtValue();
  17014. NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
  17015. unsigned IID = isColMajor ? II.IID_col : II.IID_row;
  17016. if (IID == 0)
  17017. return nullptr;
  17018. Function *Intrinsic =
  17019. CGM.getIntrinsic(IID, Dst->getType());
  17020. llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
  17021. SmallVector<Value *, 10> Values = {Dst};
  17022. for (unsigned i = 0; i < II.NumResults; ++i) {
  17023. Value *V = Builder.CreateAlignedLoad(
  17024. Src.getElementType(),
  17025. Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
  17026. llvm::ConstantInt::get(IntTy, i)),
  17027. CharUnits::fromQuantity(4));
  17028. Values.push_back(Builder.CreateBitCast(V, ParamType));
  17029. }
  17030. Values.push_back(Ldm);
  17031. Value *Result = Builder.CreateCall(Intrinsic, Values);
  17032. return Result;
  17033. }
  17034. // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  17035. // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
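// For example, __hmma_m16n16k16_mma_f32f16(d, a, b, c, /*layout=*/1, /*satf=*/0)
// selects Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f16.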
  17036. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  17037. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  17038. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  17039. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  17040. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  17041. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  17042. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  17043. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  17044. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  17045. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  17046. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  17047. case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  17048. case NVPTX::BI__imma_m16n16k16_mma_s8:
  17049. case NVPTX::BI__imma_m16n16k16_mma_u8:
  17050. case NVPTX::BI__imma_m32n8k16_mma_s8:
  17051. case NVPTX::BI__imma_m32n8k16_mma_u8:
  17052. case NVPTX::BI__imma_m8n32k16_mma_s8:
  17053. case NVPTX::BI__imma_m8n32k16_mma_u8:
  17054. case NVPTX::BI__imma_m8n8k32_mma_s4:
  17055. case NVPTX::BI__imma_m8n8k32_mma_u4:
  17056. case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  17057. case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  17058. case NVPTX::BI__dmma_m8n8k4_mma_f64:
  17059. case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  17060. case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  17061. case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  17062. case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
  17063. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  17064. Address SrcA = EmitPointerWithAlignment(E->getArg(1));
  17065. Address SrcB = EmitPointerWithAlignment(E->getArg(2));
  17066. Address SrcC = EmitPointerWithAlignment(E->getArg(3));
  17067. std::optional<llvm::APSInt> LayoutArg =
  17068. E->getArg(4)->getIntegerConstantExpr(getContext());
  17069. if (!LayoutArg)
  17070. return nullptr;
  17071. int Layout = LayoutArg->getSExtValue();
  17072. if (Layout < 0 || Layout > 3)
  17073. return nullptr;
  17074. llvm::APSInt SatfArg;
  17075. if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
  17076. BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
  17077. SatfArg = 0; // .b1 does not have satf argument.
  17078. else if (std::optional<llvm::APSInt> OptSatfArg =
  17079. E->getArg(5)->getIntegerConstantExpr(getContext()))
  17080. SatfArg = *OptSatfArg;
  17081. else
  17082. return nullptr;
  17083. bool Satf = SatfArg.getSExtValue();
  17084. NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
  17085. unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
  17086. if (IID == 0) // Unsupported combination of Layout/Satf.
  17087. return nullptr;
  17088. SmallVector<Value *, 24> Values;
  17089. Function *Intrinsic = CGM.getIntrinsic(IID);
  17090. llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
  17091. // Load A
  17092. for (unsigned i = 0; i < MI.NumEltsA; ++i) {
  17093. Value *V = Builder.CreateAlignedLoad(
  17094. SrcA.getElementType(),
  17095. Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
  17096. llvm::ConstantInt::get(IntTy, i)),
  17097. CharUnits::fromQuantity(4));
  17098. Values.push_back(Builder.CreateBitCast(V, AType));
  17099. }
  17100. // Load B
  17101. llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
  17102. for (unsigned i = 0; i < MI.NumEltsB; ++i) {
  17103. Value *V = Builder.CreateAlignedLoad(
  17104. SrcB.getElementType(),
  17105. Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
  17106. llvm::ConstantInt::get(IntTy, i)),
  17107. CharUnits::fromQuantity(4));
  17108. Values.push_back(Builder.CreateBitCast(V, BType));
  17109. }
  17110. // Load C
  17111. llvm::Type *CType =
  17112. Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
  17113. for (unsigned i = 0; i < MI.NumEltsC; ++i) {
  17114. Value *V = Builder.CreateAlignedLoad(
  17115. SrcC.getElementType(),
  17116. Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
  17117. llvm::ConstantInt::get(IntTy, i)),
  17118. CharUnits::fromQuantity(4));
  17119. Values.push_back(Builder.CreateBitCast(V, CType));
  17120. }
  17121. Value *Result = Builder.CreateCall(Intrinsic, Values);
  17122. llvm::Type *DType = Dst.getElementType();
  17123. for (unsigned i = 0; i < MI.NumEltsD; ++i)
  17124. Builder.CreateAlignedStore(
  17125. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
  17126. Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
  17127. llvm::ConstantInt::get(IntTy, i)),
  17128. CharUnits::fromQuantity(4));
  17129. return Result;
  17130. }
  17131. default:
  17132. return nullptr;
  17133. }
  17134. }
  17135. namespace {
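// Gathers the pieces shared by __builtin_is_aligned, __builtin_align_up and
// __builtin_align_down: the source value, an integer type wide enough to hold
// its address, the requested alignment, and the mask (alignment - 1).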
  17136. struct BuiltinAlignArgs {
  17137. llvm::Value *Src = nullptr;
  17138. llvm::Type *SrcType = nullptr;
  17139. llvm::Value *Alignment = nullptr;
  17140. llvm::Value *Mask = nullptr;
  17141. llvm::IntegerType *IntType = nullptr;
  17142. BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
  17143. QualType AstType = E->getArg(0)->getType();
  17144. if (AstType->isArrayType())
  17145. Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
  17146. else
  17147. Src = CGF.EmitScalarExpr(E->getArg(0));
  17148. SrcType = Src->getType();
  17149. if (SrcType->isPointerTy()) {
  17150. IntType = IntegerType::get(
  17151. CGF.getLLVMContext(),
  17152. CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
  17153. } else {
  17154. assert(SrcType->isIntegerTy());
  17155. IntType = cast<llvm::IntegerType>(SrcType);
  17156. }
  17157. Alignment = CGF.EmitScalarExpr(E->getArg(1));
  17158. Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
  17159. auto *One = llvm::ConstantInt::get(IntType, 1);
  17160. Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
  17161. }
  17162. };
  17163. } // namespace
  17164. /// Generate (x & (y-1)) == 0.
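/// For example, __builtin_is_aligned(p, 16) is emitted as
/// (ptrtoint(p) & 15) == 0.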
  17165. RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
  17166. BuiltinAlignArgs Args(E, *this);
  17167. llvm::Value *SrcAddress = Args.Src;
  17168. if (Args.SrcType->isPointerTy())
  17169. SrcAddress =
  17170. Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
  17171. return RValue::get(Builder.CreateICmpEQ(
  17172. Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
  17173. llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
  17174. }
  17175. /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
  17176. /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
  17177. /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
  17178. /// TODO: actually use ptrmask once most optimization passes know about it.
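/// For example, aligning 13 up to 8 computes (13 + 7) & ~7 == 16, while
/// aligning 13 down to 8 computes 13 & ~7 == 8.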
  17179. RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
  17180. BuiltinAlignArgs Args(E, *this);
  17181. llvm::Value *SrcAddr = Args.Src;
  17182. if (Args.Src->getType()->isPointerTy())
  17183. SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
  17184. llvm::Value *SrcForMask = SrcAddr;
  17185. if (AlignUp) {
  17186. // When aligning up we have to first add the mask to ensure we go over the
  17187. // next alignment value and then align down to the next valid multiple.
  17188. // By adding the mask, we ensure that align_up on an already aligned
  17189. // value will not change the value.
  17190. SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
  17191. }
  17192. // Invert the mask to only clear the lower bits.
  17193. llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
  17194. llvm::Value *Result =
  17195. Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
  17196. if (Args.Src->getType()->isPointerTy()) {
  17197. /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
  17198. // Result = Builder.CreateIntrinsic(
  17199. // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
17200. // {SrcForMask, InvertedMask}, nullptr, "aligned_result");
  17201. Result->setName("aligned_intptr");
  17202. llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
  17203. // The result must point to the same underlying allocation. This means we
  17204. // can use an inbounds GEP to enable better optimization.
  17205. Value *Base = EmitCastToVoidPtr(Args.Src);
  17206. if (getLangOpts().isSignedOverflowDefined())
  17207. Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result");
  17208. else
  17209. Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference,
  17210. /*SignedIndices=*/true,
  17211. /*isSubtraction=*/!AlignUp,
  17212. E->getExprLoc(), "aligned_result");
  17213. Result = Builder.CreatePointerCast(Result, Args.SrcType);
  17214. // Emit an alignment assumption to ensure that the new alignment is
  17215. // propagated to loads/stores, etc.
  17216. emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
  17217. }
  17218. assert(Result->getType() == Args.SrcType);
  17219. return RValue::get(Result);
  17220. }
  17221. Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
  17222. const CallExpr *E) {
  17223. switch (BuiltinID) {
  17224. case WebAssembly::BI__builtin_wasm_memory_size: {
  17225. llvm::Type *ResultType = ConvertType(E->getType());
  17226. Value *I = EmitScalarExpr(E->getArg(0));
  17227. Function *Callee =
  17228. CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
  17229. return Builder.CreateCall(Callee, I);
  17230. }
  17231. case WebAssembly::BI__builtin_wasm_memory_grow: {
  17232. llvm::Type *ResultType = ConvertType(E->getType());
  17233. Value *Args[] = {EmitScalarExpr(E->getArg(0)),
  17234. EmitScalarExpr(E->getArg(1))};
  17235. Function *Callee =
  17236. CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
  17237. return Builder.CreateCall(Callee, Args);
  17238. }
  17239. case WebAssembly::BI__builtin_wasm_tls_size: {
  17240. llvm::Type *ResultType = ConvertType(E->getType());
  17241. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
  17242. return Builder.CreateCall(Callee);
  17243. }
  17244. case WebAssembly::BI__builtin_wasm_tls_align: {
  17245. llvm::Type *ResultType = ConvertType(E->getType());
  17246. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
  17247. return Builder.CreateCall(Callee);
  17248. }
  17249. case WebAssembly::BI__builtin_wasm_tls_base: {
  17250. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
  17251. return Builder.CreateCall(Callee);
  17252. }
  17253. case WebAssembly::BI__builtin_wasm_throw: {
  17254. Value *Tag = EmitScalarExpr(E->getArg(0));
  17255. Value *Obj = EmitScalarExpr(E->getArg(1));
  17256. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
  17257. return Builder.CreateCall(Callee, {Tag, Obj});
  17258. }
  17259. case WebAssembly::BI__builtin_wasm_rethrow: {
  17260. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
  17261. return Builder.CreateCall(Callee);
  17262. }
  17263. case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
  17264. Value *Addr = EmitScalarExpr(E->getArg(0));
  17265. Value *Expected = EmitScalarExpr(E->getArg(1));
  17266. Value *Timeout = EmitScalarExpr(E->getArg(2));
  17267. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
  17268. return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  17269. }
  17270. case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
  17271. Value *Addr = EmitScalarExpr(E->getArg(0));
  17272. Value *Expected = EmitScalarExpr(E->getArg(1));
  17273. Value *Timeout = EmitScalarExpr(E->getArg(2));
  17274. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
  17275. return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  17276. }
  17277. case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
  17278. Value *Addr = EmitScalarExpr(E->getArg(0));
  17279. Value *Count = EmitScalarExpr(E->getArg(1));
  17280. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
  17281. return Builder.CreateCall(Callee, {Addr, Count});
  17282. }
  17283. case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
  17284. case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
  17285. case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
  17286. case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
  17287. Value *Src = EmitScalarExpr(E->getArg(0));
  17288. llvm::Type *ResT = ConvertType(E->getType());
  17289. Function *Callee =
  17290. CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
  17291. return Builder.CreateCall(Callee, {Src});
  17292. }
  17293. case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
  17294. case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
  17295. case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
  17296. case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
  17297. Value *Src = EmitScalarExpr(E->getArg(0));
  17298. llvm::Type *ResT = ConvertType(E->getType());
  17299. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
  17300. {ResT, Src->getType()});
  17301. return Builder.CreateCall(Callee, {Src});
  17302. }
  17303. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
  17304. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
  17305. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
  17306. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
  17307. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
  17308. Value *Src = EmitScalarExpr(E->getArg(0));
  17309. llvm::Type *ResT = ConvertType(E->getType());
  17310. Function *Callee =
  17311. CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
  17312. return Builder.CreateCall(Callee, {Src});
  17313. }
  17314. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
  17315. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
  17316. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
  17317. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
  17318. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
  17319. Value *Src = EmitScalarExpr(E->getArg(0));
  17320. llvm::Type *ResT = ConvertType(E->getType());
  17321. Function *Callee =
  17322. CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
  17323. return Builder.CreateCall(Callee, {Src});
  17324. }
  17325. case WebAssembly::BI__builtin_wasm_min_f32:
  17326. case WebAssembly::BI__builtin_wasm_min_f64:
  17327. case WebAssembly::BI__builtin_wasm_min_f32x4:
  17328. case WebAssembly::BI__builtin_wasm_min_f64x2: {
  17329. Value *LHS = EmitScalarExpr(E->getArg(0));
  17330. Value *RHS = EmitScalarExpr(E->getArg(1));
  17331. Function *Callee =
  17332. CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
  17333. return Builder.CreateCall(Callee, {LHS, RHS});
  17334. }
  17335. case WebAssembly::BI__builtin_wasm_max_f32:
  17336. case WebAssembly::BI__builtin_wasm_max_f64:
  17337. case WebAssembly::BI__builtin_wasm_max_f32x4:
  17338. case WebAssembly::BI__builtin_wasm_max_f64x2: {
  17339. Value *LHS = EmitScalarExpr(E->getArg(0));
  17340. Value *RHS = EmitScalarExpr(E->getArg(1));
  17341. Function *Callee =
  17342. CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
  17343. return Builder.CreateCall(Callee, {LHS, RHS});
  17344. }
  17345. case WebAssembly::BI__builtin_wasm_pmin_f32x4:
  17346. case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
  17347. Value *LHS = EmitScalarExpr(E->getArg(0));
  17348. Value *RHS = EmitScalarExpr(E->getArg(1));
  17349. Function *Callee =
  17350. CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
  17351. return Builder.CreateCall(Callee, {LHS, RHS});
  17352. }
  17353. case WebAssembly::BI__builtin_wasm_pmax_f32x4:
  17354. case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
  17355. Value *LHS = EmitScalarExpr(E->getArg(0));
  17356. Value *RHS = EmitScalarExpr(E->getArg(1));
  17357. Function *Callee =
  17358. CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
  17359. return Builder.CreateCall(Callee, {LHS, RHS});
  17360. }
  17361. case WebAssembly::BI__builtin_wasm_ceil_f32x4:
  17362. case WebAssembly::BI__builtin_wasm_floor_f32x4:
  17363. case WebAssembly::BI__builtin_wasm_trunc_f32x4:
  17364. case WebAssembly::BI__builtin_wasm_nearest_f32x4:
  17365. case WebAssembly::BI__builtin_wasm_ceil_f64x2:
  17366. case WebAssembly::BI__builtin_wasm_floor_f64x2:
  17367. case WebAssembly::BI__builtin_wasm_trunc_f64x2:
  17368. case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
  17369. unsigned IntNo;
  17370. switch (BuiltinID) {
  17371. case WebAssembly::BI__builtin_wasm_ceil_f32x4:
  17372. case WebAssembly::BI__builtin_wasm_ceil_f64x2:
  17373. IntNo = Intrinsic::ceil;
  17374. break;
  17375. case WebAssembly::BI__builtin_wasm_floor_f32x4:
  17376. case WebAssembly::BI__builtin_wasm_floor_f64x2:
  17377. IntNo = Intrinsic::floor;
  17378. break;
  17379. case WebAssembly::BI__builtin_wasm_trunc_f32x4:
  17380. case WebAssembly::BI__builtin_wasm_trunc_f64x2:
  17381. IntNo = Intrinsic::trunc;
  17382. break;
  17383. case WebAssembly::BI__builtin_wasm_nearest_f32x4:
  17384. case WebAssembly::BI__builtin_wasm_nearest_f64x2:
  17385. IntNo = Intrinsic::nearbyint;
  17386. break;
  17387. default:
  17388. llvm_unreachable("unexpected builtin ID");
  17389. }
  17390. Value *Value = EmitScalarExpr(E->getArg(0));
  17391. Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  17392. return Builder.CreateCall(Callee, Value);
  17393. }
  17394. case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
  17395. Value *Src = EmitScalarExpr(E->getArg(0));
  17396. Value *Indices = EmitScalarExpr(E->getArg(1));
  17397. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
  17398. return Builder.CreateCall(Callee, {Src, Indices});
  17399. }
  17400. case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
  17401. case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
  17402. case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
  17403. case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
  17404. case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
  17405. case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
  17406. case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
  17407. case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
  17408. unsigned IntNo;
  17409. switch (BuiltinID) {
  17410. case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
  17411. case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
  17412. IntNo = Intrinsic::sadd_sat;
  17413. break;
  17414. case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
  17415. case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
  17416. IntNo = Intrinsic::uadd_sat;
  17417. break;
  17418. case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
  17419. case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
  17420. IntNo = Intrinsic::wasm_sub_sat_signed;
  17421. break;
  17422. case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
  17423. case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
  17424. IntNo = Intrinsic::wasm_sub_sat_unsigned;
  17425. break;
  17426. default:
  17427. llvm_unreachable("unexpected builtin ID");
  17428. }
  17429. Value *LHS = EmitScalarExpr(E->getArg(0));
  17430. Value *RHS = EmitScalarExpr(E->getArg(1));
  17431. Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  17432. return Builder.CreateCall(Callee, {LHS, RHS});
  17433. }
  17434. case WebAssembly::BI__builtin_wasm_abs_i8x16:
  17435. case WebAssembly::BI__builtin_wasm_abs_i16x8:
  17436. case WebAssembly::BI__builtin_wasm_abs_i32x4:
  17437. case WebAssembly::BI__builtin_wasm_abs_i64x2: {
  17438. Value *Vec = EmitScalarExpr(E->getArg(0));
  17439. Value *Neg = Builder.CreateNeg(Vec, "neg");
  17440. Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
  17441. Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
  17442. return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
  17443. }
  17444. case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  17445. case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  17446. case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  17447. case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  17448. case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  17449. case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  17450. case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  17451. case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  17452. case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  17453. case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  17454. case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  17455. case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
  17456. Value *LHS = EmitScalarExpr(E->getArg(0));
  17457. Value *RHS = EmitScalarExpr(E->getArg(1));
  17458. Value *ICmp;
  17459. switch (BuiltinID) {
  17460. case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  17461. case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  17462. case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  17463. ICmp = Builder.CreateICmpSLT(LHS, RHS);
  17464. break;
  17465. case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  17466. case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  17467. case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  17468. ICmp = Builder.CreateICmpULT(LHS, RHS);
  17469. break;
  17470. case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  17471. case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  17472. case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  17473. ICmp = Builder.CreateICmpSGT(LHS, RHS);
  17474. break;
  17475. case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  17476. case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  17477. case WebAssembly::BI__builtin_wasm_max_u_i32x4:
  17478. ICmp = Builder.CreateICmpUGT(LHS, RHS);
  17479. break;
  17480. default:
  17481. llvm_unreachable("unexpected builtin ID");
  17482. }
  17483. return Builder.CreateSelect(ICmp, LHS, RHS);
  17484. }
  17485. case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
  17486. case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
  17487. Value *LHS = EmitScalarExpr(E->getArg(0));
  17488. Value *RHS = EmitScalarExpr(E->getArg(1));
  17489. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
  17490. ConvertType(E->getType()));
  17491. return Builder.CreateCall(Callee, {LHS, RHS});
  17492. }
  17493. case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
  17494. Value *LHS = EmitScalarExpr(E->getArg(0));
  17495. Value *RHS = EmitScalarExpr(E->getArg(1));
  17496. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
  17497. return Builder.CreateCall(Callee, {LHS, RHS});
  17498. }
  17499. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
  17500. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
  17501. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
  17502. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
  17503. Value *Vec = EmitScalarExpr(E->getArg(0));
  17504. unsigned IntNo;
  17505. switch (BuiltinID) {
  17506. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
  17507. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
  17508. IntNo = Intrinsic::wasm_extadd_pairwise_signed;
  17509. break;
  17510. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
  17511. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
  17512. IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
  17513. break;
  17514. default:
  17515. llvm_unreachable("unexpected builtin ID");
  17516. }
  17517. Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  17518. return Builder.CreateCall(Callee, Vec);
  17519. }
  17520. case WebAssembly::BI__builtin_wasm_bitselect: {
  17521. Value *V1 = EmitScalarExpr(E->getArg(0));
  17522. Value *V2 = EmitScalarExpr(E->getArg(1));
  17523. Value *C = EmitScalarExpr(E->getArg(2));
  17524. Function *Callee =
  17525. CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
  17526. return Builder.CreateCall(Callee, {V1, V2, C});
  17527. }
  17528. case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
  17529. Value *LHS = EmitScalarExpr(E->getArg(0));
  17530. Value *RHS = EmitScalarExpr(E->getArg(1));
  17531. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
  17532. return Builder.CreateCall(Callee, {LHS, RHS});
  17533. }
  17534. case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
  17535. Value *Vec = EmitScalarExpr(E->getArg(0));
  17536. Function *Callee =
  17537. CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
  17538. return Builder.CreateCall(Callee, {Vec});
  17539. }
  17540. case WebAssembly::BI__builtin_wasm_any_true_v128:
  17541. case WebAssembly::BI__builtin_wasm_all_true_i8x16:
  17542. case WebAssembly::BI__builtin_wasm_all_true_i16x8:
  17543. case WebAssembly::BI__builtin_wasm_all_true_i32x4:
  17544. case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
  17545. unsigned IntNo;
  17546. switch (BuiltinID) {
  17547. case WebAssembly::BI__builtin_wasm_any_true_v128:
  17548. IntNo = Intrinsic::wasm_anytrue;
  17549. break;
  17550. case WebAssembly::BI__builtin_wasm_all_true_i8x16:
  17551. case WebAssembly::BI__builtin_wasm_all_true_i16x8:
  17552. case WebAssembly::BI__builtin_wasm_all_true_i32x4:
  17553. case WebAssembly::BI__builtin_wasm_all_true_i64x2:
  17554. IntNo = Intrinsic::wasm_alltrue;
  17555. break;
  17556. default:
  17557. llvm_unreachable("unexpected builtin ID");
  17558. }
  17559. Value *Vec = EmitScalarExpr(E->getArg(0));
  17560. Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
  17561. return Builder.CreateCall(Callee, {Vec});
  17562. }
  17563. case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
  17564. case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
  17565. case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
  17566. case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
  17567. Value *Vec = EmitScalarExpr(E->getArg(0));
  17568. Function *Callee =
  17569. CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
  17570. return Builder.CreateCall(Callee, {Vec});
  17571. }
  17572. case WebAssembly::BI__builtin_wasm_abs_f32x4:
  17573. case WebAssembly::BI__builtin_wasm_abs_f64x2: {
  17574. Value *Vec = EmitScalarExpr(E->getArg(0));
  17575. Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
  17576. return Builder.CreateCall(Callee, {Vec});
  17577. }
  17578. case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
  17579. case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
  17580. Value *Vec = EmitScalarExpr(E->getArg(0));
  17581. Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
  17582. return Builder.CreateCall(Callee, {Vec});
  17583. }
  17584. case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
  17585. case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
  17586. case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
  17587. case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
  17588. Value *Low = EmitScalarExpr(E->getArg(0));
  17589. Value *High = EmitScalarExpr(E->getArg(1));
  17590. unsigned IntNo;
  17591. switch (BuiltinID) {
  17592. case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
  17593. case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
  17594. IntNo = Intrinsic::wasm_narrow_signed;
  17595. break;
  17596. case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
  17597. case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
  17598. IntNo = Intrinsic::wasm_narrow_unsigned;
  17599. break;
  17600. default:
  17601. llvm_unreachable("unexpected builtin ID");
  17602. }
  17603. Function *Callee =
  17604. CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
  17605. return Builder.CreateCall(Callee, {Low, High});
  17606. }
  17607. case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
  17608. case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
  17609. Value *Vec = EmitScalarExpr(E->getArg(0));
  17610. unsigned IntNo;
  17611. switch (BuiltinID) {
  17612. case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
  17613. IntNo = Intrinsic::fptosi_sat;
  17614. break;
  17615. case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
  17616. IntNo = Intrinsic::fptoui_sat;
  17617. break;
  17618. default:
  17619. llvm_unreachable("unexpected builtin ID");
  17620. }
  17621. llvm::Type *SrcT = Vec->getType();
  17622. llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
  17623. Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
  17624. Value *Trunc = Builder.CreateCall(Callee, Vec);
  17625. Value *Splat = Constant::getNullValue(TruncT);
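// Widen the <2 x i32> truncation result to <4 x i32> by shuffling in zeros for
// the upper two lanes.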
  17626. return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
  17627. }
  17628. case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
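// i8x16.shuffle takes the two input vectors followed by 16 constant lane
// indices, for 18 operands in total.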
  17629. Value *Ops[18];
  17630. size_t OpIdx = 0;
  17631. Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
  17632. Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
  17633. while (OpIdx < 18) {
  17634. std::optional<llvm::APSInt> LaneConst =
  17635. E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
  17636. assert(LaneConst && "Constant arg isn't actually constant?");
  17637. Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
  17638. }
  17639. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
  17640. return Builder.CreateCall(Callee, Ops);
  17641. }
  17642. case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
  17643. case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
  17644. case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
  17645. case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
  17646. Value *A = EmitScalarExpr(E->getArg(0));
  17647. Value *B = EmitScalarExpr(E->getArg(1));
  17648. Value *C = EmitScalarExpr(E->getArg(2));
  17649. unsigned IntNo;
  17650. switch (BuiltinID) {
  17651. case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
  17652. case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
  17653. IntNo = Intrinsic::wasm_relaxed_madd;
  17654. break;
  17655. case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
  17656. case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
  17657. IntNo = Intrinsic::wasm_relaxed_nmadd;
  17658. break;
  17659. default:
  17660. llvm_unreachable("unexpected builtin ID");
  17661. }
  17662. Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
  17663. return Builder.CreateCall(Callee, {A, B, C});
  17664. }
  17665. case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
  17666. case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
  17667. case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
  17668. case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
  17669. Value *A = EmitScalarExpr(E->getArg(0));
  17670. Value *B = EmitScalarExpr(E->getArg(1));
  17671. Value *C = EmitScalarExpr(E->getArg(2));
  17672. Function *Callee =
  17673. CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
  17674. return Builder.CreateCall(Callee, {A, B, C});
  17675. }
  17676. case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
  17677. Value *Src = EmitScalarExpr(E->getArg(0));
  17678. Value *Indices = EmitScalarExpr(E->getArg(1));
  17679. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
  17680. return Builder.CreateCall(Callee, {Src, Indices});
  17681. }
  17682. case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
  17683. case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
  17684. case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
  17685. case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
  17686. Value *LHS = EmitScalarExpr(E->getArg(0));
  17687. Value *RHS = EmitScalarExpr(E->getArg(1));
  17688. unsigned IntNo;
  17689. switch (BuiltinID) {
  17690. case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
  17691. case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
  17692. IntNo = Intrinsic::wasm_relaxed_min;
  17693. break;
  17694. case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
  17695. case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
  17696. IntNo = Intrinsic::wasm_relaxed_max;
  17697. break;
  17698. default:
  17699. llvm_unreachable("unexpected builtin ID");
  17700. }
  17701. Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
  17702. return Builder.CreateCall(Callee, {LHS, RHS});
  17703. }
  17704. case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
  17705. case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
  17706. case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
  17707. case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
  17708. Value *Vec = EmitScalarExpr(E->getArg(0));
  17709. unsigned IntNo;
  17710. switch (BuiltinID) {
  17711. case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
  17712. IntNo = Intrinsic::wasm_relaxed_trunc_signed;
  17713. break;
  17714. case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
  17715. IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
  17716. break;
  17717. case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
  17718. IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
  17719. break;
  17720. case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
  17721. IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
  17722. break;
  17723. default:
  17724. llvm_unreachable("unexpected builtin ID");
  17725. }
  17726. Function *Callee = CGM.getIntrinsic(IntNo);
  17727. return Builder.CreateCall(Callee, {Vec});
  17728. }
  17729. case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
  17730. Value *LHS = EmitScalarExpr(E->getArg(0));
  17731. Value *RHS = EmitScalarExpr(E->getArg(1));
  17732. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
  17733. return Builder.CreateCall(Callee, {LHS, RHS});
  17734. }
  17735. case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
  17736. Value *LHS = EmitScalarExpr(E->getArg(0));
  17737. Value *RHS = EmitScalarExpr(E->getArg(1));
  17738. Function *Callee =
  17739. CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
  17740. return Builder.CreateCall(Callee, {LHS, RHS});
  17741. }
  17742. case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
  17743. Value *LHS = EmitScalarExpr(E->getArg(0));
  17744. Value *RHS = EmitScalarExpr(E->getArg(1));
  17745. Value *Acc = EmitScalarExpr(E->getArg(2));
  17746. Function *Callee =
  17747. CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
  17748. return Builder.CreateCall(Callee, {LHS, RHS, Acc});
  17749. }
  17750. case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
  17751. Value *LHS = EmitScalarExpr(E->getArg(0));
  17752. Value *RHS = EmitScalarExpr(E->getArg(1));
  17753. Value *Acc = EmitScalarExpr(E->getArg(2));
  17754. Function *Callee =
  17755. CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
  17756. return Builder.CreateCall(Callee, {LHS, RHS, Acc});
  17757. }
  17758. default:
  17759. return nullptr;
  17760. }
  17761. }

static std::pair<Intrinsic::ID, unsigned>
getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
  struct Info {
    unsigned BuiltinID;
    Intrinsic::ID IntrinsicID;
    unsigned VecLen;
  };
  static Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
    // Legacy builtins that take a vector in place of a vector predicate.
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
#undef CUSTOM_BUILTIN_MAPPING
  };
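  // The table is sorted on first use: the comma expression below runs
  // llvm::sort during the (thread-safe) initialization of the function-local
  // static, so the llvm::lower_bound lookup further down can binary-search
  // Infos.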
  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
  (void)SortOnce;

  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
    return {Intrinsic::not_intrinsic, 0};

  return {F->IntrinsicID, F->VecLen};
}

Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  Intrinsic::ID ID;
  unsigned VecLen;
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);

  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
    // The base pointer is passed by address, so it needs to be loaded.
    Address A = EmitPointerWithAlignment(E->getArg(0));
    Address BP = Address(Builder.CreateBitCast(
        A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
    llvm::Value *Base = Builder.CreateLoad(BP);
    // The treatment of both loads and stores is the same: the arguments for
    // the builtin are the same as the arguments for the intrinsic.
    // Load:
    //   builtin(Base, Inc, Mod, Start)      -> intr(Base, Inc, Mod, Start)
    //   builtin(Base, Mod, Start)           -> intr(Base, Mod, Start)
    // Store:
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
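    // Illustrative example (a sketch, assuming the usual circular-addressing
    // builtin signatures): a source-level call such as
    //   v = __builtin_HEXAGON_L2_loadri_pci(&Base, /*Inc=*/4, Mod, Start);
    // is lowered to a call of llvm.hexagon.L2.loadri.pci on the loaded base
    // pointer with the remaining arguments forwarded unchanged; the updated
    // base returned by the intrinsic is stored back through the first
    // argument below.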
    SmallVector<llvm::Value*,5> Ops = { Base };
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));

    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
    // The load intrinsics generate two results (Value, NewBase), stores
    // generate one (NewBase). The new base address needs to be stored.
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
                                  : Result;
    llvm::Value *LV = Builder.CreateBitCast(
        EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    llvm::Value *RetVal =
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
    if (IsLoad)
      RetVal = Builder.CreateExtractValue(Result, 0);
    return RetVal;
  };

  // Handle the conversion of bit-reverse load intrinsics to bit code.
  // The intrinsic call after this function only reads from memory and the
  // write to memory is handled by the store instruction.
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
    // The intrinsic generates one result, which is the new value for the base
    // pointer. It needs to be returned. The result of the load instruction is
    // passed to the intrinsic by address, so the value needs to be stored.
    llvm::Value *BaseAddress =
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);

    // Expressions like &(*pt++) will be incremented per evaluation.
    // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
    // on each call.
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
                       Int8Ty, DestAddr.getAlignment());
    llvm::Value *DestAddress = DestAddr.getPointer();

    // Operands are Base, Dest, Modifier.
    // The intrinsic format in LLVM IR is defined as
    // { ValueType, i8* } (i8*, i32).
    llvm::Value *Result = Builder.CreateCall(
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});

    // The value needs to be stored as the variable is passed by reference.
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);

    // The store needs to be truncated to fit the destination type.
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
    // to be handled with stores of the respective destination type.
    DestVal = Builder.CreateTrunc(DestVal, DestTy);

    llvm::Value *DestForStore =
        Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
    Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
    // The updated value of the base pointer is returned.
    return Builder.CreateExtractValue(Result, 1);
  };
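  // Illustrative example for MakeBrevLd (a sketch, assuming the usual
  // bit-reverse builtin signatures): a call such as
  //   __builtin_brev_ldw(Base, &Dest, Mod);
  // becomes a call of llvm.hexagon.L2.loadri.pbr returning { i32, i8* }; the
  // i32 value is stored through the second argument, and the new base pointer
  // is returned to the caller.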

  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
                                     : Intrinsic::hexagon_V6_vandvrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Vec, Builder.getInt32(-1)});
  };
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
                                     : Intrinsic::hexagon_V6_vandqrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Pred, Builder.getInt32(-1)});
  };
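  // V2Q and Q2V bridge between HVX vectors and vector predicates: V2Q "ands"
  // the vector with an all-ones scalar via V6_vandvrt to form a predicate,
  // and Q2V expands a predicate back into a vector via V6_vandqrt (the _128B
  // variants are used for 128-byte vectors). Roughly speaking, Q2V(V2Q(v))
  // normalizes a "boolean" vector so each byte is either all zeros or all
  // ones.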

  switch (BuiltinID) {
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
  // and the corresponding C/C++ builtins use loads/stores to update
  // the predicate.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr = Builder.CreateElementBitCast(
        EmitPointerWithAlignment(E->getArg(2)), VecType);
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
                               PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }
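  // Illustrative source-level shape of the carry builtins above (a sketch,
  // assuming the usual HVX builtin signatures):
  //   HVX_Vector Sum = __builtin_HEXAGON_V6_vaddcarry(A, B, &Pred);
  // Pred is loaded and converted to a vector predicate (V2Q) before the call,
  // and the carry-out extracted from the intrinsic's second result is
  // converted back (Q2V) and stored to the same location.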
  // These are identical to the builtins above, except they don't consume an
  // input carry, only generate a carry-out. Since they still produce two
  // outputs, generate the store of the predicate, but no load.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr = Builder.CreateElementBitCast(
        EmitPointerWithAlignment(E->getArg(2)), VecType);
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
                               PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }

  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
    SmallVector<llvm::Value*,4> Ops;
    const Expr *PredOp = E->getArg(0);
    // There will be an implicit cast to a boolean vector. Strip it.
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
      if (Cast->getCastKind() == CK_BitCast)
        PredOp = Cast->getSubExpr();
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
    }
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
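  // Illustrative note for the legacy masked stores above (a sketch, assuming
  // the usual builtin signatures): in C the mask is written as a plain HVX
  // vector, e.g.
  //   __builtin_HEXAGON_V6_vmaskedstoreq(Mask, Addr, Val);
  // so the implicit bitcast to a boolean vector is stripped and the mask is
  // converted to a real vector predicate with V2Q before calling the
  // intrinsic.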

  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/true);
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/false);
  case Hexagon::BI__builtin_brev_ldub:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_ldb:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_lduh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldw:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
  case Hexagon::BI__builtin_brev_ldd:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
  } // switch

  return nullptr;
}

Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E,
                                             ReturnValueSlot ReturnValue) {
  SmallVector<Value *, 4> Ops;
  llvm::Type *ResultType = ConvertType(E->getType());

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  if (Error == ASTContext::GE_Missing_type) {
    // Vector intrinsics don't have a type string.
    assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
           BuiltinID <= clang::RISCV::LastRVVBuiltin);
    ICEArguments = 0;
    if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
        BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
      ICEArguments = 1 << 1;
  } else {
    assert(Error == ASTContext::GE_None && "Unexpected error");
  }

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }
    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    Ops.push_back(llvm::ConstantInt::get(
        getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
  }
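  // For example, for __builtin_rvv_vget_v and __builtin_rvv_vset_v the index
  // operand (bit 1 of ICEArguments above) must be an integer constant
  // expression, so it is folded to a ConstantInt in the loop above rather
  // than being emitted as a scalar expression.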

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  unsigned NF = 1;
  // The 0th bit simulates the `vta` of RVV
  // The 1st bit simulates the `vma` of RVV
  constexpr unsigned RVV_VTA = 0x1;
  constexpr unsigned RVV_VMA = 0x2;
  int PolicyAttrs = 0;
  bool IsMasked = false;

  // Required for overloaded intrinsics.
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");
  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clz_32:
  case RISCV::BI__builtin_riscv_clz_64:
  case RISCV::BI__builtin_riscv_ctz_32:
  case RISCV::BI__builtin_riscv_ctz_64:
  case RISCV::BI__builtin_riscv_clmul:
  case RISCV::BI__builtin_riscv_clmulh:
  case RISCV::BI__builtin_riscv_clmulr:
  case RISCV::BI__builtin_riscv_xperm4:
  case RISCV::BI__builtin_riscv_xperm8:
  case RISCV::BI__builtin_riscv_brev8:
  case RISCV::BI__builtin_riscv_zip_32:
  case RISCV::BI__builtin_riscv_unzip_32: {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin ID");
    // Zbb
    case RISCV::BI__builtin_riscv_orc_b_32:
    case RISCV::BI__builtin_riscv_orc_b_64:
      ID = Intrinsic::riscv_orc_b;
      break;
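    // Note: clz/ctz lower to the generic llvm.ctlz/llvm.cttz intrinsics; the
    // trailing i1 'false' argument means the result is defined (the operand's
    // bit width) even when the input is zero.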
    case RISCV::BI__builtin_riscv_clz_32:
    case RISCV::BI__builtin_riscv_clz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
      return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
    }
    case RISCV::BI__builtin_riscv_ctz_32:
    case RISCV::BI__builtin_riscv_ctz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
      return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
    }
    // Zbc
    case RISCV::BI__builtin_riscv_clmul:
      ID = Intrinsic::riscv_clmul;
      break;
    case RISCV::BI__builtin_riscv_clmulh:
      ID = Intrinsic::riscv_clmulh;
      break;
    case RISCV::BI__builtin_riscv_clmulr:
      ID = Intrinsic::riscv_clmulr;
      break;
    // Zbkx
    case RISCV::BI__builtin_riscv_xperm8:
      ID = Intrinsic::riscv_xperm8;
      break;
    case RISCV::BI__builtin_riscv_xperm4:
      ID = Intrinsic::riscv_xperm4;
      break;
    // Zbkb
    case RISCV::BI__builtin_riscv_brev8:
      ID = Intrinsic::riscv_brev8;
      break;
    case RISCV::BI__builtin_riscv_zip_32:
      ID = Intrinsic::riscv_zip;
      break;
    case RISCV::BI__builtin_riscv_unzip_32:
      ID = Intrinsic::riscv_unzip;
      break;
    }

    IntrinsicTypes = {ResultType};
    break;
  }

  // Zk builtins

  // Zknd
  case RISCV::BI__builtin_riscv_aes32dsi_32:
    ID = Intrinsic::riscv_aes32dsi;
    break;
  case RISCV::BI__builtin_riscv_aes32dsmi_32:
    ID = Intrinsic::riscv_aes32dsmi;
    break;
  case RISCV::BI__builtin_riscv_aes64ds_64:
    ID = Intrinsic::riscv_aes64ds;
    break;
  case RISCV::BI__builtin_riscv_aes64dsm_64:
    ID = Intrinsic::riscv_aes64dsm;
    break;
  case RISCV::BI__builtin_riscv_aes64im_64:
    ID = Intrinsic::riscv_aes64im;
    break;
  // Zkne
  case RISCV::BI__builtin_riscv_aes32esi_32:
    ID = Intrinsic::riscv_aes32esi;
    break;
  case RISCV::BI__builtin_riscv_aes32esmi_32:
    ID = Intrinsic::riscv_aes32esmi;
    break;
  case RISCV::BI__builtin_riscv_aes64es_64:
    ID = Intrinsic::riscv_aes64es;
    break;
  case RISCV::BI__builtin_riscv_aes64esm_64:
    ID = Intrinsic::riscv_aes64esm;
    break;
  // Zknd & Zkne
  case RISCV::BI__builtin_riscv_aes64ks1i_64:
    ID = Intrinsic::riscv_aes64ks1i;
    break;
  case RISCV::BI__builtin_riscv_aes64ks2_64:
    ID = Intrinsic::riscv_aes64ks2;
    break;
  // Zknh
  case RISCV::BI__builtin_riscv_sha256sig0:
    ID = Intrinsic::riscv_sha256sig0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sig1:
    ID = Intrinsic::riscv_sha256sig1;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sum0:
    ID = Intrinsic::riscv_sha256sum0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha256sum1:
    ID = Intrinsic::riscv_sha256sum1;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sha512sig0_64:
    ID = Intrinsic::riscv_sha512sig0;
    break;
  case RISCV::BI__builtin_riscv_sha512sig0h_32:
    ID = Intrinsic::riscv_sha512sig0h;
    break;
  case RISCV::BI__builtin_riscv_sha512sig0l_32:
    ID = Intrinsic::riscv_sha512sig0l;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1_64:
    ID = Intrinsic::riscv_sha512sig1;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1h_32:
    ID = Intrinsic::riscv_sha512sig1h;
    break;
  case RISCV::BI__builtin_riscv_sha512sig1l_32:
    ID = Intrinsic::riscv_sha512sig1l;
    break;
  case RISCV::BI__builtin_riscv_sha512sum0_64:
    ID = Intrinsic::riscv_sha512sum0;
    break;
  case RISCV::BI__builtin_riscv_sha512sum0r_32:
    ID = Intrinsic::riscv_sha512sum0r;
    break;
  case RISCV::BI__builtin_riscv_sha512sum1_64:
    ID = Intrinsic::riscv_sha512sum1;
    break;
  case RISCV::BI__builtin_riscv_sha512sum1r_32:
    ID = Intrinsic::riscv_sha512sum1r;
    break;
  // Zksed
  case RISCV::BI__builtin_riscv_sm4ks:
    ID = Intrinsic::riscv_sm4ks;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sm4ed:
    ID = Intrinsic::riscv_sm4ed;
    IntrinsicTypes = {ResultType};
    break;
  // Zksh
  case RISCV::BI__builtin_riscv_sm3p0:
    ID = Intrinsic::riscv_sm3p0;
    IntrinsicTypes = {ResultType};
    break;
  case RISCV::BI__builtin_riscv_sm3p1:
    ID = Intrinsic::riscv_sm3p1;
    IntrinsicTypes = {ResultType};
    break;
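
  // The .inc file below is generated at build time by TableGen from
  // riscv_vector.td; it expands into the codegen cases for the RVV vector
  // builtins, filling in ID, IntrinsicTypes, Ops and the mask/policy state
  // declared above.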
  // Vector builtins are handled from here.
#include "clang/Basic/riscv_vector_builtin_cg.inc"
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
  return Builder.CreateCall(F, Ops, "");
}

Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
                                                 const CallExpr *E) {
  SmallVector<Value *, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default:
    llvm_unreachable("unexpected builtin ID.");
  case LoongArch::BI__builtin_loongarch_cacop_d:
    ID = Intrinsic::loongarch_cacop_d;
    break;
  case LoongArch::BI__builtin_loongarch_cacop_w:
    ID = Intrinsic::loongarch_cacop_w;
    break;
  case LoongArch::BI__builtin_loongarch_dbar:
    ID = Intrinsic::loongarch_dbar;
    break;
  case LoongArch::BI__builtin_loongarch_break:
    ID = Intrinsic::loongarch_break;
    break;
  case LoongArch::BI__builtin_loongarch_ibar:
    ID = Intrinsic::loongarch_ibar;
    break;
  case LoongArch::BI__builtin_loongarch_movfcsr2gr:
    ID = Intrinsic::loongarch_movfcsr2gr;
    break;
  case LoongArch::BI__builtin_loongarch_movgr2fcsr:
    ID = Intrinsic::loongarch_movgr2fcsr;
    break;
  case LoongArch::BI__builtin_loongarch_syscall:
    ID = Intrinsic::loongarch_syscall;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_b_w:
    ID = Intrinsic::loongarch_crc_w_b_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_h_w:
    ID = Intrinsic::loongarch_crc_w_h_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_w_w:
    ID = Intrinsic::loongarch_crc_w_w_w;
    break;
  case LoongArch::BI__builtin_loongarch_crc_w_d_w:
    ID = Intrinsic::loongarch_crc_w_d_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
    ID = Intrinsic::loongarch_crcc_w_b_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
    ID = Intrinsic::loongarch_crcc_w_h_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
    ID = Intrinsic::loongarch_crcc_w_w_w;
    break;
  case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
    ID = Intrinsic::loongarch_crcc_w_d_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrrd_w:
    ID = Intrinsic::loongarch_csrrd_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrwr_w:
    ID = Intrinsic::loongarch_csrwr_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrxchg_w:
    ID = Intrinsic::loongarch_csrxchg_w;
    break;
  case LoongArch::BI__builtin_loongarch_csrrd_d:
    ID = Intrinsic::loongarch_csrrd_d;
    break;
  case LoongArch::BI__builtin_loongarch_csrwr_d:
    ID = Intrinsic::loongarch_csrwr_d;
    break;
  case LoongArch::BI__builtin_loongarch_csrxchg_d:
    ID = Intrinsic::loongarch_csrxchg_d;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_b:
    ID = Intrinsic::loongarch_iocsrrd_b;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_h:
    ID = Intrinsic::loongarch_iocsrrd_h;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_w:
    ID = Intrinsic::loongarch_iocsrrd_w;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrrd_d:
    ID = Intrinsic::loongarch_iocsrrd_d;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_b:
    ID = Intrinsic::loongarch_iocsrwr_b;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_h:
    ID = Intrinsic::loongarch_iocsrwr_h;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_w:
    ID = Intrinsic::loongarch_iocsrwr_w;
    break;
  case LoongArch::BI__builtin_loongarch_iocsrwr_d:
    ID = Intrinsic::loongarch_iocsrwr_d;
    break;
  case LoongArch::BI__builtin_loongarch_cpucfg:
    ID = Intrinsic::loongarch_cpucfg;
    break;
  case LoongArch::BI__builtin_loongarch_asrtle_d:
    ID = Intrinsic::loongarch_asrtle_d;
    break;
  case LoongArch::BI__builtin_loongarch_asrtgt_d:
    ID = Intrinsic::loongarch_asrtgt_d;
    break;
  case LoongArch::BI__builtin_loongarch_lddir_d:
    ID = Intrinsic::loongarch_lddir_d;
    break;
  case LoongArch::BI__builtin_loongarch_ldpte_d:
    ID = Intrinsic::loongarch_ldpte_d;
    break;
    // TODO: Support more Intrinsics.
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID);
  return Builder.CreateCall(F, Ops);
}