CGBuiltin.cpp 777 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
85751857618577185781857918580185811858218583185841858518586185871858818589185901859118592185931859418595185961859718598185991860018601186021860318604186051860618607186081860918610186111861218613186141861518616186171861818619186201862118622186231862418625186261862718628186291863018631186321863318634186351863618637186381863918640186411864218643186441864518646186471864818649186501865118652186531865418655186561865718658186591866018661186621866318664186651866618667186681866918670186711867218673186741867518676186771867818679186801868118682186831868418685186861868718688186891869018691186921869318694186951869618697186981869918700187011870218703187041870518706187071870818709187101871118712187131871418715187161871718718187191872018721187221872318724187251872618727187281872918730187311873218733187341873518736187371873818739187401874118742187431874418745187461874718748187491875018751187521875318754187551875618757187581875918760187611876218763187641876518766187671876818769187701877118772187731877418775187761877718778187791878018781187821878318784187851878618787187881878918790187911879218793187941879518796187971879818799188001880118802188031880418805188061880718808188091881018811188121881318814188151881618817188181881918820188211882218823188241882518826188271882818829188301883118832188331883418835188361883718838188391884018841188421884318844188451884618847188481884918850188511885218853188541885518856188571885818859188601886118862188631886418865188661886718868188691887018871188721887318874188751887618877188781887918880188811888218883188841888518886188871888818889188901889118892188931889418895
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/X86TargetParser.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static
int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  return std::min(High, std::max(Low, Value));
}

static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 8> F128Builtins{
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC
    // once the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.find(BuiltinID) != F128Builtins.end())
      Name = F128Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
  }
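  // Note: BuiltinInfo.getName() returns the full "__builtin_*" spelling, so
  // the "+ 10" above skips the 10-character "__builtin_" prefix to recover
  // the library function name (e.g. "__builtin_fabsf" -> "fabsf").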
  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return the success flag of the
///        cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
/// invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}

/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
/// T _InterlockedCompareExchange(T volatile *Destination,
///                               T Exchange,
///                               T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
/// cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.
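///
/// For example, _InterlockedCompareExchange(&x, /*Exchange=*/1, /*Comparand=*/0)
/// conceptually becomes: cmpxchg ptr %x, 0 /*compare*/, 1 /*new value*/.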
static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
                   Destination, Comparand, Exchange,
                   SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}

// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));

  assert(Destination->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());
  assert(ComparandPtr->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
                          CGF.getContext().toCharUnitsFromBits(128));
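  // Note: toCharUnitsFromBits(128) is 16 bytes; the comparand/result slot is
  // treated as 16-byte aligned here, which is the alignment the
  // _InterlockedCompareExchange128 contract expects of _ComparandResult.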
  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandResult);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}

static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
                   AtomicRMWInst::Add,
                   CGF.EmitScalarExpr(E->getArg(0)),
                   ConstantInt::get(IntTy, 1),
                   Ordering);
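  // atomicrmw returns the value held *before* the add, but the Interlocked
  // increment builtins return the *new* value, hence the explicit +1 below.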
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
                   AtomicRMWInst::Sub,
                   CGF.EmitScalarExpr(E->getArg(0)),
                   ConstantInt::get(IntTy, 1),
                   Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}

// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E, unsigned IntrinsicID,
                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating the value in order to extract the higher-order
    // double, which we will use to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
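  // ICmpSLT against zero tests the most significant bit, i.e. the IEEE sign
  // bit of the original floating-point value.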
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  return Call;
}

namespace {
struct WidthAndSignedness {
  unsigned Width;
  bool Signed;
};
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
  615. static struct WidthAndSignedness
  616. EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  617. assert(Types.size() > 0 && "Empty list of types.");
  618. // If any of the given types is signed, we must return a signed type.
  619. bool Signed = false;
  620. for (const auto &Type : Types) {
  621. Signed |= Type.Signed;
  622. }
  623. // The encompassing type must have a width greater than or equal to the width
  624. // of the specified types. Additionally, if the encompassing type is signed,
  625. // its width must be strictly greater than the width of any unsigned types
  626. // given.
  627. unsigned Width = 0;
  628. for (const auto &Type : Types) {
  629. unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
  630. if (Width < MinWidth) {
  631. Width = MinWidth;
  632. }
  633. }
  634. return {Width, Signed};
  635. }
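// Worked example of the rule above: encompassing {32-bit unsigned, 32-bit
// signed} yields Signed = true; the unsigned input then requires a width of
// 33 while the signed input requires 32, so the result is a 33-bit signed
// type, which can hold every value of both inputs.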
  636. Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  637. llvm::Type *DestType = Int8PtrTy;
  638. if (ArgValue->getType() != DestType)
  639. ArgValue =
  640. Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
  641. Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  642. return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
  643. }
  644. /// Checks if using the result of __builtin_object_size(p, @p From) in place of
645. /// __builtin_object_size(p, @p To) is correct.
  646. static bool areBOSTypesCompatible(int From, int To) {
  647. // Note: Our __builtin_object_size implementation currently treats Type=0 and
  648. // Type=2 identically. Encoding this implementation detail here may make
  649. // improving __builtin_object_size difficult in the future, so it's omitted.
  650. return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
  651. }
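// For example, areBOSTypesCompatible(0, 1) is true (a type-0 result is an
// acceptable answer for a type-1 query), while areBOSTypesCompatible(1, 0)
// is false.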
  652. static llvm::Value *
  653. getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  654. return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
  655. }
  656. llvm::Value *
  657. CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
  658. llvm::IntegerType *ResType,
  659. llvm::Value *EmittedE,
  660. bool IsDynamic) {
  661. uint64_t ObjectSize;
  662. if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
  663. return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  664. return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
  665. }
  666. /// Returns a Value corresponding to the size of the given expression.
  667. /// This Value may be either of the following:
668. /// - An llvm::Argument (if E is a param with the pass_object_size attribute on
  669. /// it)
  670. /// - A call to the @llvm.objectsize intrinsic
  671. ///
  672. /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
  673. /// and we wouldn't otherwise try to reference a pass_object_size parameter,
  674. /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
  675. llvm::Value *
  676. CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
  677. llvm::IntegerType *ResType,
  678. llvm::Value *EmittedE, bool IsDynamic) {
  679. // We need to reference an argument if the pointer is a parameter with the
  680. // pass_object_size attribute.
  681. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
  682. auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
  683. auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
  684. if (Param != nullptr && PS != nullptr &&
  685. areBOSTypesCompatible(PS->getType(), Type)) {
  686. auto Iter = SizeArguments.find(Param);
  687. assert(Iter != SizeArguments.end());
  688. const ImplicitParamDecl *D = Iter->second;
  689. auto DIter = LocalDeclMap.find(D);
  690. assert(DIter != LocalDeclMap.end());
  691. return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
  692. getContext().getSizeType(), E->getBeginLoc());
  693. }
  694. }
  695. // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  696. // evaluate E for side-effects. In either case, we shouldn't lower to
  697. // @llvm.objectsize.
  698. if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
  699. return getDefaultBuiltinObjectSizeResult(Type, ResType);
  700. Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  701. assert(Ptr->getType()->isPointerTy() &&
  702. "Non-pointer passed to __builtin_object_size?");
  703. Function *F =
  704. CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
705. // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
  706. Value *Min = Builder.getInt1((Type & 2) != 0);
707. // For GCC compatibility, __builtin_object_size treats NULL as having unknown size.
  708. Value *NullIsUnknown = Builder.getTrue();
  709. Value *Dynamic = Builder.getInt1(IsDynamic);
  710. return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
  711. }
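// Illustrative sketch of the lowering: __builtin_object_size(p, 0) becomes,
// roughly (exact type mangling depends on the LLVM version),
//   %size = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
// i.e. min = false (types 0/1 report the maximum), null-is-unknown = true,
// dynamic = false; __builtin_dynamic_object_size sets the last flag to true.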
  712. namespace {
  713. /// A struct to generically describe a bit test intrinsic.
  714. struct BitTest {
  715. enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  716. enum InterlockingKind : uint8_t {
  717. Unlocked,
  718. Sequential,
  719. Acquire,
  720. Release,
  721. NoFence
  722. };
  723. ActionKind Action;
  724. InterlockingKind Interlocking;
  725. bool Is64Bit;
  726. static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
  727. };
  728. } // namespace
  729. BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  730. switch (BuiltinID) {
  731. // Main portable variants.
  732. case Builtin::BI_bittest:
  733. return {TestOnly, Unlocked, false};
  734. case Builtin::BI_bittestandcomplement:
  735. return {Complement, Unlocked, false};
  736. case Builtin::BI_bittestandreset:
  737. return {Reset, Unlocked, false};
  738. case Builtin::BI_bittestandset:
  739. return {Set, Unlocked, false};
  740. case Builtin::BI_interlockedbittestandreset:
  741. return {Reset, Sequential, false};
  742. case Builtin::BI_interlockedbittestandset:
  743. return {Set, Sequential, false};
  744. // X86-specific 64-bit variants.
  745. case Builtin::BI_bittest64:
  746. return {TestOnly, Unlocked, true};
  747. case Builtin::BI_bittestandcomplement64:
  748. return {Complement, Unlocked, true};
  749. case Builtin::BI_bittestandreset64:
  750. return {Reset, Unlocked, true};
  751. case Builtin::BI_bittestandset64:
  752. return {Set, Unlocked, true};
  753. case Builtin::BI_interlockedbittestandreset64:
  754. return {Reset, Sequential, true};
  755. case Builtin::BI_interlockedbittestandset64:
  756. return {Set, Sequential, true};
  757. // ARM/AArch64-specific ordering variants.
  758. case Builtin::BI_interlockedbittestandset_acq:
  759. return {Set, Acquire, false};
  760. case Builtin::BI_interlockedbittestandset_rel:
  761. return {Set, Release, false};
  762. case Builtin::BI_interlockedbittestandset_nf:
  763. return {Set, NoFence, false};
  764. case Builtin::BI_interlockedbittestandreset_acq:
  765. return {Reset, Acquire, false};
  766. case Builtin::BI_interlockedbittestandreset_rel:
  767. return {Reset, Release, false};
  768. case Builtin::BI_interlockedbittestandreset_nf:
  769. return {Reset, NoFence, false};
  770. }
  771. llvm_unreachable("expected only bittest intrinsics");
  772. }
  773. static char bitActionToX86BTCode(BitTest::ActionKind A) {
  774. switch (A) {
  775. case BitTest::TestOnly: return '\0';
  776. case BitTest::Complement: return 'c';
  777. case BitTest::Reset: return 'r';
  778. case BitTest::Set: return 's';
  779. }
  780. llvm_unreachable("invalid action");
  781. }
  782. static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
  783. BitTest BT,
  784. const CallExpr *E, Value *BitBase,
  785. Value *BitPos) {
  786. char Action = bitActionToX86BTCode(BT.Action);
  787. char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
  788. // Build the assembly.
  789. SmallString<64> Asm;
  790. raw_svector_ostream AsmOS(Asm);
  791. if (BT.Interlocking != BitTest::Unlocked)
  792. AsmOS << "lock ";
  793. AsmOS << "bt";
  794. if (Action)
  795. AsmOS << Action;
  796. AsmOS << SizeSuffix << " $2, ($1)";
  797. // Build the constraints. FIXME: We should support immediates when possible.
  798. std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  799. std::string MachineClobbers = CGF.getTarget().getClobbers();
  800. if (!MachineClobbers.empty()) {
  801. Constraints += ',';
  802. Constraints += MachineClobbers;
  803. }
  804. llvm::IntegerType *IntType = llvm::IntegerType::get(
  805. CGF.getLLVMContext(),
  806. CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  807. llvm::Type *IntPtrType = IntType->getPointerTo();
  808. llvm::FunctionType *FTy =
  809. llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
  810. llvm::InlineAsm *IA =
  811. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  812. return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
  813. }
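// For example, _interlockedbittestandset(base, pos) (32-bit, sequentially
// consistent) produces the inline-asm string
//   "lock btsl $2, ($1)"
// with constraints "={@ccc},r,r,~{cc},~{memory}" (plus any target clobbers),
// so the carry flag -- the previous value of the tested bit -- comes back
// directly as the i8 result.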
  814. static llvm::AtomicOrdering
  815. getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  816. switch (I) {
  817. case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
  818. case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  819. case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
  820. case BitTest::Release: return llvm::AtomicOrdering::Release;
  821. case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
  822. }
  823. llvm_unreachable("invalid interlocking");
  824. }
  825. /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
  826. /// bits and a bit position and read and optionally modify the bit at that
  827. /// position. The position index can be arbitrarily large, i.e. it can be larger
  828. /// than 31 or 63, so we need an indexed load in the general case.
  829. static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
  830. unsigned BuiltinID,
  831. const CallExpr *E) {
  832. Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  833. Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
  834. BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
  835. // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  836. // indexing operation internally. Use them if possible.
  837. if (CGF.getTarget().getTriple().isX86())
  838. return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
  839. // Otherwise, use generic code to load one byte and test the bit. Use all but
  840. // the bottom three bits as the array index, and the bottom three bits to form
  841. // a mask.
  842. // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  843. Value *ByteIndex = CGF.Builder.CreateAShr(
  844. BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  845. Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  846. Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
  847. ByteIndex, "bittest.byteaddr"),
  848. CharUnits::One());
  849. Value *PosLow =
  850. CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
  851. llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
  852. // The updating instructions will need a mask.
  853. Value *Mask = nullptr;
  854. if (BT.Action != BitTest::TestOnly) {
  855. Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
  856. "bittest.mask");
  857. }
  858. // Check the action and ordering of the interlocked intrinsics.
  859. llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
  860. Value *OldByte = nullptr;
  861. if (Ordering != llvm::AtomicOrdering::NotAtomic) {
  862. // Emit a combined atomicrmw load/store operation for the interlocked
  863. // intrinsics.
  864. llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
  865. if (BT.Action == BitTest::Reset) {
  866. Mask = CGF.Builder.CreateNot(Mask);
  867. RMWOp = llvm::AtomicRMWInst::And;
  868. }
  869. OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
  870. Ordering);
  871. } else {
  872. // Emit a plain load for the non-interlocked intrinsics.
  873. OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
  874. Value *NewByte = nullptr;
  875. switch (BT.Action) {
  876. case BitTest::TestOnly:
  877. // Don't store anything.
  878. break;
  879. case BitTest::Complement:
  880. NewByte = CGF.Builder.CreateXor(OldByte, Mask);
  881. break;
  882. case BitTest::Reset:
  883. NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
  884. break;
  885. case BitTest::Set:
  886. NewByte = CGF.Builder.CreateOr(OldByte, Mask);
  887. break;
  888. }
  889. if (NewByte)
  890. CGF.Builder.CreateStore(NewByte, ByteAddr);
  891. }
  892. // However we loaded the old byte, either by plain load or atomicrmw, shift
  893. // the bit into the low position and mask it to 0 or 1.
  894. Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  895. return CGF.Builder.CreateAnd(
  896. ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
  897. }
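// Illustrative sketch of the generic path for _interlockedbittestandset,
// written as condensed pseudo-IR (the real output splits these into the
// separate instructions emitted above):
//   %byteidx  = ashr i64 %pos, 3
//   %byteaddr = getelementptr inbounds i8, ptr %base, i64 %byteidx
//   %mask     = shl i8 1, (trunc %pos & 7)
//   %old      = atomicrmw or ptr %byteaddr, i8 %mask seq_cst
//   %res      = and i8 (lshr i8 %old, %poslow), 1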
  898. static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
  899. unsigned BuiltinID,
  900. const CallExpr *E) {
  901. Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
  902. SmallString<64> Asm;
  903. raw_svector_ostream AsmOS(Asm);
  904. llvm::IntegerType *RetType = CGF.Int32Ty;
  905. switch (BuiltinID) {
  906. case clang::PPC::BI__builtin_ppc_ldarx:
  907. AsmOS << "ldarx ";
  908. RetType = CGF.Int64Ty;
  909. break;
  910. case clang::PPC::BI__builtin_ppc_lwarx:
  911. AsmOS << "lwarx ";
  912. RetType = CGF.Int32Ty;
  913. break;
  914. case clang::PPC::BI__builtin_ppc_lharx:
  915. AsmOS << "lharx ";
  916. RetType = CGF.Int16Ty;
  917. break;
  918. case clang::PPC::BI__builtin_ppc_lbarx:
  919. AsmOS << "lbarx ";
  920. RetType = CGF.Int8Ty;
  921. break;
  922. default:
  923. llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  924. }
  925. AsmOS << "$0, ${1:y}";
  926. std::string Constraints = "=r,*Z,~{memory}";
  927. std::string MachineClobbers = CGF.getTarget().getClobbers();
  928. if (!MachineClobbers.empty()) {
  929. Constraints += ',';
  930. Constraints += MachineClobbers;
  931. }
  932. llvm::Type *IntPtrType = RetType->getPointerTo();
  933. llvm::FunctionType *FTy =
  934. llvm::FunctionType::get(RetType, {IntPtrType}, false);
  935. llvm::InlineAsm *IA =
  936. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  937. llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  938. CI->addParamAttr(
  939. 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  940. return CI;
  941. }
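// For example, __builtin_ppc_lwarx(p) is emitted as the inline asm
//   "lwarx $0, ${1:y}"
// with constraints "=r,*Z,~{memory}", an i32 result, and an elementtype(i32)
// attribute on the pointer operand so the backend knows the accessed type.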
  942. namespace {
  943. enum class MSVCSetJmpKind {
  944. _setjmpex,
  945. _setjmp3,
  946. _setjmp
  947. };
  948. }
  949. /// MSVC handles setjmp a bit differently on different platforms. On every
  950. /// architecture except 32-bit x86, the frame address is passed. On x86, extra
  951. /// parameters can be passed as variadic arguments, but we always pass none.
  952. static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
  953. const CallExpr *E) {
  954. llvm::Value *Arg1 = nullptr;
  955. llvm::Type *Arg1Ty = nullptr;
  956. StringRef Name;
  957. bool IsVarArg = false;
  958. if (SJKind == MSVCSetJmpKind::_setjmp3) {
  959. Name = "_setjmp3";
  960. Arg1Ty = CGF.Int32Ty;
  961. Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
  962. IsVarArg = true;
  963. } else {
  964. Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
  965. Arg1Ty = CGF.Int8PtrTy;
  966. if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
  967. Arg1 = CGF.Builder.CreateCall(
  968. CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
  969. } else
  970. Arg1 = CGF.Builder.CreateCall(
  971. CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
  972. llvm::ConstantInt::get(CGF.Int32Ty, 0));
  973. }
  974. // Mark the call site and declaration with ReturnsTwice.
  975. llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
  976. llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
  977. CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
  978. llvm::Attribute::ReturnsTwice);
  979. llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
  980. llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
  981. ReturnsTwiceAttr, /*Local=*/true);
  982. llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
  983. CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
  984. llvm::Value *Args[] = {Buf, Arg1};
  985. llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
  986. CB->setAttributes(ReturnsTwiceAttr);
  987. return RValue::get(CB);
  988. }
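// Illustrative sketch: on 32-bit x86 the _setjmp3 form is used and the call
// looks roughly like
//   %r = call i32 (ptr, i32, ...) @_setjmp3(ptr %buf, i32 0)
// while on AArch64 the second argument is the stack pointer at function
// entry, obtained via @llvm.sponentry (exact mangling varies):
//   %sp = call ptr @llvm.sponentry.p0()
//   %r  = call i32 @_setjmpex(ptr %buf, ptr %sp)
// with the ReturnsTwice attribute applied to both the callee and the call.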
989. // Many MSVC builtins are available on x64, ARM, and AArch64; to avoid
990. // repeating code, we handle them here.
  991. enum class CodeGenFunction::MSVCIntrin {
  992. _BitScanForward,
  993. _BitScanReverse,
  994. _InterlockedAnd,
  995. _InterlockedDecrement,
  996. _InterlockedExchange,
  997. _InterlockedExchangeAdd,
  998. _InterlockedExchangeSub,
  999. _InterlockedIncrement,
  1000. _InterlockedOr,
  1001. _InterlockedXor,
  1002. _InterlockedExchangeAdd_acq,
  1003. _InterlockedExchangeAdd_rel,
  1004. _InterlockedExchangeAdd_nf,
  1005. _InterlockedExchange_acq,
  1006. _InterlockedExchange_rel,
  1007. _InterlockedExchange_nf,
  1008. _InterlockedCompareExchange_acq,
  1009. _InterlockedCompareExchange_rel,
  1010. _InterlockedCompareExchange_nf,
  1011. _InterlockedCompareExchange128,
  1012. _InterlockedCompareExchange128_acq,
  1013. _InterlockedCompareExchange128_rel,
  1014. _InterlockedCompareExchange128_nf,
  1015. _InterlockedOr_acq,
  1016. _InterlockedOr_rel,
  1017. _InterlockedOr_nf,
  1018. _InterlockedXor_acq,
  1019. _InterlockedXor_rel,
  1020. _InterlockedXor_nf,
  1021. _InterlockedAnd_acq,
  1022. _InterlockedAnd_rel,
  1023. _InterlockedAnd_nf,
  1024. _InterlockedIncrement_acq,
  1025. _InterlockedIncrement_rel,
  1026. _InterlockedIncrement_nf,
  1027. _InterlockedDecrement_acq,
  1028. _InterlockedDecrement_rel,
  1029. _InterlockedDecrement_nf,
  1030. __fastfail,
  1031. };
  1032. static Optional<CodeGenFunction::MSVCIntrin>
  1033. translateArmToMsvcIntrin(unsigned BuiltinID) {
  1034. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1035. switch (BuiltinID) {
  1036. default:
  1037. return None;
  1038. case ARM::BI_BitScanForward:
  1039. case ARM::BI_BitScanForward64:
  1040. return MSVCIntrin::_BitScanForward;
  1041. case ARM::BI_BitScanReverse:
  1042. case ARM::BI_BitScanReverse64:
  1043. return MSVCIntrin::_BitScanReverse;
  1044. case ARM::BI_InterlockedAnd64:
  1045. return MSVCIntrin::_InterlockedAnd;
  1046. case ARM::BI_InterlockedExchange64:
  1047. return MSVCIntrin::_InterlockedExchange;
  1048. case ARM::BI_InterlockedExchangeAdd64:
  1049. return MSVCIntrin::_InterlockedExchangeAdd;
  1050. case ARM::BI_InterlockedExchangeSub64:
  1051. return MSVCIntrin::_InterlockedExchangeSub;
  1052. case ARM::BI_InterlockedOr64:
  1053. return MSVCIntrin::_InterlockedOr;
  1054. case ARM::BI_InterlockedXor64:
  1055. return MSVCIntrin::_InterlockedXor;
  1056. case ARM::BI_InterlockedDecrement64:
  1057. return MSVCIntrin::_InterlockedDecrement;
  1058. case ARM::BI_InterlockedIncrement64:
  1059. return MSVCIntrin::_InterlockedIncrement;
  1060. case ARM::BI_InterlockedExchangeAdd8_acq:
  1061. case ARM::BI_InterlockedExchangeAdd16_acq:
  1062. case ARM::BI_InterlockedExchangeAdd_acq:
  1063. case ARM::BI_InterlockedExchangeAdd64_acq:
  1064. return MSVCIntrin::_InterlockedExchangeAdd_acq;
  1065. case ARM::BI_InterlockedExchangeAdd8_rel:
  1066. case ARM::BI_InterlockedExchangeAdd16_rel:
  1067. case ARM::BI_InterlockedExchangeAdd_rel:
  1068. case ARM::BI_InterlockedExchangeAdd64_rel:
  1069. return MSVCIntrin::_InterlockedExchangeAdd_rel;
  1070. case ARM::BI_InterlockedExchangeAdd8_nf:
  1071. case ARM::BI_InterlockedExchangeAdd16_nf:
  1072. case ARM::BI_InterlockedExchangeAdd_nf:
  1073. case ARM::BI_InterlockedExchangeAdd64_nf:
  1074. return MSVCIntrin::_InterlockedExchangeAdd_nf;
  1075. case ARM::BI_InterlockedExchange8_acq:
  1076. case ARM::BI_InterlockedExchange16_acq:
  1077. case ARM::BI_InterlockedExchange_acq:
  1078. case ARM::BI_InterlockedExchange64_acq:
  1079. return MSVCIntrin::_InterlockedExchange_acq;
  1080. case ARM::BI_InterlockedExchange8_rel:
  1081. case ARM::BI_InterlockedExchange16_rel:
  1082. case ARM::BI_InterlockedExchange_rel:
  1083. case ARM::BI_InterlockedExchange64_rel:
  1084. return MSVCIntrin::_InterlockedExchange_rel;
  1085. case ARM::BI_InterlockedExchange8_nf:
  1086. case ARM::BI_InterlockedExchange16_nf:
  1087. case ARM::BI_InterlockedExchange_nf:
  1088. case ARM::BI_InterlockedExchange64_nf:
  1089. return MSVCIntrin::_InterlockedExchange_nf;
  1090. case ARM::BI_InterlockedCompareExchange8_acq:
  1091. case ARM::BI_InterlockedCompareExchange16_acq:
  1092. case ARM::BI_InterlockedCompareExchange_acq:
  1093. case ARM::BI_InterlockedCompareExchange64_acq:
  1094. return MSVCIntrin::_InterlockedCompareExchange_acq;
  1095. case ARM::BI_InterlockedCompareExchange8_rel:
  1096. case ARM::BI_InterlockedCompareExchange16_rel:
  1097. case ARM::BI_InterlockedCompareExchange_rel:
  1098. case ARM::BI_InterlockedCompareExchange64_rel:
  1099. return MSVCIntrin::_InterlockedCompareExchange_rel;
  1100. case ARM::BI_InterlockedCompareExchange8_nf:
  1101. case ARM::BI_InterlockedCompareExchange16_nf:
  1102. case ARM::BI_InterlockedCompareExchange_nf:
  1103. case ARM::BI_InterlockedCompareExchange64_nf:
  1104. return MSVCIntrin::_InterlockedCompareExchange_nf;
  1105. case ARM::BI_InterlockedOr8_acq:
  1106. case ARM::BI_InterlockedOr16_acq:
  1107. case ARM::BI_InterlockedOr_acq:
  1108. case ARM::BI_InterlockedOr64_acq:
  1109. return MSVCIntrin::_InterlockedOr_acq;
  1110. case ARM::BI_InterlockedOr8_rel:
  1111. case ARM::BI_InterlockedOr16_rel:
  1112. case ARM::BI_InterlockedOr_rel:
  1113. case ARM::BI_InterlockedOr64_rel:
  1114. return MSVCIntrin::_InterlockedOr_rel;
  1115. case ARM::BI_InterlockedOr8_nf:
  1116. case ARM::BI_InterlockedOr16_nf:
  1117. case ARM::BI_InterlockedOr_nf:
  1118. case ARM::BI_InterlockedOr64_nf:
  1119. return MSVCIntrin::_InterlockedOr_nf;
  1120. case ARM::BI_InterlockedXor8_acq:
  1121. case ARM::BI_InterlockedXor16_acq:
  1122. case ARM::BI_InterlockedXor_acq:
  1123. case ARM::BI_InterlockedXor64_acq:
  1124. return MSVCIntrin::_InterlockedXor_acq;
  1125. case ARM::BI_InterlockedXor8_rel:
  1126. case ARM::BI_InterlockedXor16_rel:
  1127. case ARM::BI_InterlockedXor_rel:
  1128. case ARM::BI_InterlockedXor64_rel:
  1129. return MSVCIntrin::_InterlockedXor_rel;
  1130. case ARM::BI_InterlockedXor8_nf:
  1131. case ARM::BI_InterlockedXor16_nf:
  1132. case ARM::BI_InterlockedXor_nf:
  1133. case ARM::BI_InterlockedXor64_nf:
  1134. return MSVCIntrin::_InterlockedXor_nf;
  1135. case ARM::BI_InterlockedAnd8_acq:
  1136. case ARM::BI_InterlockedAnd16_acq:
  1137. case ARM::BI_InterlockedAnd_acq:
  1138. case ARM::BI_InterlockedAnd64_acq:
  1139. return MSVCIntrin::_InterlockedAnd_acq;
  1140. case ARM::BI_InterlockedAnd8_rel:
  1141. case ARM::BI_InterlockedAnd16_rel:
  1142. case ARM::BI_InterlockedAnd_rel:
  1143. case ARM::BI_InterlockedAnd64_rel:
  1144. return MSVCIntrin::_InterlockedAnd_rel;
  1145. case ARM::BI_InterlockedAnd8_nf:
  1146. case ARM::BI_InterlockedAnd16_nf:
  1147. case ARM::BI_InterlockedAnd_nf:
  1148. case ARM::BI_InterlockedAnd64_nf:
  1149. return MSVCIntrin::_InterlockedAnd_nf;
  1150. case ARM::BI_InterlockedIncrement16_acq:
  1151. case ARM::BI_InterlockedIncrement_acq:
  1152. case ARM::BI_InterlockedIncrement64_acq:
  1153. return MSVCIntrin::_InterlockedIncrement_acq;
  1154. case ARM::BI_InterlockedIncrement16_rel:
  1155. case ARM::BI_InterlockedIncrement_rel:
  1156. case ARM::BI_InterlockedIncrement64_rel:
  1157. return MSVCIntrin::_InterlockedIncrement_rel;
  1158. case ARM::BI_InterlockedIncrement16_nf:
  1159. case ARM::BI_InterlockedIncrement_nf:
  1160. case ARM::BI_InterlockedIncrement64_nf:
  1161. return MSVCIntrin::_InterlockedIncrement_nf;
  1162. case ARM::BI_InterlockedDecrement16_acq:
  1163. case ARM::BI_InterlockedDecrement_acq:
  1164. case ARM::BI_InterlockedDecrement64_acq:
  1165. return MSVCIntrin::_InterlockedDecrement_acq;
  1166. case ARM::BI_InterlockedDecrement16_rel:
  1167. case ARM::BI_InterlockedDecrement_rel:
  1168. case ARM::BI_InterlockedDecrement64_rel:
  1169. return MSVCIntrin::_InterlockedDecrement_rel;
  1170. case ARM::BI_InterlockedDecrement16_nf:
  1171. case ARM::BI_InterlockedDecrement_nf:
  1172. case ARM::BI_InterlockedDecrement64_nf:
  1173. return MSVCIntrin::_InterlockedDecrement_nf;
  1174. }
  1175. llvm_unreachable("must return from switch");
  1176. }
  1177. static Optional<CodeGenFunction::MSVCIntrin>
  1178. translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  1179. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1180. switch (BuiltinID) {
  1181. default:
  1182. return None;
  1183. case AArch64::BI_BitScanForward:
  1184. case AArch64::BI_BitScanForward64:
  1185. return MSVCIntrin::_BitScanForward;
  1186. case AArch64::BI_BitScanReverse:
  1187. case AArch64::BI_BitScanReverse64:
  1188. return MSVCIntrin::_BitScanReverse;
  1189. case AArch64::BI_InterlockedAnd64:
  1190. return MSVCIntrin::_InterlockedAnd;
  1191. case AArch64::BI_InterlockedExchange64:
  1192. return MSVCIntrin::_InterlockedExchange;
  1193. case AArch64::BI_InterlockedExchangeAdd64:
  1194. return MSVCIntrin::_InterlockedExchangeAdd;
  1195. case AArch64::BI_InterlockedExchangeSub64:
  1196. return MSVCIntrin::_InterlockedExchangeSub;
  1197. case AArch64::BI_InterlockedOr64:
  1198. return MSVCIntrin::_InterlockedOr;
  1199. case AArch64::BI_InterlockedXor64:
  1200. return MSVCIntrin::_InterlockedXor;
  1201. case AArch64::BI_InterlockedDecrement64:
  1202. return MSVCIntrin::_InterlockedDecrement;
  1203. case AArch64::BI_InterlockedIncrement64:
  1204. return MSVCIntrin::_InterlockedIncrement;
  1205. case AArch64::BI_InterlockedExchangeAdd8_acq:
  1206. case AArch64::BI_InterlockedExchangeAdd16_acq:
  1207. case AArch64::BI_InterlockedExchangeAdd_acq:
  1208. case AArch64::BI_InterlockedExchangeAdd64_acq:
  1209. return MSVCIntrin::_InterlockedExchangeAdd_acq;
  1210. case AArch64::BI_InterlockedExchangeAdd8_rel:
  1211. case AArch64::BI_InterlockedExchangeAdd16_rel:
  1212. case AArch64::BI_InterlockedExchangeAdd_rel:
  1213. case AArch64::BI_InterlockedExchangeAdd64_rel:
  1214. return MSVCIntrin::_InterlockedExchangeAdd_rel;
  1215. case AArch64::BI_InterlockedExchangeAdd8_nf:
  1216. case AArch64::BI_InterlockedExchangeAdd16_nf:
  1217. case AArch64::BI_InterlockedExchangeAdd_nf:
  1218. case AArch64::BI_InterlockedExchangeAdd64_nf:
  1219. return MSVCIntrin::_InterlockedExchangeAdd_nf;
  1220. case AArch64::BI_InterlockedExchange8_acq:
  1221. case AArch64::BI_InterlockedExchange16_acq:
  1222. case AArch64::BI_InterlockedExchange_acq:
  1223. case AArch64::BI_InterlockedExchange64_acq:
  1224. return MSVCIntrin::_InterlockedExchange_acq;
  1225. case AArch64::BI_InterlockedExchange8_rel:
  1226. case AArch64::BI_InterlockedExchange16_rel:
  1227. case AArch64::BI_InterlockedExchange_rel:
  1228. case AArch64::BI_InterlockedExchange64_rel:
  1229. return MSVCIntrin::_InterlockedExchange_rel;
  1230. case AArch64::BI_InterlockedExchange8_nf:
  1231. case AArch64::BI_InterlockedExchange16_nf:
  1232. case AArch64::BI_InterlockedExchange_nf:
  1233. case AArch64::BI_InterlockedExchange64_nf:
  1234. return MSVCIntrin::_InterlockedExchange_nf;
  1235. case AArch64::BI_InterlockedCompareExchange8_acq:
  1236. case AArch64::BI_InterlockedCompareExchange16_acq:
  1237. case AArch64::BI_InterlockedCompareExchange_acq:
  1238. case AArch64::BI_InterlockedCompareExchange64_acq:
  1239. return MSVCIntrin::_InterlockedCompareExchange_acq;
  1240. case AArch64::BI_InterlockedCompareExchange8_rel:
  1241. case AArch64::BI_InterlockedCompareExchange16_rel:
  1242. case AArch64::BI_InterlockedCompareExchange_rel:
  1243. case AArch64::BI_InterlockedCompareExchange64_rel:
  1244. return MSVCIntrin::_InterlockedCompareExchange_rel;
  1245. case AArch64::BI_InterlockedCompareExchange8_nf:
  1246. case AArch64::BI_InterlockedCompareExchange16_nf:
  1247. case AArch64::BI_InterlockedCompareExchange_nf:
  1248. case AArch64::BI_InterlockedCompareExchange64_nf:
  1249. return MSVCIntrin::_InterlockedCompareExchange_nf;
  1250. case AArch64::BI_InterlockedCompareExchange128:
  1251. return MSVCIntrin::_InterlockedCompareExchange128;
  1252. case AArch64::BI_InterlockedCompareExchange128_acq:
  1253. return MSVCIntrin::_InterlockedCompareExchange128_acq;
  1254. case AArch64::BI_InterlockedCompareExchange128_nf:
  1255. return MSVCIntrin::_InterlockedCompareExchange128_nf;
  1256. case AArch64::BI_InterlockedCompareExchange128_rel:
  1257. return MSVCIntrin::_InterlockedCompareExchange128_rel;
  1258. case AArch64::BI_InterlockedOr8_acq:
  1259. case AArch64::BI_InterlockedOr16_acq:
  1260. case AArch64::BI_InterlockedOr_acq:
  1261. case AArch64::BI_InterlockedOr64_acq:
  1262. return MSVCIntrin::_InterlockedOr_acq;
  1263. case AArch64::BI_InterlockedOr8_rel:
  1264. case AArch64::BI_InterlockedOr16_rel:
  1265. case AArch64::BI_InterlockedOr_rel:
  1266. case AArch64::BI_InterlockedOr64_rel:
  1267. return MSVCIntrin::_InterlockedOr_rel;
  1268. case AArch64::BI_InterlockedOr8_nf:
  1269. case AArch64::BI_InterlockedOr16_nf:
  1270. case AArch64::BI_InterlockedOr_nf:
  1271. case AArch64::BI_InterlockedOr64_nf:
  1272. return MSVCIntrin::_InterlockedOr_nf;
  1273. case AArch64::BI_InterlockedXor8_acq:
  1274. case AArch64::BI_InterlockedXor16_acq:
  1275. case AArch64::BI_InterlockedXor_acq:
  1276. case AArch64::BI_InterlockedXor64_acq:
  1277. return MSVCIntrin::_InterlockedXor_acq;
  1278. case AArch64::BI_InterlockedXor8_rel:
  1279. case AArch64::BI_InterlockedXor16_rel:
  1280. case AArch64::BI_InterlockedXor_rel:
  1281. case AArch64::BI_InterlockedXor64_rel:
  1282. return MSVCIntrin::_InterlockedXor_rel;
  1283. case AArch64::BI_InterlockedXor8_nf:
  1284. case AArch64::BI_InterlockedXor16_nf:
  1285. case AArch64::BI_InterlockedXor_nf:
  1286. case AArch64::BI_InterlockedXor64_nf:
  1287. return MSVCIntrin::_InterlockedXor_nf;
  1288. case AArch64::BI_InterlockedAnd8_acq:
  1289. case AArch64::BI_InterlockedAnd16_acq:
  1290. case AArch64::BI_InterlockedAnd_acq:
  1291. case AArch64::BI_InterlockedAnd64_acq:
  1292. return MSVCIntrin::_InterlockedAnd_acq;
  1293. case AArch64::BI_InterlockedAnd8_rel:
  1294. case AArch64::BI_InterlockedAnd16_rel:
  1295. case AArch64::BI_InterlockedAnd_rel:
  1296. case AArch64::BI_InterlockedAnd64_rel:
  1297. return MSVCIntrin::_InterlockedAnd_rel;
  1298. case AArch64::BI_InterlockedAnd8_nf:
  1299. case AArch64::BI_InterlockedAnd16_nf:
  1300. case AArch64::BI_InterlockedAnd_nf:
  1301. case AArch64::BI_InterlockedAnd64_nf:
  1302. return MSVCIntrin::_InterlockedAnd_nf;
  1303. case AArch64::BI_InterlockedIncrement16_acq:
  1304. case AArch64::BI_InterlockedIncrement_acq:
  1305. case AArch64::BI_InterlockedIncrement64_acq:
  1306. return MSVCIntrin::_InterlockedIncrement_acq;
  1307. case AArch64::BI_InterlockedIncrement16_rel:
  1308. case AArch64::BI_InterlockedIncrement_rel:
  1309. case AArch64::BI_InterlockedIncrement64_rel:
  1310. return MSVCIntrin::_InterlockedIncrement_rel;
  1311. case AArch64::BI_InterlockedIncrement16_nf:
  1312. case AArch64::BI_InterlockedIncrement_nf:
  1313. case AArch64::BI_InterlockedIncrement64_nf:
  1314. return MSVCIntrin::_InterlockedIncrement_nf;
  1315. case AArch64::BI_InterlockedDecrement16_acq:
  1316. case AArch64::BI_InterlockedDecrement_acq:
  1317. case AArch64::BI_InterlockedDecrement64_acq:
  1318. return MSVCIntrin::_InterlockedDecrement_acq;
  1319. case AArch64::BI_InterlockedDecrement16_rel:
  1320. case AArch64::BI_InterlockedDecrement_rel:
  1321. case AArch64::BI_InterlockedDecrement64_rel:
  1322. return MSVCIntrin::_InterlockedDecrement_rel;
  1323. case AArch64::BI_InterlockedDecrement16_nf:
  1324. case AArch64::BI_InterlockedDecrement_nf:
  1325. case AArch64::BI_InterlockedDecrement64_nf:
  1326. return MSVCIntrin::_InterlockedDecrement_nf;
  1327. }
  1328. llvm_unreachable("must return from switch");
  1329. }
  1330. static Optional<CodeGenFunction::MSVCIntrin>
  1331. translateX86ToMsvcIntrin(unsigned BuiltinID) {
  1332. using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  1333. switch (BuiltinID) {
  1334. default:
  1335. return None;
  1336. case clang::X86::BI_BitScanForward:
  1337. case clang::X86::BI_BitScanForward64:
  1338. return MSVCIntrin::_BitScanForward;
  1339. case clang::X86::BI_BitScanReverse:
  1340. case clang::X86::BI_BitScanReverse64:
  1341. return MSVCIntrin::_BitScanReverse;
  1342. case clang::X86::BI_InterlockedAnd64:
  1343. return MSVCIntrin::_InterlockedAnd;
  1344. case clang::X86::BI_InterlockedCompareExchange128:
  1345. return MSVCIntrin::_InterlockedCompareExchange128;
  1346. case clang::X86::BI_InterlockedExchange64:
  1347. return MSVCIntrin::_InterlockedExchange;
  1348. case clang::X86::BI_InterlockedExchangeAdd64:
  1349. return MSVCIntrin::_InterlockedExchangeAdd;
  1350. case clang::X86::BI_InterlockedExchangeSub64:
  1351. return MSVCIntrin::_InterlockedExchangeSub;
  1352. case clang::X86::BI_InterlockedOr64:
  1353. return MSVCIntrin::_InterlockedOr;
  1354. case clang::X86::BI_InterlockedXor64:
  1355. return MSVCIntrin::_InterlockedXor;
  1356. case clang::X86::BI_InterlockedDecrement64:
  1357. return MSVCIntrin::_InterlockedDecrement;
  1358. case clang::X86::BI_InterlockedIncrement64:
  1359. return MSVCIntrin::_InterlockedIncrement;
  1360. }
  1361. llvm_unreachable("must return from switch");
  1362. }
  1363. // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
  1364. Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
  1365. const CallExpr *E) {
  1366. switch (BuiltinID) {
  1367. case MSVCIntrin::_BitScanForward:
  1368. case MSVCIntrin::_BitScanReverse: {
  1369. Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
  1370. Value *ArgValue = EmitScalarExpr(E->getArg(1));
  1371. llvm::Type *ArgType = ArgValue->getType();
  1372. llvm::Type *IndexType = IndexAddress.getElementType();
  1373. llvm::Type *ResultType = ConvertType(E->getType());
  1374. Value *ArgZero = llvm::Constant::getNullValue(ArgType);
  1375. Value *ResZero = llvm::Constant::getNullValue(ResultType);
  1376. Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
  1377. BasicBlock *Begin = Builder.GetInsertBlock();
  1378. BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
  1379. Builder.SetInsertPoint(End);
  1380. PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
  1381. Builder.SetInsertPoint(Begin);
  1382. Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
  1383. BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
  1384. Builder.CreateCondBr(IsZero, End, NotZero);
  1385. Result->addIncoming(ResZero, Begin);
  1386. Builder.SetInsertPoint(NotZero);
  1387. if (BuiltinID == MSVCIntrin::_BitScanForward) {
  1388. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  1389. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  1390. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  1391. Builder.CreateStore(ZeroCount, IndexAddress, false);
  1392. } else {
  1393. unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
  1394. Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
  1395. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  1396. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  1397. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  1398. Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
  1399. Builder.CreateStore(Index, IndexAddress, false);
  1400. }
  1401. Builder.CreateBr(End);
  1402. Result->addIncoming(ResOne, NotZero);
  1403. Builder.SetInsertPoint(End);
  1404. return Result;
  1405. }
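// Sketch of the semantics implemented above, in C-like form:
//   unsigned char _BitScanForward(unsigned long *Index, unsigned long Mask) {
//     if (Mask == 0) return 0;     // branch straight to bitscan_end with 0
//     *Index = cttz(Mask);         // llvm.cttz; zero is poison on this path
//     return 1;                    // bitscan_not_zero path
//   }
// _BitScanReverse stores (BitWidth - 1) - ctlz(Mask) instead.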
  1406. case MSVCIntrin::_InterlockedAnd:
  1407. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  1408. case MSVCIntrin::_InterlockedExchange:
  1409. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  1410. case MSVCIntrin::_InterlockedExchangeAdd:
  1411. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  1412. case MSVCIntrin::_InterlockedExchangeSub:
  1413. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  1414. case MSVCIntrin::_InterlockedOr:
  1415. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  1416. case MSVCIntrin::_InterlockedXor:
  1417. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
  1418. case MSVCIntrin::_InterlockedExchangeAdd_acq:
  1419. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1420. AtomicOrdering::Acquire);
  1421. case MSVCIntrin::_InterlockedExchangeAdd_rel:
  1422. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1423. AtomicOrdering::Release);
  1424. case MSVCIntrin::_InterlockedExchangeAdd_nf:
  1425. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  1426. AtomicOrdering::Monotonic);
  1427. case MSVCIntrin::_InterlockedExchange_acq:
  1428. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1429. AtomicOrdering::Acquire);
  1430. case MSVCIntrin::_InterlockedExchange_rel:
  1431. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1432. AtomicOrdering::Release);
  1433. case MSVCIntrin::_InterlockedExchange_nf:
  1434. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  1435. AtomicOrdering::Monotonic);
  1436. case MSVCIntrin::_InterlockedCompareExchange_acq:
  1437. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
  1438. case MSVCIntrin::_InterlockedCompareExchange_rel:
  1439. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
  1440. case MSVCIntrin::_InterlockedCompareExchange_nf:
  1441. return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  1442. case MSVCIntrin::_InterlockedCompareExchange128:
  1443. return EmitAtomicCmpXchg128ForMSIntrin(
  1444. *this, E, AtomicOrdering::SequentiallyConsistent);
  1445. case MSVCIntrin::_InterlockedCompareExchange128_acq:
  1446. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
  1447. case MSVCIntrin::_InterlockedCompareExchange128_rel:
  1448. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
  1449. case MSVCIntrin::_InterlockedCompareExchange128_nf:
  1450. return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  1451. case MSVCIntrin::_InterlockedOr_acq:
  1452. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1453. AtomicOrdering::Acquire);
  1454. case MSVCIntrin::_InterlockedOr_rel:
  1455. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1456. AtomicOrdering::Release);
  1457. case MSVCIntrin::_InterlockedOr_nf:
  1458. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  1459. AtomicOrdering::Monotonic);
  1460. case MSVCIntrin::_InterlockedXor_acq:
  1461. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1462. AtomicOrdering::Acquire);
  1463. case MSVCIntrin::_InterlockedXor_rel:
  1464. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1465. AtomicOrdering::Release);
  1466. case MSVCIntrin::_InterlockedXor_nf:
  1467. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
  1468. AtomicOrdering::Monotonic);
  1469. case MSVCIntrin::_InterlockedAnd_acq:
  1470. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1471. AtomicOrdering::Acquire);
  1472. case MSVCIntrin::_InterlockedAnd_rel:
  1473. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1474. AtomicOrdering::Release);
  1475. case MSVCIntrin::_InterlockedAnd_nf:
  1476. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  1477. AtomicOrdering::Monotonic);
  1478. case MSVCIntrin::_InterlockedIncrement_acq:
  1479. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
  1480. case MSVCIntrin::_InterlockedIncrement_rel:
  1481. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
  1482. case MSVCIntrin::_InterlockedIncrement_nf:
  1483. return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
  1484. case MSVCIntrin::_InterlockedDecrement_acq:
  1485. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
  1486. case MSVCIntrin::_InterlockedDecrement_rel:
  1487. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
  1488. case MSVCIntrin::_InterlockedDecrement_nf:
  1489. return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
  1490. case MSVCIntrin::_InterlockedDecrement:
  1491. return EmitAtomicDecrementValue(*this, E);
  1492. case MSVCIntrin::_InterlockedIncrement:
  1493. return EmitAtomicIncrementValue(*this, E);
  1494. case MSVCIntrin::__fastfail: {
  1495. // Request immediate process termination from the kernel. The instruction
  1496. // sequences to do this are documented on MSDN:
  1497. // https://msdn.microsoft.com/en-us/library/dn774154.aspx
  1498. llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
  1499. StringRef Asm, Constraints;
  1500. switch (ISA) {
  1501. default:
  1502. ErrorUnsupported(E, "__fastfail call for this architecture");
  1503. break;
  1504. case llvm::Triple::x86:
  1505. case llvm::Triple::x86_64:
  1506. Asm = "int $$0x29";
  1507. Constraints = "{cx}";
  1508. break;
  1509. case llvm::Triple::thumb:
  1510. Asm = "udf #251";
  1511. Constraints = "{r0}";
  1512. break;
  1513. case llvm::Triple::aarch64:
  1514. Asm = "brk #0xF003";
  1515. Constraints = "{w0}";
  1516. }
  1517. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
  1518. llvm::InlineAsm *IA =
  1519. llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  1520. llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
  1521. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  1522. llvm::Attribute::NoReturn);
  1523. llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
  1524. CI->setAttributes(NoReturnAttr);
  1525. return CI;
  1526. }
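// For example, on x86-64 __fastfail(code) becomes the inline asm
//   "int $$0x29"  with constraint "{cx}"
// i.e. the failure code is pinned to ECX/RCX and the call is marked
// noreturn, matching the documented fast-fail sequences.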
  1527. }
  1528. llvm_unreachable("Incorrect MSVC intrinsic!");
  1529. }
  1530. namespace {
  1531. // ARC cleanup for __builtin_os_log_format
  1532. struct CallObjCArcUse final : EHScopeStack::Cleanup {
  1533. CallObjCArcUse(llvm::Value *object) : object(object) {}
  1534. llvm::Value *object;
  1535. void Emit(CodeGenFunction &CGF, Flags flags) override {
  1536. CGF.EmitARCIntrinsicUse(object);
  1537. }
  1538. };
  1539. }
  1540. Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
  1541. BuiltinCheckKind Kind) {
  1542. assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
  1543. && "Unsupported builtin check kind");
  1544. Value *ArgValue = EmitScalarExpr(E);
  1545. if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
  1546. return ArgValue;
  1547. SanitizerScope SanScope(this);
  1548. Value *Cond = Builder.CreateICmpNE(
  1549. ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  1550. EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
  1551. SanitizerHandler::InvalidBuiltin,
  1552. {EmitCheckSourceLocation(E->getExprLoc()),
  1553. llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
  1554. None);
  1555. return ArgValue;
  1556. }
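// Illustrative sketch: under -fsanitize=builtin, a call like __builtin_clz(x)
// on a target where clz(0) is undefined gets a guard roughly of the form
//   %nonzero = icmp ne i32 %x, 0
// and EmitCheck branches to the invalid-builtin handler when %nonzero is
// false; the ordinary ctlz lowering then proceeds with %x unchanged.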
  1557. /// Get the argument type for arguments to os_log_helper.
  1558. static CanQualType getOSLogArgType(ASTContext &C, int Size) {
  1559. QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
  1560. return C.getCanonicalType(UnsignedTy);
  1561. }
  1562. llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
  1563. const analyze_os_log::OSLogBufferLayout &Layout,
  1564. CharUnits BufferAlignment) {
  1565. ASTContext &Ctx = getContext();
  1566. llvm::SmallString<64> Name;
  1567. {
  1568. raw_svector_ostream OS(Name);
  1569. OS << "__os_log_helper";
  1570. OS << "_" << BufferAlignment.getQuantity();
  1571. OS << "_" << int(Layout.getSummaryByte());
  1572. OS << "_" << int(Layout.getNumArgsByte());
  1573. for (const auto &Item : Layout.Items)
  1574. OS << "_" << int(Item.getSizeByte()) << "_"
  1575. << int(Item.getDescriptorByte());
  1576. }
  1577. if (llvm::Function *F = CGM.getModule().getFunction(Name))
  1578. return F;
  1579. llvm::SmallVector<QualType, 4> ArgTys;
  1580. FunctionArgList Args;
  1581. Args.push_back(ImplicitParamDecl::Create(
  1582. Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
  1583. ImplicitParamDecl::Other));
  1584. ArgTys.emplace_back(Ctx.VoidPtrTy);
  1585. for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
  1586. char Size = Layout.Items[I].getSizeByte();
  1587. if (!Size)
  1588. continue;
  1589. QualType ArgTy = getOSLogArgType(Ctx, Size);
  1590. Args.push_back(ImplicitParamDecl::Create(
  1591. Ctx, nullptr, SourceLocation(),
  1592. &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
  1593. ImplicitParamDecl::Other));
  1594. ArgTys.emplace_back(ArgTy);
  1595. }
  1596. QualType ReturnTy = Ctx.VoidTy;
  1597. // The helper function has linkonce_odr linkage to enable the linker to merge
  1598. // identical functions. To ensure the merging always happens, 'noinline' is
  1599. // attached to the function when compiling with -Oz.
  1600. const CGFunctionInfo &FI =
  1601. CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
  1602. llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  1603. llvm::Function *Fn = llvm::Function::Create(
  1604. FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  1605. Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  1606. CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
  1607. CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
  1608. Fn->setDoesNotThrow();
  1609. // Attach 'noinline' at -Oz.
  1610. if (CGM.getCodeGenOpts().OptimizeSize == 2)
  1611. Fn->addFnAttr(llvm::Attribute::NoInline);
  1612. auto NL = ApplyDebugLocation::CreateEmpty(*this);
  1613. StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
  1614. // Create a scope with an artificial location for the body of this function.
  1615. auto AL = ApplyDebugLocation::CreateArtificial(*this);
  1616. CharUnits Offset;
  1617. Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"),
  1618. BufferAlignment);
  1619. Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
  1620. Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  1621. Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
  1622. Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
  1623. unsigned I = 1;
  1624. for (const auto &Item : Layout.Items) {
  1625. Builder.CreateStore(
  1626. Builder.getInt8(Item.getDescriptorByte()),
  1627. Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
  1628. Builder.CreateStore(
  1629. Builder.getInt8(Item.getSizeByte()),
  1630. Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
  1631. CharUnits Size = Item.size();
  1632. if (!Size.getQuantity())
  1633. continue;
  1634. Address Arg = GetAddrOfLocalVar(Args[I]);
  1635. Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
  1636. Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
  1637. "argDataCast");
  1638. Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
  1639. Offset += Size;
  1640. ++I;
  1641. }
  1642. FinishFunction();
  1643. return Fn;
  1644. }
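// Sketch of the buffer the helper fills in (one byte per cell, each data
// field being Item.size() bytes):
//   [ summary ][ numArgs ][ desc0 ][ size0 ][ data0... ][ desc1 ][ size1 ] ...
// The helper's name encodes the buffer alignment, the summary and arg-count
// bytes, and each item's size/descriptor pair, so identical layouts map to
// the same linkonce_odr function, e.g. "__os_log_helper_16_2_2_4_0_8_34"
// (the trailing digits here are illustrative, not taken from a real call).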
  1645. RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
  1646. assert(E.getNumArgs() >= 2 &&
  1647. "__builtin_os_log_format takes at least 2 arguments");
  1648. ASTContext &Ctx = getContext();
  1649. analyze_os_log::OSLogBufferLayout Layout;
  1650. analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
  1651. Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
  1652. llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
  1653. // Ignore argument 1, the format string. It is not currently used.
  1654. CallArgList Args;
  1655. Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
  1656. for (const auto &Item : Layout.Items) {
  1657. int Size = Item.getSizeByte();
  1658. if (!Size)
  1659. continue;
  1660. llvm::Value *ArgVal;
  1661. if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
  1662. uint64_t Val = 0;
  1663. for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
  1664. Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
  1665. ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
  1666. } else if (const Expr *TheExpr = Item.getExpr()) {
  1667. ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
  1668. // If a temporary object that requires destruction after the full
  1669. // expression is passed, push a lifetime-extended cleanup to extend its
  1670. // lifetime to the end of the enclosing block scope.
  1671. auto LifetimeExtendObject = [&](const Expr *E) {
  1672. E = E->IgnoreParenCasts();
  1673. // Extend lifetimes of objects returned by function calls and message
  1674. // sends.
  1675. // FIXME: We should do this in other cases in which temporaries are
  1676. // created including arguments of non-ARC types (e.g., C++
  1677. // temporaries).
  1678. if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
  1679. return true;
  1680. return false;
  1681. };
  1682. if (TheExpr->getType()->isObjCRetainableType() &&
  1683. getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
  1684. assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
  1685. "Only scalar can be a ObjC retainable type");
  1686. if (!isa<Constant>(ArgVal)) {
  1687. CleanupKind Cleanup = getARCCleanupKind();
  1688. QualType Ty = TheExpr->getType();
  1689. Address Alloca = Address::invalid();
  1690. Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
  1691. ArgVal = EmitARCRetain(Ty, ArgVal);
  1692. Builder.CreateStore(ArgVal, Addr);
  1693. pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
  1694. CodeGenFunction::destroyARCStrongPrecise,
  1695. Cleanup & EHCleanup);
  1696. // Push a clang.arc.use call to ensure ARC optimizer knows that the
  1697. // argument has to be alive.
  1698. if (CGM.getCodeGenOpts().OptimizationLevel != 0)
  1699. pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
  1700. }
  1701. }
  1702. } else {
  1703. ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
  1704. }
  1705. unsigned ArgValSize =
  1706. CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
  1707. llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
  1708. ArgValSize);
  1709. ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
  1710. CanQualType ArgTy = getOSLogArgType(Ctx, Size);
  1711. // If ArgVal has type x86_fp80, zero-extend ArgVal.
  1712. ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
  1713. Args.add(RValue::get(ArgVal), ArgTy);
  1714. }
  1715. const CGFunctionInfo &FI =
  1716. CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
  1717. llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
  1718. Layout, BufAddr.getAlignment());
  1719. EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
  1720. return RValue::get(BufAddr.getPointer());
  1721. }
  1722. static bool isSpecialUnsignedMultiplySignedResult(
  1723. unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
  1724. WidthAndSignedness ResultInfo) {
  1725. return BuiltinID == Builtin::BI__builtin_mul_overflow &&
  1726. Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
  1727. !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
  1728. }
  1729. static RValue EmitCheckedUnsignedMultiplySignedResult(
  1730. CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
  1731. const clang::Expr *Op2, WidthAndSignedness Op2Info,
  1732. const clang::Expr *ResultArg, QualType ResultQTy,
  1733. WidthAndSignedness ResultInfo) {
  1734. assert(isSpecialUnsignedMultiplySignedResult(
  1735. Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
  1736. "Cannot specialize this multiply");
  1737. llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
  1738. llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
  1739. llvm::Value *HasOverflow;
  1740. llvm::Value *Result = EmitOverflowIntrinsic(
  1741. CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
1742. // The intrinsic call will detect overflow when the value is > UINT_MAX;
  1743. // however, since the original builtin had a signed result, we need to report
  1744. // an overflow when the result is greater than INT_MAX.
  1745. auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
  1746. llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
  1747. llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
  1748. HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
  1749. bool isVolatile =
  1750. ResultArg->getType()->getPointeeType().isVolatileQualified();
  1751. Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  1752. CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
  1753. isVolatile);
  1754. return RValue::get(HasOverflow);
  1755. }
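// For example, with
//   unsigned a, b; int res;
//   bool ovf = __builtin_mul_overflow(a, b, &res);
// both operands are unsigned and the destination is signed with the same
// width, so the code above uses @llvm.umul.with.overflow.i32 and reports
// overflow either when that intrinsic does or when the product exceeds
// INT_MAX.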
  1756. /// Determine if a binop is a checked mixed-sign multiply we can specialize.
  1757. static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
  1758. WidthAndSignedness Op1Info,
  1759. WidthAndSignedness Op2Info,
  1760. WidthAndSignedness ResultInfo) {
  1761. return BuiltinID == Builtin::BI__builtin_mul_overflow &&
  1762. std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
  1763. Op1Info.Signed != Op2Info.Signed;
  1764. }
  1765. /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
  1766. /// the generic checked-binop irgen.
  1767. static RValue
  1768. EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
  1769. WidthAndSignedness Op1Info, const clang::Expr *Op2,
  1770. WidthAndSignedness Op2Info,
  1771. const clang::Expr *ResultArg, QualType ResultQTy,
  1772. WidthAndSignedness ResultInfo) {
  1773. assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
  1774. Op2Info, ResultInfo) &&
  1775. "Not a mixed-sign multipliction we can specialize");
  1776. // Emit the signed and unsigned operands.
  1777. const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
  1778. const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
  1779. llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
  1780. llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
  1781. unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
  1782. unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
  1783. // One of the operands may be smaller than the other. If so, [s|z]ext it.
  1784. if (SignedOpWidth < UnsignedOpWidth)
  1785. Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
  1786. if (UnsignedOpWidth < SignedOpWidth)
  1787. Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
  1788. llvm::Type *OpTy = Signed->getType();
  1789. llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
  1790. Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  1791. llvm::Type *ResTy = ResultPtr.getElementType();
  1792. unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
  1793. // Take the absolute value of the signed operand.
  1794. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
  1795. llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
  1796. llvm::Value *AbsSigned =
  1797. CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
  1798. // Perform a checked unsigned multiplication.
  1799. llvm::Value *UnsignedOverflow;
  1800. llvm::Value *UnsignedResult =
  1801. EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
  1802. Unsigned, UnsignedOverflow);
  1803. llvm::Value *Overflow, *Result;
  1804. if (ResultInfo.Signed) {
    // Signed overflow occurs if the result is greater than INT_MAX or less
    // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
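    // Worked example (illustrative, not from the original source): for
    // s = -3, u = 5 and a signed 32-bit result, AbsSigned = 3,
    // UnsignedResult = 15, and MaxResult = INT_MAX + 1 (IsNegative is true),
    // so no overflow is reported and the stored result is -15.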
  1807. auto IntMax =
  1808. llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
  1809. llvm::Value *MaxResult =
  1810. CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
  1811. CGF.Builder.CreateZExt(IsNegative, OpTy));
  1812. llvm::Value *SignedOverflow =
  1813. CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
  1814. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
  1815. // Prepare the signed result (possibly by negating it).
  1816. llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
  1817. llvm::Value *SignedResult =
  1818. CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
  1819. Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
  1820. } else {
  1821. // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
  1822. llvm::Value *Underflow = CGF.Builder.CreateAnd(
  1823. IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
  1824. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
  1825. if (ResultInfo.Width < OpWidth) {
  1826. auto IntMax =
  1827. llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
  1828. llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
  1829. UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
  1830. Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
  1831. }
  1832. // Negate the product if it would be negative in infinite precision.
  1833. Result = CGF.Builder.CreateSelect(
  1834. IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
  1835. Result = CGF.Builder.CreateTrunc(Result, ResTy);
  1836. }
  1837. assert(Overflow && Result && "Missing overflow or result");
  1838. bool isVolatile =
  1839. ResultArg->getType()->getPointeeType().isVolatileQualified();
  1840. CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
  1841. isVolatile);
  1842. return RValue::get(Overflow);
  1843. }
  1844. static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
  1845. Value *&RecordPtr, CharUnits Align,
  1846. llvm::FunctionCallee Func, int Lvl) {
  1847. ASTContext &Context = CGF.getContext();
  1848. RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition();
  1849. std::string Pad = std::string(Lvl * 4, ' ');
  1850. Value *GString =
  1851. CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
  1852. Value *Res = CGF.Builder.CreateCall(Func, {GString});
  1853. static llvm::DenseMap<QualType, const char *> Types;
  1854. if (Types.empty()) {
  1855. Types[Context.CharTy] = "%c";
  1856. Types[Context.BoolTy] = "%d";
  1857. Types[Context.SignedCharTy] = "%hhd";
  1858. Types[Context.UnsignedCharTy] = "%hhu";
  1859. Types[Context.IntTy] = "%d";
  1860. Types[Context.UnsignedIntTy] = "%u";
  1861. Types[Context.LongTy] = "%ld";
  1862. Types[Context.UnsignedLongTy] = "%lu";
  1863. Types[Context.LongLongTy] = "%lld";
  1864. Types[Context.UnsignedLongLongTy] = "%llu";
  1865. Types[Context.ShortTy] = "%hd";
  1866. Types[Context.UnsignedShortTy] = "%hu";
  1867. Types[Context.VoidPtrTy] = "%p";
  1868. Types[Context.FloatTy] = "%f";
  1869. Types[Context.DoubleTy] = "%f";
  1870. Types[Context.LongDoubleTy] = "%Lf";
  1871. Types[Context.getPointerType(Context.CharTy)] = "%s";
  1872. Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
  1873. }
  1874. for (const auto *FD : RD->fields()) {
  1875. Value *FieldPtr = RecordPtr;
  1876. if (RD->isUnion())
  1877. FieldPtr = CGF.Builder.CreatePointerCast(
  1878. FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
  1879. else
  1880. FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
  1881. FD->getFieldIndex());
  1882. GString = CGF.Builder.CreateGlobalStringPtr(
  1883. llvm::Twine(Pad)
  1884. .concat(FD->getType().getAsString())
  1885. .concat(llvm::Twine(' '))
  1886. .concat(FD->getNameAsString())
  1887. .concat(" : ")
  1888. .str());
  1889. Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
  1890. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  1891. QualType CanonicalType =
  1892. FD->getType().getUnqualifiedType().getCanonicalType();
    // If the field is itself a record, dump it recursively.
  1894. if (CanonicalType->isRecordType()) {
  1895. TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
  1896. Res = CGF.Builder.CreateAdd(TmpRes, Res);
  1897. continue;
  1898. }
    // Try to determine the best format to print the current field; unknown
    // types fall back to the "%p" pointer format.
  1900. llvm::Twine Format = Types.find(CanonicalType) == Types.end()
  1901. ? Types[Context.VoidPtrTy]
  1902. : Types[CanonicalType];
  1903. Address FieldAddress = Address(FieldPtr, Align);
  1904. FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
    // FIXME: Need to handle bitfields here.
  1906. GString = CGF.Builder.CreateGlobalStringPtr(
  1907. Format.concat(llvm::Twine('\n')).str());
  1908. TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
  1909. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  1910. }
  1911. GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
  1912. Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
  1913. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  1914. return Res;
  1915. }
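// Illustrative example (a sketch, not from the original source): for
//   struct S { int x; float y; } s = {1, 2.0f};
// a call __builtin_dump_struct(&s, &printf) emits calls to the given
// printf-like callback producing output along the lines of:
//   struct S {
//   int x : 1
//   float y : 2.000000
//   }
// Nested record fields are printed recursively with extra indentation.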
  1916. static bool
  1917. TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
  1918. llvm::SmallPtrSetImpl<const Decl *> &Seen) {
  1919. if (const auto *Arr = Ctx.getAsArrayType(Ty))
  1920. Ty = Ctx.getBaseElementType(Arr);
  1921. const auto *Record = Ty->getAsCXXRecordDecl();
  1922. if (!Record)
  1923. return false;
  1924. // We've already checked this type, or are in the process of checking it.
  1925. if (!Seen.insert(Record).second)
  1926. return false;
  1927. assert(Record->hasDefinition() &&
  1928. "Incomplete types should already be diagnosed");
  1929. if (Record->isDynamicClass())
  1930. return true;
  1931. for (FieldDecl *F : Record->fields()) {
  1932. if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
  1933. return true;
  1934. }
  1935. return false;
  1936. }
  1937. /// Determine if the specified type requires laundering by checking if it is a
  1938. /// dynamic class type or contains a subobject which is a dynamic class type.
  1939. static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
  1940. if (!CGM.getCodeGenOpts().StrictVTablePointers)
  1941. return false;
  1942. llvm::SmallPtrSet<const Decl *, 16> Seen;
  1943. return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
  1944. }
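// Illustrative note (not from the original source): under
// -fstrict-vtable-pointers a type such as
//   struct Poly { virtual ~Poly(); };
// is a dynamic class, so this returns true (and so does any type containing a
// Poly subobject); without that flag it always returns false.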
  1945. RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
  1946. llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  1947. llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
  1948. // The builtin's shift arg may have a different type than the source arg and
  1949. // result, but the LLVM intrinsic uses the same type for all values.
  1950. llvm::Type *Ty = Src->getType();
  1951. ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
  // Rotate is a special case of the LLVM funnel shift: the first two args are
  // the same.
  1953. unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  1954. Function *F = CGM.getIntrinsic(IID, Ty);
  1955. return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
  1956. }
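// Illustrative note (not from the original source): rotl(x, n) becomes
// fshl(x, x, n) and rotr(x, n) becomes fshr(x, x, n); for example,
// __builtin_rotateleft8(0x81, 1) evaluates to 0x03.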
  1957. // Map math builtins for long-double to f128 version.
  1958. static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
  1959. switch (BuiltinID) {
  1960. #define MUTATE_LDBL(func) \
  1961. case Builtin::BI__builtin_##func##l: \
  1962. return Builtin::BI__builtin_##func##f128;
  1963. MUTATE_LDBL(sqrt)
  1964. MUTATE_LDBL(cbrt)
  1965. MUTATE_LDBL(fabs)
  1966. MUTATE_LDBL(log)
  1967. MUTATE_LDBL(log2)
  1968. MUTATE_LDBL(log10)
  1969. MUTATE_LDBL(log1p)
  1970. MUTATE_LDBL(logb)
  1971. MUTATE_LDBL(exp)
  1972. MUTATE_LDBL(exp2)
  1973. MUTATE_LDBL(expm1)
  1974. MUTATE_LDBL(fdim)
  1975. MUTATE_LDBL(hypot)
  1976. MUTATE_LDBL(ilogb)
  1977. MUTATE_LDBL(pow)
  1978. MUTATE_LDBL(fmin)
  1979. MUTATE_LDBL(fmax)
  1980. MUTATE_LDBL(ceil)
  1981. MUTATE_LDBL(trunc)
  1982. MUTATE_LDBL(rint)
  1983. MUTATE_LDBL(nearbyint)
  1984. MUTATE_LDBL(round)
  1985. MUTATE_LDBL(floor)
  1986. MUTATE_LDBL(lround)
  1987. MUTATE_LDBL(llround)
  1988. MUTATE_LDBL(lrint)
  1989. MUTATE_LDBL(llrint)
  1990. MUTATE_LDBL(fmod)
  1991. MUTATE_LDBL(modf)
  1992. MUTATE_LDBL(nan)
  1993. MUTATE_LDBL(nans)
  1994. MUTATE_LDBL(inf)
  1995. MUTATE_LDBL(fma)
  1996. MUTATE_LDBL(sin)
  1997. MUTATE_LDBL(cos)
  1998. MUTATE_LDBL(tan)
  1999. MUTATE_LDBL(sinh)
  2000. MUTATE_LDBL(cosh)
  2001. MUTATE_LDBL(tanh)
  2002. MUTATE_LDBL(asin)
  2003. MUTATE_LDBL(acos)
  2004. MUTATE_LDBL(atan)
  2005. MUTATE_LDBL(asinh)
  2006. MUTATE_LDBL(acosh)
  2007. MUTATE_LDBL(atanh)
  2008. MUTATE_LDBL(atan2)
  2009. MUTATE_LDBL(erf)
  2010. MUTATE_LDBL(erfc)
  2011. MUTATE_LDBL(ldexp)
  2012. MUTATE_LDBL(frexp)
  2013. MUTATE_LDBL(huge_val)
  2014. MUTATE_LDBL(copysign)
  2015. MUTATE_LDBL(nextafter)
  2016. MUTATE_LDBL(nexttoward)
  2017. MUTATE_LDBL(remainder)
  2018. MUTATE_LDBL(remquo)
  2019. MUTATE_LDBL(scalbln)
  2020. MUTATE_LDBL(scalbn)
  2021. MUTATE_LDBL(tgamma)
  2022. MUTATE_LDBL(lgamma)
  2023. #undef MUTATE_LDBL
  2024. default:
  2025. return BuiltinID;
  2026. }
  2027. }
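// Illustrative note (not from the original source): on a PPC64 target whose
// long double is the IEEE 128-bit format, a call such as __builtin_sqrtl is
// remapped below to __builtin_sqrtf128 before any further lowering.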
  2028. RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
  2029. const CallExpr *E,
  2030. ReturnValueSlot ReturnValue) {
  2031. const FunctionDecl *FD = GD.getDecl()->getAsFunction();
  2032. // See if we can constant fold this builtin. If so, don't emit it at all.
  2033. Expr::EvalResult Result;
  2034. if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
  2035. !Result.hasSideEffects()) {
  2036. if (Result.Val.isInt())
  2037. return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
  2038. Result.Val.getInt()));
  2039. if (Result.Val.isFloat())
  2040. return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
  2041. Result.Val.getFloat()));
  2042. }
  // If the current long-double semantics are IEEE 128-bit, replace long-double
  // math builtins with their f128 equivalents.
  // TODO: This mutation should also be applied to targets other than PPC once
  // the backend supports IEEE 128-bit style libcalls.
  2047. if (getTarget().getTriple().isPPC64() &&
  2048. &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
  2049. BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
  2050. // If the builtin has been declared explicitly with an assembler label,
  2051. // disable the specialized emitting below. Ideally we should communicate the
  2052. // rename in IR, or at least avoid generating the intrinsic calls that are
  2053. // likely to get lowered to the renamed library functions.
  2054. const unsigned BuiltinIDIfNoAsmLabel =
  2055. FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
  // There are LLVM math intrinsics/instructions corresponding to math library
  // functions, except that the LLVM op will never set errno while the math
  // library might. Also, math builtins have the same semantics as their math
  // library twins. Thus, we can transform math library and builtin calls to
  // their LLVM counterparts if the call is marked 'const' (known to never set
  // errno).
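  // Illustrative note (not from the original source): with math errno handling
  // disabled (e.g. -fno-math-errno), a plain ceil(x) call is treated as
  // 'const' and is emitted via the llvm.ceil intrinsic below rather than as a
  // libcall.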
  2061. if (FD->hasAttr<ConstAttr>()) {
  2062. switch (BuiltinIDIfNoAsmLabel) {
  2063. case Builtin::BIceil:
  2064. case Builtin::BIceilf:
  2065. case Builtin::BIceill:
  2066. case Builtin::BI__builtin_ceil:
  2067. case Builtin::BI__builtin_ceilf:
  2068. case Builtin::BI__builtin_ceilf16:
  2069. case Builtin::BI__builtin_ceill:
  2070. case Builtin::BI__builtin_ceilf128:
  2071. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2072. Intrinsic::ceil,
  2073. Intrinsic::experimental_constrained_ceil));
  2074. case Builtin::BIcopysign:
  2075. case Builtin::BIcopysignf:
  2076. case Builtin::BIcopysignl:
  2077. case Builtin::BI__builtin_copysign:
  2078. case Builtin::BI__builtin_copysignf:
  2079. case Builtin::BI__builtin_copysignf16:
  2080. case Builtin::BI__builtin_copysignl:
  2081. case Builtin::BI__builtin_copysignf128:
  2082. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  2083. case Builtin::BIcos:
  2084. case Builtin::BIcosf:
  2085. case Builtin::BIcosl:
  2086. case Builtin::BI__builtin_cos:
  2087. case Builtin::BI__builtin_cosf:
  2088. case Builtin::BI__builtin_cosf16:
  2089. case Builtin::BI__builtin_cosl:
  2090. case Builtin::BI__builtin_cosf128:
  2091. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2092. Intrinsic::cos,
  2093. Intrinsic::experimental_constrained_cos));
  2094. case Builtin::BIexp:
  2095. case Builtin::BIexpf:
  2096. case Builtin::BIexpl:
  2097. case Builtin::BI__builtin_exp:
  2098. case Builtin::BI__builtin_expf:
  2099. case Builtin::BI__builtin_expf16:
  2100. case Builtin::BI__builtin_expl:
  2101. case Builtin::BI__builtin_expf128:
  2102. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2103. Intrinsic::exp,
  2104. Intrinsic::experimental_constrained_exp));
  2105. case Builtin::BIexp2:
  2106. case Builtin::BIexp2f:
  2107. case Builtin::BIexp2l:
  2108. case Builtin::BI__builtin_exp2:
  2109. case Builtin::BI__builtin_exp2f:
  2110. case Builtin::BI__builtin_exp2f16:
  2111. case Builtin::BI__builtin_exp2l:
  2112. case Builtin::BI__builtin_exp2f128:
  2113. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2114. Intrinsic::exp2,
  2115. Intrinsic::experimental_constrained_exp2));
  2116. case Builtin::BIfabs:
  2117. case Builtin::BIfabsf:
  2118. case Builtin::BIfabsl:
  2119. case Builtin::BI__builtin_fabs:
  2120. case Builtin::BI__builtin_fabsf:
  2121. case Builtin::BI__builtin_fabsf16:
  2122. case Builtin::BI__builtin_fabsl:
  2123. case Builtin::BI__builtin_fabsf128:
  2124. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  2125. case Builtin::BIfloor:
  2126. case Builtin::BIfloorf:
  2127. case Builtin::BIfloorl:
  2128. case Builtin::BI__builtin_floor:
  2129. case Builtin::BI__builtin_floorf:
  2130. case Builtin::BI__builtin_floorf16:
  2131. case Builtin::BI__builtin_floorl:
  2132. case Builtin::BI__builtin_floorf128:
  2133. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2134. Intrinsic::floor,
  2135. Intrinsic::experimental_constrained_floor));
  2136. case Builtin::BIfma:
  2137. case Builtin::BIfmaf:
  2138. case Builtin::BIfmal:
  2139. case Builtin::BI__builtin_fma:
  2140. case Builtin::BI__builtin_fmaf:
  2141. case Builtin::BI__builtin_fmaf16:
  2142. case Builtin::BI__builtin_fmal:
  2143. case Builtin::BI__builtin_fmaf128:
  2144. return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
  2145. Intrinsic::fma,
  2146. Intrinsic::experimental_constrained_fma));
  2147. case Builtin::BIfmax:
  2148. case Builtin::BIfmaxf:
  2149. case Builtin::BIfmaxl:
  2150. case Builtin::BI__builtin_fmax:
  2151. case Builtin::BI__builtin_fmaxf:
  2152. case Builtin::BI__builtin_fmaxf16:
  2153. case Builtin::BI__builtin_fmaxl:
  2154. case Builtin::BI__builtin_fmaxf128:
  2155. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2156. Intrinsic::maxnum,
  2157. Intrinsic::experimental_constrained_maxnum));
  2158. case Builtin::BIfmin:
  2159. case Builtin::BIfminf:
  2160. case Builtin::BIfminl:
  2161. case Builtin::BI__builtin_fmin:
  2162. case Builtin::BI__builtin_fminf:
  2163. case Builtin::BI__builtin_fminf16:
  2164. case Builtin::BI__builtin_fminl:
  2165. case Builtin::BI__builtin_fminf128:
  2166. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2167. Intrinsic::minnum,
  2168. Intrinsic::experimental_constrained_minnum));
    // fmod() is a special case. It maps to the frem instruction rather than an
    // LLVM intrinsic.
  2171. case Builtin::BIfmod:
  2172. case Builtin::BIfmodf:
  2173. case Builtin::BIfmodl:
  2174. case Builtin::BI__builtin_fmod:
  2175. case Builtin::BI__builtin_fmodf:
  2176. case Builtin::BI__builtin_fmodf16:
  2177. case Builtin::BI__builtin_fmodl:
  2178. case Builtin::BI__builtin_fmodf128: {
  2179. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2180. Value *Arg1 = EmitScalarExpr(E->getArg(0));
  2181. Value *Arg2 = EmitScalarExpr(E->getArg(1));
  2182. return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
  2183. }
  2184. case Builtin::BIlog:
  2185. case Builtin::BIlogf:
  2186. case Builtin::BIlogl:
  2187. case Builtin::BI__builtin_log:
  2188. case Builtin::BI__builtin_logf:
  2189. case Builtin::BI__builtin_logf16:
  2190. case Builtin::BI__builtin_logl:
  2191. case Builtin::BI__builtin_logf128:
  2192. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2193. Intrinsic::log,
  2194. Intrinsic::experimental_constrained_log));
  2195. case Builtin::BIlog10:
  2196. case Builtin::BIlog10f:
  2197. case Builtin::BIlog10l:
  2198. case Builtin::BI__builtin_log10:
  2199. case Builtin::BI__builtin_log10f:
  2200. case Builtin::BI__builtin_log10f16:
  2201. case Builtin::BI__builtin_log10l:
  2202. case Builtin::BI__builtin_log10f128:
  2203. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2204. Intrinsic::log10,
  2205. Intrinsic::experimental_constrained_log10));
  2206. case Builtin::BIlog2:
  2207. case Builtin::BIlog2f:
  2208. case Builtin::BIlog2l:
  2209. case Builtin::BI__builtin_log2:
  2210. case Builtin::BI__builtin_log2f:
  2211. case Builtin::BI__builtin_log2f16:
  2212. case Builtin::BI__builtin_log2l:
  2213. case Builtin::BI__builtin_log2f128:
  2214. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2215. Intrinsic::log2,
  2216. Intrinsic::experimental_constrained_log2));
  2217. case Builtin::BInearbyint:
  2218. case Builtin::BInearbyintf:
  2219. case Builtin::BInearbyintl:
  2220. case Builtin::BI__builtin_nearbyint:
  2221. case Builtin::BI__builtin_nearbyintf:
  2222. case Builtin::BI__builtin_nearbyintl:
  2223. case Builtin::BI__builtin_nearbyintf128:
  2224. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2225. Intrinsic::nearbyint,
  2226. Intrinsic::experimental_constrained_nearbyint));
  2227. case Builtin::BIpow:
  2228. case Builtin::BIpowf:
  2229. case Builtin::BIpowl:
  2230. case Builtin::BI__builtin_pow:
  2231. case Builtin::BI__builtin_powf:
  2232. case Builtin::BI__builtin_powf16:
  2233. case Builtin::BI__builtin_powl:
  2234. case Builtin::BI__builtin_powf128:
  2235. return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
  2236. Intrinsic::pow,
  2237. Intrinsic::experimental_constrained_pow));
  2238. case Builtin::BIrint:
  2239. case Builtin::BIrintf:
  2240. case Builtin::BIrintl:
  2241. case Builtin::BI__builtin_rint:
  2242. case Builtin::BI__builtin_rintf:
  2243. case Builtin::BI__builtin_rintf16:
  2244. case Builtin::BI__builtin_rintl:
  2245. case Builtin::BI__builtin_rintf128:
  2246. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2247. Intrinsic::rint,
  2248. Intrinsic::experimental_constrained_rint));
  2249. case Builtin::BIround:
  2250. case Builtin::BIroundf:
  2251. case Builtin::BIroundl:
  2252. case Builtin::BI__builtin_round:
  2253. case Builtin::BI__builtin_roundf:
  2254. case Builtin::BI__builtin_roundf16:
  2255. case Builtin::BI__builtin_roundl:
  2256. case Builtin::BI__builtin_roundf128:
  2257. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2258. Intrinsic::round,
  2259. Intrinsic::experimental_constrained_round));
  2260. case Builtin::BIsin:
  2261. case Builtin::BIsinf:
  2262. case Builtin::BIsinl:
  2263. case Builtin::BI__builtin_sin:
  2264. case Builtin::BI__builtin_sinf:
  2265. case Builtin::BI__builtin_sinf16:
  2266. case Builtin::BI__builtin_sinl:
  2267. case Builtin::BI__builtin_sinf128:
  2268. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2269. Intrinsic::sin,
  2270. Intrinsic::experimental_constrained_sin));
  2271. case Builtin::BIsqrt:
  2272. case Builtin::BIsqrtf:
  2273. case Builtin::BIsqrtl:
  2274. case Builtin::BI__builtin_sqrt:
  2275. case Builtin::BI__builtin_sqrtf:
  2276. case Builtin::BI__builtin_sqrtf16:
  2277. case Builtin::BI__builtin_sqrtl:
  2278. case Builtin::BI__builtin_sqrtf128:
  2279. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2280. Intrinsic::sqrt,
  2281. Intrinsic::experimental_constrained_sqrt));
  2282. case Builtin::BItrunc:
  2283. case Builtin::BItruncf:
  2284. case Builtin::BItruncl:
  2285. case Builtin::BI__builtin_trunc:
  2286. case Builtin::BI__builtin_truncf:
  2287. case Builtin::BI__builtin_truncf16:
  2288. case Builtin::BI__builtin_truncl:
  2289. case Builtin::BI__builtin_truncf128:
  2290. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
  2291. Intrinsic::trunc,
  2292. Intrinsic::experimental_constrained_trunc));
  2293. case Builtin::BIlround:
  2294. case Builtin::BIlroundf:
  2295. case Builtin::BIlroundl:
  2296. case Builtin::BI__builtin_lround:
  2297. case Builtin::BI__builtin_lroundf:
  2298. case Builtin::BI__builtin_lroundl:
  2299. case Builtin::BI__builtin_lroundf128:
  2300. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2301. *this, E, Intrinsic::lround,
  2302. Intrinsic::experimental_constrained_lround));
  2303. case Builtin::BIllround:
  2304. case Builtin::BIllroundf:
  2305. case Builtin::BIllroundl:
  2306. case Builtin::BI__builtin_llround:
  2307. case Builtin::BI__builtin_llroundf:
  2308. case Builtin::BI__builtin_llroundl:
  2309. case Builtin::BI__builtin_llroundf128:
  2310. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2311. *this, E, Intrinsic::llround,
  2312. Intrinsic::experimental_constrained_llround));
  2313. case Builtin::BIlrint:
  2314. case Builtin::BIlrintf:
  2315. case Builtin::BIlrintl:
  2316. case Builtin::BI__builtin_lrint:
  2317. case Builtin::BI__builtin_lrintf:
  2318. case Builtin::BI__builtin_lrintl:
  2319. case Builtin::BI__builtin_lrintf128:
  2320. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2321. *this, E, Intrinsic::lrint,
  2322. Intrinsic::experimental_constrained_lrint));
  2323. case Builtin::BIllrint:
  2324. case Builtin::BIllrintf:
  2325. case Builtin::BIllrintl:
  2326. case Builtin::BI__builtin_llrint:
  2327. case Builtin::BI__builtin_llrintf:
  2328. case Builtin::BI__builtin_llrintl:
  2329. case Builtin::BI__builtin_llrintf128:
  2330. return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
  2331. *this, E, Intrinsic::llrint,
  2332. Intrinsic::experimental_constrained_llrint));
  2333. default:
  2334. break;
  2335. }
  2336. }
  2337. switch (BuiltinIDIfNoAsmLabel) {
  2338. default: break;
  2339. case Builtin::BI__builtin___CFStringMakeConstantString:
  2340. case Builtin::BI__builtin___NSStringMakeConstantString:
  2341. return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
  2342. case Builtin::BI__builtin_stdarg_start:
  2343. case Builtin::BI__builtin_va_start:
  2344. case Builtin::BI__va_start:
  2345. case Builtin::BI__builtin_va_end:
  2346. return RValue::get(
  2347. EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
  2348. ? EmitScalarExpr(E->getArg(0))
  2349. : EmitVAListRef(E->getArg(0)).getPointer(),
  2350. BuiltinID != Builtin::BI__builtin_va_end));
  2351. case Builtin::BI__builtin_va_copy: {
  2352. Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
  2353. Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
  2354. llvm::Type *Type = Int8PtrTy;
  2355. DstPtr = Builder.CreateBitCast(DstPtr, Type);
  2356. SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
  2357. return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
  2358. {DstPtr, SrcPtr}));
  2359. }
  2360. case Builtin::BI__builtin_abs:
  2361. case Builtin::BI__builtin_labs:
  2362. case Builtin::BI__builtin_llabs: {
  2363. // X < 0 ? -X : X
  2364. // The negation has 'nsw' because abs of INT_MIN is undefined.
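    // Worked example (illustrative, not from the original source): for
    // __builtin_abs(-5), NegOp is 5, the compare is true, and the select
    // yields 5; __builtin_abs(INT_MIN) is UB, which the 'nsw' flag encodes.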
  2365. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2366. Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
  2367. Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
  2368. Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
  2369. Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
  2370. return RValue::get(Result);
  2371. }
  2372. case Builtin::BI__builtin_complex: {
  2373. Value *Real = EmitScalarExpr(E->getArg(0));
  2374. Value *Imag = EmitScalarExpr(E->getArg(1));
  2375. return RValue::getComplex({Real, Imag});
  2376. }
  2377. case Builtin::BI__builtin_conj:
  2378. case Builtin::BI__builtin_conjf:
  2379. case Builtin::BI__builtin_conjl:
  2380. case Builtin::BIconj:
  2381. case Builtin::BIconjf:
  2382. case Builtin::BIconjl: {
  2383. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2384. Value *Real = ComplexVal.first;
  2385. Value *Imag = ComplexVal.second;
  2386. Imag = Builder.CreateFNeg(Imag, "neg");
  2387. return RValue::getComplex(std::make_pair(Real, Imag));
  2388. }
  2389. case Builtin::BI__builtin_creal:
  2390. case Builtin::BI__builtin_crealf:
  2391. case Builtin::BI__builtin_creall:
  2392. case Builtin::BIcreal:
  2393. case Builtin::BIcrealf:
  2394. case Builtin::BIcreall: {
  2395. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2396. return RValue::get(ComplexVal.first);
  2397. }
  2398. case Builtin::BI__builtin_dump_struct: {
  2399. llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy);
  2400. llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get(
  2401. LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true);
  2402. Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
  2403. CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
  2404. const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
  2405. QualType Arg0Type = Arg0->getType()->getPointeeType();
  2406. Value *RecordPtr = EmitScalarExpr(Arg0);
  2407. Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align,
  2408. {LLVMFuncType, Func}, 0);
  2409. return RValue::get(Res);
  2410. }
  2411. case Builtin::BI__builtin_preserve_access_index: {
    // Only enable the preserved access index region when debuginfo
    // is available, as debuginfo is needed to preserve the user-level
    // access pattern.
  2415. if (!getDebugInfo()) {
  2416. CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
  2417. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2418. }
  2419. // Nested builtin_preserve_access_index() not supported
  2420. if (IsInPreservedAIRegion) {
  2421. CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
  2422. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2423. }
  2424. IsInPreservedAIRegion = true;
  2425. Value *Res = EmitScalarExpr(E->getArg(0));
  2426. IsInPreservedAIRegion = false;
  2427. return RValue::get(Res);
  2428. }
  2429. case Builtin::BI__builtin_cimag:
  2430. case Builtin::BI__builtin_cimagf:
  2431. case Builtin::BI__builtin_cimagl:
  2432. case Builtin::BIcimag:
  2433. case Builtin::BIcimagf:
  2434. case Builtin::BIcimagl: {
  2435. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2436. return RValue::get(ComplexVal.second);
  2437. }
  2438. case Builtin::BI__builtin_clrsb:
  2439. case Builtin::BI__builtin_clrsbl:
  2440. case Builtin::BI__builtin_clrsbll: {
    // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
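    // Worked example (illustrative, not from the original source): for 32-bit
    // x = 1, clz(1) = 31, so clrsb(1) = 30; for x = -1, ~x = 0 and clz(0) = 32,
    // so clrsb(-1) = 31.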
  2442. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2443. llvm::Type *ArgType = ArgValue->getType();
  2444. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2445. llvm::Type *ResultType = ConvertType(E->getType());
  2446. Value *Zero = llvm::Constant::getNullValue(ArgType);
  2447. Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
  2448. Value *Inverse = Builder.CreateNot(ArgValue, "not");
  2449. Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
  2450. Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
  2451. Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
  2452. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2453. "cast");
  2454. return RValue::get(Result);
  2455. }
  2456. case Builtin::BI__builtin_ctzs:
  2457. case Builtin::BI__builtin_ctz:
  2458. case Builtin::BI__builtin_ctzl:
  2459. case Builtin::BI__builtin_ctzll: {
  2460. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
  2461. llvm::Type *ArgType = ArgValue->getType();
  2462. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  2463. llvm::Type *ResultType = ConvertType(E->getType());
  2464. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  2465. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  2466. if (Result->getType() != ResultType)
  2467. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2468. "cast");
  2469. return RValue::get(Result);
  2470. }
  2471. case Builtin::BI__builtin_clzs:
  2472. case Builtin::BI__builtin_clz:
  2473. case Builtin::BI__builtin_clzl:
  2474. case Builtin::BI__builtin_clzll: {
  2475. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
  2476. llvm::Type *ArgType = ArgValue->getType();
  2477. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2478. llvm::Type *ResultType = ConvertType(E->getType());
  2479. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  2480. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  2481. if (Result->getType() != ResultType)
  2482. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2483. "cast");
  2484. return RValue::get(Result);
  2485. }
  2486. case Builtin::BI__builtin_ffs:
  2487. case Builtin::BI__builtin_ffsl:
  2488. case Builtin::BI__builtin_ffsll: {
  2489. // ffs(x) -> x ? cttz(x) + 1 : 0
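    // Worked example (illustrative, not from the original source):
    // ffs(0b1000) = cttz(8) + 1 = 4, while ffs(0) = 0 via the select below.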
  2490. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2491. llvm::Type *ArgType = ArgValue->getType();
  2492. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  2493. llvm::Type *ResultType = ConvertType(E->getType());
  2494. Value *Tmp =
  2495. Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
  2496. llvm::ConstantInt::get(ArgType, 1));
  2497. Value *Zero = llvm::Constant::getNullValue(ArgType);
  2498. Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
  2499. Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
  2500. if (Result->getType() != ResultType)
  2501. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2502. "cast");
  2503. return RValue::get(Result);
  2504. }
  2505. case Builtin::BI__builtin_parity:
  2506. case Builtin::BI__builtin_parityl:
  2507. case Builtin::BI__builtin_parityll: {
  2508. // parity(x) -> ctpop(x) & 1
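    // Worked example (illustrative, not from the original source):
    // parity(0b1011) = ctpop(0b1011) & 1 = 3 & 1 = 1.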
  2509. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2510. llvm::Type *ArgType = ArgValue->getType();
  2511. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  2512. llvm::Type *ResultType = ConvertType(E->getType());
  2513. Value *Tmp = Builder.CreateCall(F, ArgValue);
  2514. Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
  2515. if (Result->getType() != ResultType)
  2516. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2517. "cast");
  2518. return RValue::get(Result);
  2519. }
  2520. case Builtin::BI__lzcnt16:
  2521. case Builtin::BI__lzcnt:
  2522. case Builtin::BI__lzcnt64: {
  2523. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2524. llvm::Type *ArgType = ArgValue->getType();
  2525. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  2526. llvm::Type *ResultType = ConvertType(E->getType());
  2527. Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
  2528. if (Result->getType() != ResultType)
  2529. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2530. "cast");
  2531. return RValue::get(Result);
  2532. }
  2533. case Builtin::BI__popcnt16:
  2534. case Builtin::BI__popcnt:
  2535. case Builtin::BI__popcnt64:
  2536. case Builtin::BI__builtin_popcount:
  2537. case Builtin::BI__builtin_popcountl:
  2538. case Builtin::BI__builtin_popcountll: {
  2539. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2540. llvm::Type *ArgType = ArgValue->getType();
  2541. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  2542. llvm::Type *ResultType = ConvertType(E->getType());
  2543. Value *Result = Builder.CreateCall(F, ArgValue);
  2544. if (Result->getType() != ResultType)
  2545. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  2546. "cast");
  2547. return RValue::get(Result);
  2548. }
  2549. case Builtin::BI__builtin_unpredictable: {
  2550. // Always return the argument of __builtin_unpredictable. LLVM does not
  2551. // handle this builtin. Metadata for this builtin should be added directly
  2552. // to instructions such as branches or switches that use it.
  2553. return RValue::get(EmitScalarExpr(E->getArg(0)));
  2554. }
  2555. case Builtin::BI__builtin_expect: {
  2556. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2557. llvm::Type *ArgType = ArgValue->getType();
  2558. Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note that we still IRGen ExpectedValue because it could have
    // side effects.
  2562. if (CGM.getCodeGenOpts().OptimizationLevel == 0)
  2563. return RValue::get(ArgValue);
  2564. Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
  2565. Value *Result =
  2566. Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
  2567. return RValue::get(Result);
  2568. }
  2569. case Builtin::BI__builtin_expect_with_probability: {
  2570. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2571. llvm::Type *ArgType = ArgValue->getType();
  2572. Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
  2573. llvm::APFloat Probability(0.0);
  2574. const Expr *ProbArg = E->getArg(2);
  2575. bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
  2576. assert(EvalSucceed && "probability should be able to evaluate as float");
  2577. (void)EvalSucceed;
  2578. bool LoseInfo = false;
  2579. Probability.convert(llvm::APFloat::IEEEdouble(),
  2580. llvm::RoundingMode::Dynamic, &LoseInfo);
  2581. llvm::Type *Ty = ConvertType(ProbArg->getType());
  2582. Constant *Confidence = ConstantFP::get(Ty, Probability);
    // Don't generate llvm.expect.with.probability on -O0 as the backend
    // won't use it for anything.
    // Note that we still IRGen ExpectedValue because it could have
    // side effects.
  2586. if (CGM.getCodeGenOpts().OptimizationLevel == 0)
  2587. return RValue::get(ArgValue);
  2588. Function *FnExpect =
  2589. CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
  2590. Value *Result = Builder.CreateCall(
  2591. FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
  2592. return RValue::get(Result);
  2593. }
  2594. case Builtin::BI__builtin_assume_aligned: {
  2595. const Expr *Ptr = E->getArg(0);
  2596. Value *PtrValue = EmitScalarExpr(Ptr);
  2597. Value *OffsetValue =
  2598. (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
  2599. Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
  2600. ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
  2601. if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
  2602. AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
  2603. llvm::Value::MaximumAlignment);
  2604. emitAlignmentAssumption(PtrValue, Ptr,
  2605. /*The expr loc is sufficient.*/ SourceLocation(),
  2606. AlignmentCI, OffsetValue);
  2607. return RValue::get(PtrValue);
  2608. }
  2609. case Builtin::BI__assume:
  2610. case Builtin::BI__builtin_assume: {
  2611. if (E->getArg(0)->HasSideEffects(getContext()))
  2612. return RValue::get(nullptr);
  2613. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2614. Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
  2615. return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  2616. }
  2617. case Builtin::BI__arithmetic_fence: {
    // Create the builtin call if FastMath is selected and the target
    // supports the builtin; otherwise just return the argument.
  2620. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2621. llvm::FastMathFlags FMF = Builder.getFastMathFlags();
  2622. bool isArithmeticFenceEnabled =
  2623. FMF.allowReassoc() &&
  2624. getContext().getTargetInfo().checkArithmeticFenceSupported();
  2625. QualType ArgType = E->getArg(0)->getType();
  2626. if (ArgType->isComplexType()) {
  2627. if (isArithmeticFenceEnabled) {
  2628. QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
  2629. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2630. Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
  2631. ConvertType(ElementType));
  2632. Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
  2633. ConvertType(ElementType));
  2634. return RValue::getComplex(std::make_pair(Real, Imag));
  2635. }
  2636. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  2637. Value *Real = ComplexVal.first;
  2638. Value *Imag = ComplexVal.second;
  2639. return RValue::getComplex(std::make_pair(Real, Imag));
  2640. }
  2641. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  2642. if (isArithmeticFenceEnabled)
  2643. return RValue::get(
  2644. Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
  2645. return RValue::get(ArgValue);
  2646. }
  2647. case Builtin::BI__builtin_bswap16:
  2648. case Builtin::BI__builtin_bswap32:
  2649. case Builtin::BI__builtin_bswap64: {
  2650. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  2651. }
  2652. case Builtin::BI__builtin_bitreverse8:
  2653. case Builtin::BI__builtin_bitreverse16:
  2654. case Builtin::BI__builtin_bitreverse32:
  2655. case Builtin::BI__builtin_bitreverse64: {
  2656. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  2657. }
  2658. case Builtin::BI__builtin_rotateleft8:
  2659. case Builtin::BI__builtin_rotateleft16:
  2660. case Builtin::BI__builtin_rotateleft32:
  2661. case Builtin::BI__builtin_rotateleft64:
  2662. case Builtin::BI_rotl8: // Microsoft variants of rotate left
  2663. case Builtin::BI_rotl16:
  2664. case Builtin::BI_rotl:
  2665. case Builtin::BI_lrotl:
  2666. case Builtin::BI_rotl64:
  2667. return emitRotate(E, false);
  2668. case Builtin::BI__builtin_rotateright8:
  2669. case Builtin::BI__builtin_rotateright16:
  2670. case Builtin::BI__builtin_rotateright32:
  2671. case Builtin::BI__builtin_rotateright64:
  2672. case Builtin::BI_rotr8: // Microsoft variants of rotate right
  2673. case Builtin::BI_rotr16:
  2674. case Builtin::BI_rotr:
  2675. case Builtin::BI_lrotr:
  2676. case Builtin::BI_rotr64:
  2677. return emitRotate(E, true);
  2678. case Builtin::BI__builtin_constant_p: {
  2679. llvm::Type *ResultType = ConvertType(E->getType());
  2680. const Expr *Arg = E->getArg(0);
  2681. QualType ArgType = Arg->getType();
  2682. // FIXME: The allowance for Obj-C pointers and block pointers is historical
  2683. // and likely a mistake.
  2684. if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
  2685. !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
  2686. // Per the GCC documentation, only numeric constants are recognized after
  2687. // inlining.
  2688. return RValue::get(ConstantInt::get(ResultType, 0));
  2689. if (Arg->HasSideEffects(getContext()))
  2690. // The argument is unevaluated, so be conservative if it might have
  2691. // side-effects.
  2692. return RValue::get(ConstantInt::get(ResultType, 0));
  2693. Value *ArgValue = EmitScalarExpr(Arg);
  2694. if (ArgType->isObjCObjectPointerType()) {
  2695. // Convert Objective-C objects to id because we cannot distinguish between
  2696. // LLVM types for Obj-C classes as they are opaque.
  2697. ArgType = CGM.getContext().getObjCIdType();
  2698. ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
  2699. }
  2700. Function *F =
  2701. CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
  2702. Value *Result = Builder.CreateCall(F, ArgValue);
  2703. if (Result->getType() != ResultType)
  2704. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
  2705. return RValue::get(Result);
  2706. }
  2707. case Builtin::BI__builtin_dynamic_object_size:
  2708. case Builtin::BI__builtin_object_size: {
  2709. unsigned Type =
  2710. E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
  2711. auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
  2712. // We pass this builtin onto the optimizer so that it can figure out the
  2713. // object size in more complex cases.
  2714. bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
  2715. return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
  2716. /*EmittedE=*/nullptr, IsDynamic));
  2717. }
  2718. case Builtin::BI__builtin_prefetch: {
  2719. Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
  2721. RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
  2722. llvm::ConstantInt::get(Int32Ty, 0);
  2723. Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
  2724. llvm::ConstantInt::get(Int32Ty, 3);
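    // Illustrative note (not from the original source): a plain
    // __builtin_prefetch(p) therefore defaults to a read prefetch (rw = 0)
    // with maximal temporal locality (locality = 3).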
  2725. Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
  2726. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  2727. return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  2728. }
  2729. case Builtin::BI__builtin_readcyclecounter: {
  2730. Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
  2731. return RValue::get(Builder.CreateCall(F));
  2732. }
  2733. case Builtin::BI__builtin___clear_cache: {
  2734. Value *Begin = EmitScalarExpr(E->getArg(0));
  2735. Value *End = EmitScalarExpr(E->getArg(1));
  2736. Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
  2737. return RValue::get(Builder.CreateCall(F, {Begin, End}));
  2738. }
  2739. case Builtin::BI__builtin_trap:
  2740. return RValue::get(EmitTrapCall(Intrinsic::trap));
  2741. case Builtin::BI__debugbreak:
  2742. return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  2743. case Builtin::BI__builtin_unreachable: {
  2744. EmitUnreachable(E->getExprLoc());
  2745. // We do need to preserve an insertion point.
  2746. EmitBlock(createBasicBlock("unreachable.cont"));
  2747. return RValue::get(nullptr);
  2748. }
  2749. case Builtin::BI__builtin_powi:
  2750. case Builtin::BI__builtin_powif:
  2751. case Builtin::BI__builtin_powil: {
  2752. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  2753. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  2754. if (Builder.getIsFPConstrained()) {
  2755. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2756. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
  2757. Src0->getType());
  2758. return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
  2759. }
  2760. Function *F = CGM.getIntrinsic(Intrinsic::powi,
  2761. { Src0->getType(), Src1->getType() });
  2762. return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
  2763. }
  2764. case Builtin::BI__builtin_isgreater:
  2765. case Builtin::BI__builtin_isgreaterequal:
  2766. case Builtin::BI__builtin_isless:
  2767. case Builtin::BI__builtin_islessequal:
  2768. case Builtin::BI__builtin_islessgreater:
  2769. case Builtin::BI__builtin_isunordered: {
  2770. // Ordered comparisons: we know the arguments to these are matching scalar
  2771. // floating point values.
  2772. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2773. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  2774. Value *LHS = EmitScalarExpr(E->getArg(0));
  2775. Value *RHS = EmitScalarExpr(E->getArg(1));
  2776. switch (BuiltinID) {
  2777. default: llvm_unreachable("Unknown ordered comparison");
  2778. case Builtin::BI__builtin_isgreater:
  2779. LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
  2780. break;
  2781. case Builtin::BI__builtin_isgreaterequal:
  2782. LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
  2783. break;
  2784. case Builtin::BI__builtin_isless:
  2785. LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
  2786. break;
  2787. case Builtin::BI__builtin_islessequal:
  2788. LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
  2789. break;
  2790. case Builtin::BI__builtin_islessgreater:
  2791. LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
  2792. break;
  2793. case Builtin::BI__builtin_isunordered:
  2794. LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
  2795. break;
  2796. }
  2797. // ZExt bool to int type.
  2798. return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  2799. }
  2800. case Builtin::BI__builtin_isnan: {
  2801. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2802. Value *V = EmitScalarExpr(E->getArg(0));
  2803. llvm::Type *Ty = V->getType();
  2804. const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
  2805. if (!Builder.getIsFPConstrained() ||
  2806. Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
  2807. !Ty->isIEEE()) {
  2808. V = Builder.CreateFCmpUNO(V, V, "cmp");
  2809. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  2810. }
  2811. if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
  2812. return RValue::get(Result);
    // NaN has all exp bits set and a non-zero significand. Therefore:
    // isnan(V) == ((exp mask - abs(V)) < 0), i.e. abs(V) > exp mask,
    // which is tested below via the sign bit of the subtraction.
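    // Worked example (illustrative, not from the original source): for float,
    // exp mask = 0x7F800000; a NaN pattern such as 0x7FC00000 gives
    // 0x7F800000 - 0x7FC00000 < 0, so the extracted sign bit below is 1.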
  2815. unsigned bitsize = Ty->getScalarSizeInBits();
  2816. llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
  2817. Value *IntV = Builder.CreateBitCast(V, IntTy);
  2818. APInt AndMask = APInt::getSignedMaxValue(bitsize);
  2819. Value *AbsV =
  2820. Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
  2821. APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
  2822. Value *Sub =
  2823. Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
  2824. // V = sign bit (Sub) <=> V = (Sub < 0)
  2825. V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
  2826. if (bitsize > 32)
  2827. V = Builder.CreateTrunc(V, ConvertType(E->getType()));
  2828. return RValue::get(V);
  2829. }
  2830. case Builtin::BI__builtin_elementwise_abs: {
  2831. Value *Result;
  2832. QualType QT = E->getArg(0)->getType();
  2833. if (auto *VecTy = QT->getAs<VectorType>())
  2834. QT = VecTy->getElementType();
  2835. if (QT->isIntegerType())
  2836. Result = Builder.CreateBinaryIntrinsic(
  2837. llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
  2838. Builder.getFalse(), nullptr, "elt.abs");
  2839. else
  2840. Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
  2841. return RValue::get(Result);
  2842. }
  2843. case Builtin::BI__builtin_elementwise_ceil:
  2844. return RValue::get(
  2845. emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
  2846. case Builtin::BI__builtin_elementwise_floor:
  2847. return RValue::get(
  2848. emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
  2849. case Builtin::BI__builtin_elementwise_roundeven:
  2850. return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
  2851. "elt.roundeven"));
  2852. case Builtin::BI__builtin_elementwise_trunc:
  2853. return RValue::get(
  2854. emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
  2855. case Builtin::BI__builtin_elementwise_max: {
  2856. Value *Op0 = EmitScalarExpr(E->getArg(0));
  2857. Value *Op1 = EmitScalarExpr(E->getArg(1));
  2858. Value *Result;
  2859. if (Op0->getType()->isIntOrIntVectorTy()) {
  2860. QualType Ty = E->getArg(0)->getType();
  2861. if (auto *VecTy = Ty->getAs<VectorType>())
  2862. Ty = VecTy->getElementType();
  2863. Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
  2864. ? llvm::Intrinsic::smax
  2865. : llvm::Intrinsic::umax,
  2866. Op0, Op1, nullptr, "elt.max");
  2867. } else
  2868. Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
  2869. return RValue::get(Result);
  2870. }
  2871. case Builtin::BI__builtin_elementwise_min: {
  2872. Value *Op0 = EmitScalarExpr(E->getArg(0));
  2873. Value *Op1 = EmitScalarExpr(E->getArg(1));
  2874. Value *Result;
  2875. if (Op0->getType()->isIntOrIntVectorTy()) {
  2876. QualType Ty = E->getArg(0)->getType();
  2877. if (auto *VecTy = Ty->getAs<VectorType>())
  2878. Ty = VecTy->getElementType();
  2879. Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
  2880. ? llvm::Intrinsic::smin
  2881. : llvm::Intrinsic::umin,
  2882. Op0, Op1, nullptr, "elt.min");
  2883. } else
  2884. Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
  2885. return RValue::get(Result);
  2886. }
  2887. case Builtin::BI__builtin_reduce_max: {
  2888. auto GetIntrinsicID = [](QualType QT) {
  2889. if (auto *VecTy = QT->getAs<VectorType>())
  2890. QT = VecTy->getElementType();
  2891. if (QT->isSignedIntegerType())
  2892. return llvm::Intrinsic::vector_reduce_smax;
  2893. if (QT->isUnsignedIntegerType())
  2894. return llvm::Intrinsic::vector_reduce_umax;
  2895. assert(QT->isFloatingType() && "must have a float here");
  2896. return llvm::Intrinsic::vector_reduce_fmax;
  2897. };
  2898. return RValue::get(emitUnaryBuiltin(
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
  2900. }
  2901. case Builtin::BI__builtin_reduce_min: {
  2902. auto GetIntrinsicID = [](QualType QT) {
  2903. if (auto *VecTy = QT->getAs<VectorType>())
  2904. QT = VecTy->getElementType();
  2905. if (QT->isSignedIntegerType())
  2906. return llvm::Intrinsic::vector_reduce_smin;
  2907. if (QT->isUnsignedIntegerType())
  2908. return llvm::Intrinsic::vector_reduce_umin;
  2909. assert(QT->isFloatingType() && "must have a float here");
  2910. return llvm::Intrinsic::vector_reduce_fmin;
  2911. };
  2912. return RValue::get(emitUnaryBuiltin(
  2913. *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
  2914. }
  2915. case Builtin::BI__builtin_reduce_xor:
  2916. return RValue::get(emitUnaryBuiltin(
  2917. *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
  2918. case Builtin::BI__builtin_reduce_or:
  2919. return RValue::get(emitUnaryBuiltin(
  2920. *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
  2921. case Builtin::BI__builtin_reduce_and:
  2922. return RValue::get(emitUnaryBuiltin(
  2923. *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
  2924. case Builtin::BI__builtin_matrix_transpose: {
  2925. auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
  2926. Value *MatValue = EmitScalarExpr(E->getArg(0));
  2927. MatrixBuilder<CGBuilderTy> MB(Builder);
  2928. Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
  2929. MatrixTy->getNumColumns());
  2930. return RValue::get(Result);
  2931. }
  2932. case Builtin::BI__builtin_matrix_column_major_load: {
  2933. MatrixBuilder<CGBuilderTy> MB(Builder);
  2934. // Emit everything that isn't dependent on the first parameter type
  2935. Value *Stride = EmitScalarExpr(E->getArg(3));
  2936. const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
  2937. auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
  2938. assert(PtrTy && "arg0 must be of pointer type");
  2939. bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
  2940. Address Src = EmitPointerWithAlignment(E->getArg(0));
  2941. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
  2942. E->getArg(0)->getExprLoc(), FD, 0);
  2943. Value *Result = MB.CreateColumnMajorLoad(
  2944. Src.getPointer(), Align(Src.getAlignment().getQuantity()), Stride,
  2945. IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(),
  2946. "matrix");
  2947. return RValue::get(Result);
  2948. }
  2949. case Builtin::BI__builtin_matrix_column_major_store: {
  2950. MatrixBuilder<CGBuilderTy> MB(Builder);
  2951. Value *Matrix = EmitScalarExpr(E->getArg(0));
  2952. Address Dst = EmitPointerWithAlignment(E->getArg(1));
  2953. Value *Stride = EmitScalarExpr(E->getArg(2));
  2954. const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
  2955. auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
  2956. assert(PtrTy && "arg1 must be of pointer type");
  2957. bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
  2958. EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
  2959. E->getArg(1)->getExprLoc(), FD, 0);
  2960. Value *Result = MB.CreateColumnMajorStore(
  2961. Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
  2962. Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
  2963. return RValue::get(Result);
  2964. }
  2965. case Builtin::BIfinite:
  2966. case Builtin::BI__finite:
  2967. case Builtin::BIfinitef:
  2968. case Builtin::BI__finitef:
  2969. case Builtin::BIfinitel:
  2970. case Builtin::BI__finitel:
  2971. case Builtin::BI__builtin_isinf:
  2972. case Builtin::BI__builtin_isfinite: {
  2973. // isinf(x) --> fabs(x) == infinity
  2974. // isfinite(x) --> fabs(x) != infinity
  2975. // x != NaN via the ordered compare in either case.
  2976. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  2977. Value *V = EmitScalarExpr(E->getArg(0));
  2978. llvm::Type *Ty = V->getType();
  2979. if (!Builder.getIsFPConstrained() ||
  2980. Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
  2981. !Ty->isIEEE()) {
  2982. Value *Fabs = EmitFAbs(*this, V);
  2983. Constant *Infinity = ConstantFP::getInfinity(V->getType());
  2984. CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
  2985. ? CmpInst::FCMP_OEQ
  2986. : CmpInst::FCMP_ONE;
  2987. Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
  2988. return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  2989. }
  2990. if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
  2991. return RValue::get(Result);
  2992. // Inf values have all exp bits set and a zero significand. Therefore:
  2993. // isinf(V) == ((V << 1) == ((exp mask) << 1))
  2994. // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
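    // Worked example (illustrative, not from the original source): for float,
    // +inf is 0x7F800000 and -inf is 0xFF800000; shifting either left by one
    // gives 0xFF000000 == (exp mask) << 1, while any finite value compares
    // unsigned-less-than that.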
  2995. unsigned bitsize = Ty->getScalarSizeInBits();
  2996. llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
  2997. Value *IntV = Builder.CreateBitCast(V, IntTy);
  2998. Value *Shl1 = Builder.CreateShl(IntV, 1);
  2999. const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
  3000. APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
  3001. Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
  3002. if (BuiltinID == Builtin::BI__builtin_isinf)
  3003. V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
  3004. else
  3005. V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
  3006. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  3007. }
  3008. case Builtin::BI__builtin_isinf_sign: {
  3009. // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
  3010. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  3011. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  3012. Value *Arg = EmitScalarExpr(E->getArg(0));
  3013. Value *AbsArg = EmitFAbs(*this, Arg);
  3014. Value *IsInf = Builder.CreateFCmpOEQ(
  3015. AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
  3016. Value *IsNeg = EmitSignBit(*this, Arg);
  3017. llvm::Type *IntTy = ConvertType(E->getType());
  3018. Value *Zero = Constant::getNullValue(IntTy);
  3019. Value *One = ConstantInt::get(IntTy, 1);
  3020. Value *NegativeOne = ConstantInt::get(IntTy, -1);
  3021. Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
  3022. Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
  3023. return RValue::get(Result);
  3024. }
  3025. case Builtin::BI__builtin_isnormal: {
  3026. // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
  3027. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  3028. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
  3029. Value *V = EmitScalarExpr(E->getArg(0));
  3030. Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
  3031. Value *Abs = EmitFAbs(*this, V);
  3032. Value *IsLessThanInf =
  3033. Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
  3034. APFloat Smallest = APFloat::getSmallestNormalized(
  3035. getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
  3036. Value *IsNormal =
  3037. Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
  3038. "isnormal");
  3039. V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
  3040. V = Builder.CreateAnd(V, IsNormal, "and");
  3041. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  3042. }
  3043. case Builtin::BI__builtin_flt_rounds: {
  3044. Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
  3045. llvm::Type *ResultType = ConvertType(E->getType());
  3046. Value *Result = Builder.CreateCall(F);
  3047. if (Result->getType() != ResultType)
  3048. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  3049. "cast");
  3050. return RValue::get(Result);
  3051. }
  3052. case Builtin::BI__builtin_fpclassify: {
  3053. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  3054. // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
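// The control flow built below is roughly equivalent to:
//   x == 0                  ? arg4 (FP_ZERO)
// : x != x                  ? arg0 (FP_NAN)
// : fabs(x) == inf          ? arg1 (FP_INFINITE)
// : fabs(x) >= min_normal   ? arg2 (FP_NORMAL) : arg3 (FP_SUBNORMAL)
// where x is arg5.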
  3055. Value *V = EmitScalarExpr(E->getArg(5));
  3056. llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
  3057. // Create Result
  3058. BasicBlock *Begin = Builder.GetInsertBlock();
  3059. BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
  3060. Builder.SetInsertPoint(End);
  3061. PHINode *Result =
  3062. Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3063. "fpclassify_result");
3064. // Create the result PHI in the fpclassify_end block up front; each branch
3064. // below adds its incoming value.
  3064. // if (V==0) return FP_ZERO
  3065. Builder.SetInsertPoint(Begin);
  3066. Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
  3067. "iszero");
  3068. Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
  3069. BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
  3070. Builder.CreateCondBr(IsZero, End, NotZero);
  3071. Result->addIncoming(ZeroLiteral, Begin);
  3072. // if (V != V) return FP_NAN
  3073. Builder.SetInsertPoint(NotZero);
  3074. Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
  3075. Value *NanLiteral = EmitScalarExpr(E->getArg(0));
  3076. BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
  3077. Builder.CreateCondBr(IsNan, End, NotNan);
  3078. Result->addIncoming(NanLiteral, NotZero);
  3079. // if (fabs(V) == infinity) return FP_INFINITY
  3080. Builder.SetInsertPoint(NotNan);
  3081. Value *VAbs = EmitFAbs(*this, V);
  3082. Value *IsInf =
  3083. Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
  3084. "isinf");
  3085. Value *InfLiteral = EmitScalarExpr(E->getArg(1));
  3086. BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
  3087. Builder.CreateCondBr(IsInf, End, NotInf);
  3088. Result->addIncoming(InfLiteral, NotNan);
  3089. // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
  3090. Builder.SetInsertPoint(NotInf);
  3091. APFloat Smallest = APFloat::getSmallestNormalized(
  3092. getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
  3093. Value *IsNormal =
  3094. Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
  3095. "isnormal");
  3096. Value *NormalResult =
  3097. Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
  3098. EmitScalarExpr(E->getArg(3)));
  3099. Builder.CreateBr(End);
  3100. Result->addIncoming(NormalResult, NotInf);
  3101. // return Result
  3102. Builder.SetInsertPoint(End);
  3103. return RValue::get(Result);
  3104. }
  3105. case Builtin::BIalloca:
  3106. case Builtin::BI_alloca:
  3107. case Builtin::BI__builtin_alloca_uninitialized:
  3108. case Builtin::BI__builtin_alloca: {
  3109. Value *Size = EmitScalarExpr(E->getArg(0));
  3110. const TargetInfo &TI = getContext().getTargetInfo();
  3111. // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
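// For example, on x86-64 the suitable alignment is typically 16 bytes, so the
// alloca below is emitted with "align 16" (illustrative; the exact value is
// target-dependent).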
  3112. const Align SuitableAlignmentInBytes =
  3113. CGM.getContext()
  3114. .toCharUnitsFromBits(TI.getSuitableAlign())
  3115. .getAsAlign();
  3116. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  3117. AI->setAlignment(SuitableAlignmentInBytes);
  3118. if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
  3119. initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
  3120. return RValue::get(AI);
  3121. }
  3122. case Builtin::BI__builtin_alloca_with_align_uninitialized:
  3123. case Builtin::BI__builtin_alloca_with_align: {
  3124. Value *Size = EmitScalarExpr(E->getArg(0));
  3125. Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
  3126. auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
  3127. unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
  3128. const Align AlignmentInBytes =
  3129. CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
  3130. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  3131. AI->setAlignment(AlignmentInBytes);
  3132. if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
  3133. initializeAlloca(*this, AI, Size, AlignmentInBytes);
  3134. return RValue::get(AI);
  3135. }
  3136. case Builtin::BIbzero:
  3137. case Builtin::BI__builtin_bzero: {
  3138. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3139. Value *SizeVal = EmitScalarExpr(E->getArg(1));
  3140. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3141. E->getArg(0)->getExprLoc(), FD, 0);
  3142. Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
  3143. return RValue::get(nullptr);
  3144. }
  3145. case Builtin::BImemcpy:
  3146. case Builtin::BI__builtin_memcpy:
  3147. case Builtin::BImempcpy:
  3148. case Builtin::BI__builtin_mempcpy: {
  3149. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3150. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3151. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3152. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3153. E->getArg(0)->getExprLoc(), FD, 0);
  3154. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3155. E->getArg(1)->getExprLoc(), FD, 1);
  3156. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  3157. if (BuiltinID == Builtin::BImempcpy ||
  3158. BuiltinID == Builtin::BI__builtin_mempcpy)
  3159. return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
  3160. Dest.getPointer(), SizeVal));
  3161. else
  3162. return RValue::get(Dest.getPointer());
  3163. }
  3164. case Builtin::BI__builtin_memcpy_inline: {
  3165. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3166. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3167. uint64_t Size =
  3168. E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
  3169. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3170. E->getArg(0)->getExprLoc(), FD, 0);
  3171. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3172. E->getArg(1)->getExprLoc(), FD, 1);
  3173. Builder.CreateMemCpyInline(Dest, Src, Size);
  3174. return RValue::get(nullptr);
  3175. }
  3176. case Builtin::BI__builtin_char_memchr:
  3177. BuiltinID = Builtin::BI__builtin_memchr;
  3178. break;
  3179. case Builtin::BI__builtin___memcpy_chk: {
  3180. // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
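// For example, __builtin___memcpy_chk(d, s, 16, 32) with both sizes constant
// folds to a plain 16-byte memcpy; if either size is non-constant or
// cst1 > cst2, we break and fall through to the normal library-call path.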
  3181. Expr::EvalResult SizeResult, DstSizeResult;
  3182. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3183. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3184. break;
  3185. llvm::APSInt Size = SizeResult.Val.getInt();
  3186. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3187. if (Size.ugt(DstSize))
  3188. break;
  3189. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3190. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3191. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3192. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  3193. return RValue::get(Dest.getPointer());
  3194. }
  3195. case Builtin::BI__builtin_objc_memmove_collectable: {
  3196. Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
  3197. Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
  3198. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3199. CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
  3200. DestAddr, SrcAddr, SizeVal);
  3201. return RValue::get(DestAddr.getPointer());
  3202. }
  3203. case Builtin::BI__builtin___memmove_chk: {
  3204. // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
  3205. Expr::EvalResult SizeResult, DstSizeResult;
  3206. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3207. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3208. break;
  3209. llvm::APSInt Size = SizeResult.Val.getInt();
  3210. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3211. if (Size.ugt(DstSize))
  3212. break;
  3213. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3214. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3215. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3216. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  3217. return RValue::get(Dest.getPointer());
  3218. }
  3219. case Builtin::BImemmove:
  3220. case Builtin::BI__builtin_memmove: {
  3221. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3222. Address Src = EmitPointerWithAlignment(E->getArg(1));
  3223. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3224. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3225. E->getArg(0)->getExprLoc(), FD, 0);
  3226. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  3227. E->getArg(1)->getExprLoc(), FD, 1);
  3228. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  3229. return RValue::get(Dest.getPointer());
  3230. }
  3231. case Builtin::BImemset:
  3232. case Builtin::BI__builtin_memset: {
  3233. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3234. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  3235. Builder.getInt8Ty());
  3236. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  3237. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  3238. E->getArg(0)->getExprLoc(), FD, 0);
  3239. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  3240. return RValue::get(Dest.getPointer());
  3241. }
  3242. case Builtin::BI__builtin___memset_chk: {
  3243. // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
  3244. Expr::EvalResult SizeResult, DstSizeResult;
  3245. if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
  3246. !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
  3247. break;
  3248. llvm::APSInt Size = SizeResult.Val.getInt();
  3249. llvm::APSInt DstSize = DstSizeResult.Val.getInt();
  3250. if (Size.ugt(DstSize))
  3251. break;
  3252. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  3253. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  3254. Builder.getInt8Ty());
  3255. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  3256. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  3257. return RValue::get(Dest.getPointer());
  3258. }
  3259. case Builtin::BI__builtin_wmemchr: {
  3260. // The MSVC runtime library does not provide a definition of wmemchr, so we
  3261. // need an inline implementation.
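// The blocks emitted below are roughly equivalent to:
//   if (n == 0) return 0;
//   for (;; ++s) { if (*s == c) return (wchar_t *)s; if (--n == 0) return 0; }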
  3262. if (!getTarget().getTriple().isOSMSVCRT())
  3263. break;
  3264. llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
  3265. Value *Str = EmitScalarExpr(E->getArg(0));
  3266. Value *Chr = EmitScalarExpr(E->getArg(1));
  3267. Value *Size = EmitScalarExpr(E->getArg(2));
  3268. BasicBlock *Entry = Builder.GetInsertBlock();
  3269. BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
  3270. BasicBlock *Next = createBasicBlock("wmemchr.next");
  3271. BasicBlock *Exit = createBasicBlock("wmemchr.exit");
  3272. Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
  3273. Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
  3274. EmitBlock(CmpEq);
  3275. PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
  3276. StrPhi->addIncoming(Str, Entry);
  3277. PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
  3278. SizePhi->addIncoming(Size, Entry);
  3279. CharUnits WCharAlign =
  3280. getContext().getTypeAlignInChars(getContext().WCharTy);
  3281. Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
  3282. Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
  3283. Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
  3284. Builder.CreateCondBr(StrEqChr, Exit, Next);
  3285. EmitBlock(Next);
  3286. Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
  3287. Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
  3288. Value *NextSizeEq0 =
  3289. Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
  3290. Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
  3291. StrPhi->addIncoming(NextStr, Next);
  3292. SizePhi->addIncoming(NextSize, Next);
  3293. EmitBlock(Exit);
  3294. PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
  3295. Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
  3296. Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
  3297. Ret->addIncoming(FoundChr, CmpEq);
  3298. return RValue::get(Ret);
  3299. }
  3300. case Builtin::BI__builtin_wmemcmp: {
  3301. // The MSVC runtime library does not provide a definition of wmemcmp, so we
  3302. // need an inline implementation.
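// The blocks emitted below are roughly equivalent to (unsigned comparison):
//   if (n == 0) return 0;
//   for (;; ++d, ++s) {
//     if (*d > *s) return 1;
//     if (*d < *s) return -1;
//     if (--n == 0) return 0;
//   }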
  3303. if (!getTarget().getTriple().isOSMSVCRT())
  3304. break;
  3305. llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
  3306. Value *Dst = EmitScalarExpr(E->getArg(0));
  3307. Value *Src = EmitScalarExpr(E->getArg(1));
  3308. Value *Size = EmitScalarExpr(E->getArg(2));
  3309. BasicBlock *Entry = Builder.GetInsertBlock();
  3310. BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
  3311. BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
  3312. BasicBlock *Next = createBasicBlock("wmemcmp.next");
  3313. BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
  3314. Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
  3315. Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
  3316. EmitBlock(CmpGT);
  3317. PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
  3318. DstPhi->addIncoming(Dst, Entry);
  3319. PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
  3320. SrcPhi->addIncoming(Src, Entry);
  3321. PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
  3322. SizePhi->addIncoming(Size, Entry);
  3323. CharUnits WCharAlign =
  3324. getContext().getTypeAlignInChars(getContext().WCharTy);
  3325. Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
  3326. Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
  3327. Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
  3328. Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
  3329. EmitBlock(CmpLT);
  3330. Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
  3331. Builder.CreateCondBr(DstLtSrc, Exit, Next);
  3332. EmitBlock(Next);
  3333. Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
  3334. Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
  3335. Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
  3336. Value *NextSizeEq0 =
  3337. Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
  3338. Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
  3339. DstPhi->addIncoming(NextDst, Next);
  3340. SrcPhi->addIncoming(NextSrc, Next);
  3341. SizePhi->addIncoming(NextSize, Next);
  3342. EmitBlock(Exit);
  3343. PHINode *Ret = Builder.CreatePHI(IntTy, 4);
  3344. Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
  3345. Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
  3346. Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
  3347. Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
  3348. return RValue::get(Ret);
  3349. }
  3350. case Builtin::BI__builtin_dwarf_cfa: {
  3351. // The offset in bytes from the first argument to the CFA.
  3352. //
  3353. // Why on earth is this in the frontend? Is there any reason at
  3354. // all that the backend can't reasonably determine this while
  3355. // lowering llvm.eh.dwarf.cfa()?
  3356. //
  3357. // TODO: If there's a satisfactory reason, add a target hook for
  3358. // this instead of hard-coding 0, which is correct for most targets.
  3359. int32_t Offset = 0;
  3360. Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
  3361. return RValue::get(Builder.CreateCall(F,
  3362. llvm::ConstantInt::get(Int32Ty, Offset)));
  3363. }
  3364. case Builtin::BI__builtin_return_address: {
  3365. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  3366. getContext().UnsignedIntTy);
  3367. Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  3368. return RValue::get(Builder.CreateCall(F, Depth));
  3369. }
  3370. case Builtin::BI_ReturnAddress: {
  3371. Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  3372. return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
  3373. }
  3374. case Builtin::BI__builtin_frame_address: {
  3375. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  3376. getContext().UnsignedIntTy);
  3377. Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
  3378. return RValue::get(Builder.CreateCall(F, Depth));
  3379. }
  3380. case Builtin::BI__builtin_extract_return_addr: {
  3381. Value *Address = EmitScalarExpr(E->getArg(0));
  3382. Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
  3383. return RValue::get(Result);
  3384. }
  3385. case Builtin::BI__builtin_frob_return_addr: {
  3386. Value *Address = EmitScalarExpr(E->getArg(0));
  3387. Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
  3388. return RValue::get(Result);
  3389. }
  3390. case Builtin::BI__builtin_dwarf_sp_column: {
  3391. llvm::IntegerType *Ty
  3392. = cast<llvm::IntegerType>(ConvertType(E->getType()));
  3393. int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
  3394. if (Column == -1) {
  3395. CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
  3396. return RValue::get(llvm::UndefValue::get(Ty));
  3397. }
  3398. return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  3399. }
  3400. case Builtin::BI__builtin_init_dwarf_reg_size_table: {
  3401. Value *Address = EmitScalarExpr(E->getArg(0));
  3402. if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
  3403. CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
  3404. return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  3405. }
  3406. case Builtin::BI__builtin_eh_return: {
  3407. Value *Int = EmitScalarExpr(E->getArg(0));
  3408. Value *Ptr = EmitScalarExpr(E->getArg(1));
  3409. llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
  3410. assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
  3411. "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
  3412. Function *F =
  3413. CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
  3414. : Intrinsic::eh_return_i64);
  3415. Builder.CreateCall(F, {Int, Ptr});
  3416. Builder.CreateUnreachable();
  3417. // We do need to preserve an insertion point.
  3418. EmitBlock(createBasicBlock("builtin_eh_return.cont"));
  3419. return RValue::get(nullptr);
  3420. }
  3421. case Builtin::BI__builtin_unwind_init: {
  3422. Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
  3423. return RValue::get(Builder.CreateCall(F));
  3424. }
  3425. case Builtin::BI__builtin_extend_pointer: {
  3426. // Extends a pointer to the size of an _Unwind_Word, which is
  3427. // uint64_t on all platforms. Generally this gets poked into a
  3428. // register and eventually used as an address, so if the
  3429. // addressing registers are wider than pointers and the platform
  3430. // doesn't implicitly ignore high-order bits when doing
  3431. // addressing, we need to make sure we zext / sext based on
  3432. // the platform's expectations.
  3433. //
  3434. // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
  3435. // Cast the pointer to intptr_t.
  3436. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3437. Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
  3438. // If that's 64 bits, we're done.
  3439. if (IntPtrTy->getBitWidth() == 64)
  3440. return RValue::get(Result);
  3441. // Otherwise, ask the codegen data what to do.
  3442. if (getTargetHooks().extendPointerWithSExt())
  3443. return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
  3444. else
  3445. return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  3446. }
  3447. case Builtin::BI__builtin_setjmp: {
  3448. // Buffer is a void**.
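// Layout used here (see the stores below): slot 0 holds the frame address and
// slot 2 holds the saved stack pointer; slot 1 is presumably reserved for the
// resume address that the eh.sjlj lowering fills in.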
  3449. Address Buf = EmitPointerWithAlignment(E->getArg(0));
  3450. // Store the frame pointer to the setjmp buffer.
  3451. Value *FrameAddr = Builder.CreateCall(
  3452. CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
  3453. ConstantInt::get(Int32Ty, 0));
  3454. Builder.CreateStore(FrameAddr, Buf);
  3455. // Store the stack pointer to the setjmp buffer.
  3456. Value *StackAddr =
  3457. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
  3458. Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
  3459. Builder.CreateStore(StackAddr, StackSaveSlot);
  3460. // Call LLVM's EH setjmp, which is lightweight.
  3461. Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
  3462. Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
  3463. return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  3464. }
  3465. case Builtin::BI__builtin_longjmp: {
  3466. Value *Buf = EmitScalarExpr(E->getArg(0));
  3467. Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
  3468. // Call LLVM's EH longjmp, which is lightweight.
  3469. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
  3470. // longjmp doesn't return; mark this as unreachable.
  3471. Builder.CreateUnreachable();
  3472. // We do need to preserve an insertion point.
  3473. EmitBlock(createBasicBlock("longjmp.cont"));
  3474. return RValue::get(nullptr);
  3475. }
  3476. case Builtin::BI__builtin_launder: {
  3477. const Expr *Arg = E->getArg(0);
  3478. QualType ArgTy = Arg->getType()->getPointeeType();
  3479. Value *Ptr = EmitScalarExpr(Arg);
  3480. if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
  3481. Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
  3482. return RValue::get(Ptr);
  3483. }
  3484. case Builtin::BI__sync_fetch_and_add:
  3485. case Builtin::BI__sync_fetch_and_sub:
  3486. case Builtin::BI__sync_fetch_and_or:
  3487. case Builtin::BI__sync_fetch_and_and:
  3488. case Builtin::BI__sync_fetch_and_xor:
  3489. case Builtin::BI__sync_fetch_and_nand:
  3490. case Builtin::BI__sync_add_and_fetch:
  3491. case Builtin::BI__sync_sub_and_fetch:
  3492. case Builtin::BI__sync_and_and_fetch:
  3493. case Builtin::BI__sync_or_and_fetch:
  3494. case Builtin::BI__sync_xor_and_fetch:
  3495. case Builtin::BI__sync_nand_and_fetch:
  3496. case Builtin::BI__sync_val_compare_and_swap:
  3497. case Builtin::BI__sync_bool_compare_and_swap:
  3498. case Builtin::BI__sync_lock_test_and_set:
  3499. case Builtin::BI__sync_lock_release:
  3500. case Builtin::BI__sync_swap:
  3501. llvm_unreachable("Shouldn't make it through sema");
  3502. case Builtin::BI__sync_fetch_and_add_1:
  3503. case Builtin::BI__sync_fetch_and_add_2:
  3504. case Builtin::BI__sync_fetch_and_add_4:
  3505. case Builtin::BI__sync_fetch_and_add_8:
  3506. case Builtin::BI__sync_fetch_and_add_16:
  3507. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  3508. case Builtin::BI__sync_fetch_and_sub_1:
  3509. case Builtin::BI__sync_fetch_and_sub_2:
  3510. case Builtin::BI__sync_fetch_and_sub_4:
  3511. case Builtin::BI__sync_fetch_and_sub_8:
  3512. case Builtin::BI__sync_fetch_and_sub_16:
  3513. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  3514. case Builtin::BI__sync_fetch_and_or_1:
  3515. case Builtin::BI__sync_fetch_and_or_2:
  3516. case Builtin::BI__sync_fetch_and_or_4:
  3517. case Builtin::BI__sync_fetch_and_or_8:
  3518. case Builtin::BI__sync_fetch_and_or_16:
  3519. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  3520. case Builtin::BI__sync_fetch_and_and_1:
  3521. case Builtin::BI__sync_fetch_and_and_2:
  3522. case Builtin::BI__sync_fetch_and_and_4:
  3523. case Builtin::BI__sync_fetch_and_and_8:
  3524. case Builtin::BI__sync_fetch_and_and_16:
  3525. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  3526. case Builtin::BI__sync_fetch_and_xor_1:
  3527. case Builtin::BI__sync_fetch_and_xor_2:
  3528. case Builtin::BI__sync_fetch_and_xor_4:
  3529. case Builtin::BI__sync_fetch_and_xor_8:
  3530. case Builtin::BI__sync_fetch_and_xor_16:
  3531. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  3532. case Builtin::BI__sync_fetch_and_nand_1:
  3533. case Builtin::BI__sync_fetch_and_nand_2:
  3534. case Builtin::BI__sync_fetch_and_nand_4:
  3535. case Builtin::BI__sync_fetch_and_nand_8:
  3536. case Builtin::BI__sync_fetch_and_nand_16:
  3537. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
  3538. // Clang extensions: not overloaded yet.
  3539. case Builtin::BI__sync_fetch_and_min:
  3540. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  3541. case Builtin::BI__sync_fetch_and_max:
  3542. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  3543. case Builtin::BI__sync_fetch_and_umin:
  3544. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  3545. case Builtin::BI__sync_fetch_and_umax:
  3546. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  3547. case Builtin::BI__sync_add_and_fetch_1:
  3548. case Builtin::BI__sync_add_and_fetch_2:
  3549. case Builtin::BI__sync_add_and_fetch_4:
  3550. case Builtin::BI__sync_add_and_fetch_8:
  3551. case Builtin::BI__sync_add_and_fetch_16:
  3552. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
  3553. llvm::Instruction::Add);
  3554. case Builtin::BI__sync_sub_and_fetch_1:
  3555. case Builtin::BI__sync_sub_and_fetch_2:
  3556. case Builtin::BI__sync_sub_and_fetch_4:
  3557. case Builtin::BI__sync_sub_and_fetch_8:
  3558. case Builtin::BI__sync_sub_and_fetch_16:
  3559. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
  3560. llvm::Instruction::Sub);
  3561. case Builtin::BI__sync_and_and_fetch_1:
  3562. case Builtin::BI__sync_and_and_fetch_2:
  3563. case Builtin::BI__sync_and_and_fetch_4:
  3564. case Builtin::BI__sync_and_and_fetch_8:
  3565. case Builtin::BI__sync_and_and_fetch_16:
  3566. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
  3567. llvm::Instruction::And);
  3568. case Builtin::BI__sync_or_and_fetch_1:
  3569. case Builtin::BI__sync_or_and_fetch_2:
  3570. case Builtin::BI__sync_or_and_fetch_4:
  3571. case Builtin::BI__sync_or_and_fetch_8:
  3572. case Builtin::BI__sync_or_and_fetch_16:
  3573. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
  3574. llvm::Instruction::Or);
  3575. case Builtin::BI__sync_xor_and_fetch_1:
  3576. case Builtin::BI__sync_xor_and_fetch_2:
  3577. case Builtin::BI__sync_xor_and_fetch_4:
  3578. case Builtin::BI__sync_xor_and_fetch_8:
  3579. case Builtin::BI__sync_xor_and_fetch_16:
  3580. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
  3581. llvm::Instruction::Xor);
  3582. case Builtin::BI__sync_nand_and_fetch_1:
  3583. case Builtin::BI__sync_nand_and_fetch_2:
  3584. case Builtin::BI__sync_nand_and_fetch_4:
  3585. case Builtin::BI__sync_nand_and_fetch_8:
  3586. case Builtin::BI__sync_nand_and_fetch_16:
  3587. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
  3588. llvm::Instruction::And, true);
  3589. case Builtin::BI__sync_val_compare_and_swap_1:
  3590. case Builtin::BI__sync_val_compare_and_swap_2:
  3591. case Builtin::BI__sync_val_compare_and_swap_4:
  3592. case Builtin::BI__sync_val_compare_and_swap_8:
  3593. case Builtin::BI__sync_val_compare_and_swap_16:
  3594. return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
  3595. case Builtin::BI__sync_bool_compare_and_swap_1:
  3596. case Builtin::BI__sync_bool_compare_and_swap_2:
  3597. case Builtin::BI__sync_bool_compare_and_swap_4:
  3598. case Builtin::BI__sync_bool_compare_and_swap_8:
  3599. case Builtin::BI__sync_bool_compare_and_swap_16:
  3600. return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
  3601. case Builtin::BI__sync_swap_1:
  3602. case Builtin::BI__sync_swap_2:
  3603. case Builtin::BI__sync_swap_4:
  3604. case Builtin::BI__sync_swap_8:
  3605. case Builtin::BI__sync_swap_16:
  3606. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  3607. case Builtin::BI__sync_lock_test_and_set_1:
  3608. case Builtin::BI__sync_lock_test_and_set_2:
  3609. case Builtin::BI__sync_lock_test_and_set_4:
  3610. case Builtin::BI__sync_lock_test_and_set_8:
  3611. case Builtin::BI__sync_lock_test_and_set_16:
  3612. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  3613. case Builtin::BI__sync_lock_release_1:
  3614. case Builtin::BI__sync_lock_release_2:
  3615. case Builtin::BI__sync_lock_release_4:
  3616. case Builtin::BI__sync_lock_release_8:
  3617. case Builtin::BI__sync_lock_release_16: {
  3618. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3619. QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  3620. CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
  3621. llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
  3622. StoreSize.getQuantity() * 8);
  3623. Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  3624. llvm::StoreInst *Store =
  3625. Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
  3626. StoreSize);
  3627. Store->setAtomic(llvm::AtomicOrdering::Release);
  3628. return RValue::get(nullptr);
  3629. }
  3630. case Builtin::BI__sync_synchronize: {
  3631. // We assume this is supposed to correspond to a C++0x-style
  3632. // sequentially-consistent fence (i.e. this is only usable for
  3633. // synchronization, not device I/O or anything like that). This intrinsic
  3634. // is really badly designed in the sense that in theory, there isn't
  3635. // any way to safely use it... but in practice, it mostly works
  3636. // to use it with non-atomic loads and stores to get acquire/release
  3637. // semantics.
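// In other words, this lowers to a single "fence seq_cst", the same IR as
// __atomic_thread_fence(__ATOMIC_SEQ_CST).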
  3638. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
  3639. return RValue::get(nullptr);
  3640. }
  3641. case Builtin::BI__builtin_nontemporal_load:
  3642. return RValue::get(EmitNontemporalLoad(*this, E));
  3643. case Builtin::BI__builtin_nontemporal_store:
  3644. return RValue::get(EmitNontemporalStore(*this, E));
  3645. case Builtin::BI__c11_atomic_is_lock_free:
  3646. case Builtin::BI__atomic_is_lock_free: {
  3647. // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
  3648. // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
  3649. // _Atomic(T) is always properly-aligned.
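// For example, __c11_atomic_is_lock_free(8) lowers to a runtime call that is
// roughly "__atomic_is_lock_free(8, NULL)", while the __atomic_ form passes
// the user-supplied object pointer instead of NULL.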
  3650. const char *LibCallName = "__atomic_is_lock_free";
  3651. CallArgList Args;
  3652. Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
  3653. getContext().getSizeType());
  3654. if (BuiltinID == Builtin::BI__atomic_is_lock_free)
  3655. Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
  3656. getContext().VoidPtrTy);
  3657. else
  3658. Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
  3659. getContext().VoidPtrTy);
  3660. const CGFunctionInfo &FuncInfo =
  3661. CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
  3662. llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
  3663. llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
  3664. return EmitCall(FuncInfo, CGCallee::forDirect(Func),
  3665. ReturnValueSlot(), Args);
  3666. }
  3667. case Builtin::BI__atomic_test_and_set: {
  3668. // Look at the argument type to determine whether this is a volatile
  3669. // operation. The parameter type is always volatile.
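// The builtin atomically exchanges the byte with 1 and returns true iff the
// flag was already set, i.e. iff the old value was nonzero.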
  3670. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  3671. bool Volatile =
  3672. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  3673. Value *Ptr = EmitScalarExpr(E->getArg(0));
  3674. unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  3675. Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
  3676. Value *NewVal = Builder.getInt8(1);
  3677. Value *Order = EmitScalarExpr(E->getArg(1));
  3678. if (isa<llvm::ConstantInt>(Order)) {
  3679. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3680. AtomicRMWInst *Result = nullptr;
  3681. switch (ord) {
  3682. case 0: // memory_order_relaxed
  3683. default: // invalid order
  3684. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3685. llvm::AtomicOrdering::Monotonic);
  3686. break;
  3687. case 1: // memory_order_consume
  3688. case 2: // memory_order_acquire
  3689. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3690. llvm::AtomicOrdering::Acquire);
  3691. break;
  3692. case 3: // memory_order_release
  3693. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3694. llvm::AtomicOrdering::Release);
  3695. break;
  3696. case 4: // memory_order_acq_rel
  3697. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3698. llvm::AtomicOrdering::AcquireRelease);
  3699. break;
  3700. case 5: // memory_order_seq_cst
  3701. Result = Builder.CreateAtomicRMW(
  3702. llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  3703. llvm::AtomicOrdering::SequentiallyConsistent);
  3704. break;
  3705. }
  3706. Result->setVolatile(Volatile);
  3707. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  3708. }
  3709. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3710. llvm::BasicBlock *BBs[5] = {
  3711. createBasicBlock("monotonic", CurFn),
  3712. createBasicBlock("acquire", CurFn),
  3713. createBasicBlock("release", CurFn),
  3714. createBasicBlock("acqrel", CurFn),
  3715. createBasicBlock("seqcst", CurFn)
  3716. };
  3717. llvm::AtomicOrdering Orders[5] = {
  3718. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
  3719. llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
  3720. llvm::AtomicOrdering::SequentiallyConsistent};
  3721. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3722. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  3723. Builder.SetInsertPoint(ContBB);
  3724. PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
  3725. for (unsigned i = 0; i < 5; ++i) {
  3726. Builder.SetInsertPoint(BBs[i]);
  3727. AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
  3728. Ptr, NewVal, Orders[i]);
  3729. RMW->setVolatile(Volatile);
  3730. Result->addIncoming(RMW, BBs[i]);
  3731. Builder.CreateBr(ContBB);
  3732. }
  3733. SI->addCase(Builder.getInt32(0), BBs[0]);
  3734. SI->addCase(Builder.getInt32(1), BBs[1]);
  3735. SI->addCase(Builder.getInt32(2), BBs[1]);
  3736. SI->addCase(Builder.getInt32(3), BBs[2]);
  3737. SI->addCase(Builder.getInt32(4), BBs[3]);
  3738. SI->addCase(Builder.getInt32(5), BBs[4]);
  3739. Builder.SetInsertPoint(ContBB);
  3740. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  3741. }
  3742. case Builtin::BI__atomic_clear: {
  3743. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  3744. bool Volatile =
  3745. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  3746. Address Ptr = EmitPointerWithAlignment(E->getArg(0));
  3747. unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
  3748. Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
  3749. Value *NewVal = Builder.getInt8(0);
  3750. Value *Order = EmitScalarExpr(E->getArg(1));
  3751. if (isa<llvm::ConstantInt>(Order)) {
  3752. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3753. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  3754. switch (ord) {
  3755. case 0: // memory_order_relaxed
  3756. default: // invalid order
  3757. Store->setOrdering(llvm::AtomicOrdering::Monotonic);
  3758. break;
  3759. case 3: // memory_order_release
  3760. Store->setOrdering(llvm::AtomicOrdering::Release);
  3761. break;
  3762. case 5: // memory_order_seq_cst
  3763. Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
  3764. break;
  3765. }
  3766. return RValue::get(nullptr);
  3767. }
  3768. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3769. llvm::BasicBlock *BBs[3] = {
  3770. createBasicBlock("monotonic", CurFn),
  3771. createBasicBlock("release", CurFn),
  3772. createBasicBlock("seqcst", CurFn)
  3773. };
  3774. llvm::AtomicOrdering Orders[3] = {
  3775. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
  3776. llvm::AtomicOrdering::SequentiallyConsistent};
  3777. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3778. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  3779. for (unsigned i = 0; i < 3; ++i) {
  3780. Builder.SetInsertPoint(BBs[i]);
  3781. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  3782. Store->setOrdering(Orders[i]);
  3783. Builder.CreateBr(ContBB);
  3784. }
  3785. SI->addCase(Builder.getInt32(0), BBs[0]);
  3786. SI->addCase(Builder.getInt32(3), BBs[1]);
  3787. SI->addCase(Builder.getInt32(5), BBs[2]);
  3788. Builder.SetInsertPoint(ContBB);
  3789. return RValue::get(nullptr);
  3790. }
  3791. case Builtin::BI__atomic_thread_fence:
  3792. case Builtin::BI__atomic_signal_fence:
  3793. case Builtin::BI__c11_atomic_thread_fence:
  3794. case Builtin::BI__c11_atomic_signal_fence: {
  3795. llvm::SyncScope::ID SSID;
  3796. if (BuiltinID == Builtin::BI__atomic_signal_fence ||
  3797. BuiltinID == Builtin::BI__c11_atomic_signal_fence)
  3798. SSID = llvm::SyncScope::SingleThread;
  3799. else
  3800. SSID = llvm::SyncScope::System;
  3801. Value *Order = EmitScalarExpr(E->getArg(0));
  3802. if (isa<llvm::ConstantInt>(Order)) {
  3803. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  3804. switch (ord) {
  3805. case 0: // memory_order_relaxed
  3806. default: // invalid order
  3807. break;
  3808. case 1: // memory_order_consume
  3809. case 2: // memory_order_acquire
  3810. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  3811. break;
  3812. case 3: // memory_order_release
  3813. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  3814. break;
  3815. case 4: // memory_order_acq_rel
  3816. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  3817. break;
  3818. case 5: // memory_order_seq_cst
  3819. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  3820. break;
  3821. }
  3822. return RValue::get(nullptr);
  3823. }
  3824. llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
  3825. AcquireBB = createBasicBlock("acquire", CurFn);
  3826. ReleaseBB = createBasicBlock("release", CurFn);
  3827. AcqRelBB = createBasicBlock("acqrel", CurFn);
  3828. SeqCstBB = createBasicBlock("seqcst", CurFn);
  3829. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  3830. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  3831. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
  3832. Builder.SetInsertPoint(AcquireBB);
  3833. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  3834. Builder.CreateBr(ContBB);
  3835. SI->addCase(Builder.getInt32(1), AcquireBB);
  3836. SI->addCase(Builder.getInt32(2), AcquireBB);
  3837. Builder.SetInsertPoint(ReleaseBB);
  3838. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  3839. Builder.CreateBr(ContBB);
  3840. SI->addCase(Builder.getInt32(3), ReleaseBB);
  3841. Builder.SetInsertPoint(AcqRelBB);
  3842. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  3843. Builder.CreateBr(ContBB);
  3844. SI->addCase(Builder.getInt32(4), AcqRelBB);
  3845. Builder.SetInsertPoint(SeqCstBB);
  3846. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  3847. Builder.CreateBr(ContBB);
  3848. SI->addCase(Builder.getInt32(5), SeqCstBB);
  3849. Builder.SetInsertPoint(ContBB);
  3850. return RValue::get(nullptr);
  3851. }
  3852. case Builtin::BI__builtin_signbit:
  3853. case Builtin::BI__builtin_signbitf:
  3854. case Builtin::BI__builtin_signbitl: {
  3855. return RValue::get(
  3856. Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
  3857. ConvertType(E->getType())));
  3858. }
  3859. case Builtin::BI__warn_memset_zero_len:
  3860. return RValue::getIgnored();
  3861. case Builtin::BI__annotation: {
  3862. // Re-encode each wide string to UTF8 and make an MDString.
  3863. SmallVector<Metadata *, 1> Strings;
  3864. for (const Expr *Arg : E->arguments()) {
  3865. const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
  3866. assert(Str->getCharByteWidth() == 2);
  3867. StringRef WideBytes = Str->getBytes();
  3868. std::string StrUtf8;
  3869. if (!convertUTF16ToUTF8String(
  3870. makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
  3871. CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
  3872. continue;
  3873. }
  3874. Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
  3875. }
3876. // Build an MDTuple of MDStrings and emit the intrinsic call.
  3877. llvm::Function *F =
  3878. CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
  3879. MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
  3880. Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
  3881. return RValue::getIgnored();
  3882. }
  3883. case Builtin::BI__builtin_annotation: {
  3884. llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
  3885. llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
  3886. AnnVal->getType());
  3887. // Get the annotation string, go through casts. Sema requires this to be a
3888. // non-wide string literal, potentially cast, so the cast<> is safe.
  3889. const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
  3890. StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
  3891. return RValue::get(
  3892. EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
  3893. }
  3894. case Builtin::BI__builtin_addcb:
  3895. case Builtin::BI__builtin_addcs:
  3896. case Builtin::BI__builtin_addc:
  3897. case Builtin::BI__builtin_addcl:
  3898. case Builtin::BI__builtin_addcll:
  3899. case Builtin::BI__builtin_subcb:
  3900. case Builtin::BI__builtin_subcs:
  3901. case Builtin::BI__builtin_subc:
  3902. case Builtin::BI__builtin_subcl:
  3903. case Builtin::BI__builtin_subcll: {
  3904. // We translate all of these builtins from expressions of the form:
  3905. // int x = ..., y = ..., carryin = ..., carryout, result;
  3906. // result = __builtin_addc(x, y, carryin, &carryout);
  3907. //
  3908. // to LLVM IR of the form:
  3909. //
  3910. // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  3911. // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
  3912. // %carry1 = extractvalue {i32, i1} %tmp1, 1
  3913. // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
  3914. // i32 %carryin)
  3915. // %result = extractvalue {i32, i1} %tmp2, 0
  3916. // %carry2 = extractvalue {i32, i1} %tmp2, 1
  3917. // %tmp3 = or i1 %carry1, %carry2
  3918. // %tmp4 = zext i1 %tmp3 to i32
  3919. // store i32 %tmp4, i32* %carryout
  3920. // Scalarize our inputs.
  3921. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  3922. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  3923. llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
  3924. Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
  3925. // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
  3926. llvm::Intrinsic::ID IntrinsicId;
  3927. switch (BuiltinID) {
  3928. default: llvm_unreachable("Unknown multiprecision builtin id.");
  3929. case Builtin::BI__builtin_addcb:
  3930. case Builtin::BI__builtin_addcs:
  3931. case Builtin::BI__builtin_addc:
  3932. case Builtin::BI__builtin_addcl:
  3933. case Builtin::BI__builtin_addcll:
  3934. IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
  3935. break;
  3936. case Builtin::BI__builtin_subcb:
  3937. case Builtin::BI__builtin_subcs:
  3938. case Builtin::BI__builtin_subc:
  3939. case Builtin::BI__builtin_subcl:
  3940. case Builtin::BI__builtin_subcll:
  3941. IntrinsicId = llvm::Intrinsic::usub_with_overflow;
  3942. break;
  3943. }
  3944. // Construct our resulting LLVM IR expression.
  3945. llvm::Value *Carry1;
  3946. llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
  3947. X, Y, Carry1);
  3948. llvm::Value *Carry2;
  3949. llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
  3950. Sum1, Carryin, Carry2);
  3951. llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
  3952. X->getType());
  3953. Builder.CreateStore(CarryOut, CarryOutPtr);
  3954. return RValue::get(Sum2);
  3955. }
  3956. case Builtin::BI__builtin_add_overflow:
  3957. case Builtin::BI__builtin_sub_overflow:
  3958. case Builtin::BI__builtin_mul_overflow: {
  3959. const clang::Expr *LeftArg = E->getArg(0);
  3960. const clang::Expr *RightArg = E->getArg(1);
  3961. const clang::Expr *ResultArg = E->getArg(2);
  3962. clang::QualType ResultQTy =
  3963. ResultArg->getType()->castAs<PointerType>()->getPointeeType();
  3964. WidthAndSignedness LeftInfo =
  3965. getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
  3966. WidthAndSignedness RightInfo =
  3967. getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
  3968. WidthAndSignedness ResultInfo =
  3969. getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
  3970. // Handle mixed-sign multiplication as a special case, because adding
  3971. // runtime or backend support for our generic irgen would be too expensive.
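// For example, multiplying a signed and an unsigned 64-bit operand would need
// a 65-bit encompassing type on the generic path below, so it is handled by
// the dedicated helper instead.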
  3972. if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
  3973. return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
  3974. RightInfo, ResultArg, ResultQTy,
  3975. ResultInfo);
  3976. if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
  3977. ResultInfo))
  3978. return EmitCheckedUnsignedMultiplySignedResult(
  3979. *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
  3980. ResultInfo);
  3981. WidthAndSignedness EncompassingInfo =
  3982. EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
  3983. llvm::Type *EncompassingLLVMTy =
  3984. llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
  3985. llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
  3986. llvm::Intrinsic::ID IntrinsicId;
  3987. switch (BuiltinID) {
  3988. default:
  3989. llvm_unreachable("Unknown overflow builtin id.");
  3990. case Builtin::BI__builtin_add_overflow:
  3991. IntrinsicId = EncompassingInfo.Signed
  3992. ? llvm::Intrinsic::sadd_with_overflow
  3993. : llvm::Intrinsic::uadd_with_overflow;
  3994. break;
  3995. case Builtin::BI__builtin_sub_overflow:
  3996. IntrinsicId = EncompassingInfo.Signed
  3997. ? llvm::Intrinsic::ssub_with_overflow
  3998. : llvm::Intrinsic::usub_with_overflow;
  3999. break;
  4000. case Builtin::BI__builtin_mul_overflow:
  4001. IntrinsicId = EncompassingInfo.Signed
  4002. ? llvm::Intrinsic::smul_with_overflow
  4003. : llvm::Intrinsic::umul_with_overflow;
  4004. break;
  4005. }
  4006. llvm::Value *Left = EmitScalarExpr(LeftArg);
  4007. llvm::Value *Right = EmitScalarExpr(RightArg);
  4008. Address ResultPtr = EmitPointerWithAlignment(ResultArg);
  4009. // Extend each operand to the encompassing type.
  4010. Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
  4011. Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
  4012. // Perform the operation on the extended values.
  4013. llvm::Value *Overflow, *Result;
  4014. Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
  4015. if (EncompassingInfo.Width > ResultInfo.Width) {
  4016. // The encompassing type is wider than the result type, so we need to
  4017. // truncate it.
  4018. llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
  4019. // To see if the truncation caused an overflow, we will extend
  4020. // the result and then compare it to the original result.
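// For example, with int operands and an int8_t result, 100 + 100 computes 200
// in the wide type; truncating to i8 gives -56, and re-extending gives
// -56 != 200, so the truncation overflow flag is set.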
  4021. llvm::Value *ResultTruncExt = Builder.CreateIntCast(
  4022. ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
  4023. llvm::Value *TruncationOverflow =
  4024. Builder.CreateICmpNE(Result, ResultTruncExt);
  4025. Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
  4026. Result = ResultTrunc;
  4027. }
  4028. // Finally, store the result using the pointer.
  4029. bool isVolatile =
  4030. ResultArg->getType()->getPointeeType().isVolatileQualified();
  4031. Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
  4032. return RValue::get(Overflow);
  4033. }
  4034. case Builtin::BI__builtin_uadd_overflow:
  4035. case Builtin::BI__builtin_uaddl_overflow:
  4036. case Builtin::BI__builtin_uaddll_overflow:
  4037. case Builtin::BI__builtin_usub_overflow:
  4038. case Builtin::BI__builtin_usubl_overflow:
  4039. case Builtin::BI__builtin_usubll_overflow:
  4040. case Builtin::BI__builtin_umul_overflow:
  4041. case Builtin::BI__builtin_umull_overflow:
  4042. case Builtin::BI__builtin_umulll_overflow:
  4043. case Builtin::BI__builtin_sadd_overflow:
  4044. case Builtin::BI__builtin_saddl_overflow:
  4045. case Builtin::BI__builtin_saddll_overflow:
  4046. case Builtin::BI__builtin_ssub_overflow:
  4047. case Builtin::BI__builtin_ssubl_overflow:
  4048. case Builtin::BI__builtin_ssubll_overflow:
  4049. case Builtin::BI__builtin_smul_overflow:
  4050. case Builtin::BI__builtin_smull_overflow:
  4051. case Builtin::BI__builtin_smulll_overflow: {
  4052. // We translate all of these builtins directly to the relevant llvm IR node.
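// For example, on a target where unsigned int is 32 bits,
// __builtin_uadd_overflow(x, y, &sum) becomes a call to
// @llvm.uadd.with.overflow.i32: the first result is stored through &sum and
// the i1 overflow flag is returned.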
  4053. // Scalarize our inputs.
  4054. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  4055. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  4056. Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
  4057. // Decide which of the overflow intrinsics we are lowering to:
  4058. llvm::Intrinsic::ID IntrinsicId;
  4059. switch (BuiltinID) {
  4060. default: llvm_unreachable("Unknown overflow builtin id.");
  4061. case Builtin::BI__builtin_uadd_overflow:
  4062. case Builtin::BI__builtin_uaddl_overflow:
  4063. case Builtin::BI__builtin_uaddll_overflow:
  4064. IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
  4065. break;
  4066. case Builtin::BI__builtin_usub_overflow:
  4067. case Builtin::BI__builtin_usubl_overflow:
  4068. case Builtin::BI__builtin_usubll_overflow:
  4069. IntrinsicId = llvm::Intrinsic::usub_with_overflow;
  4070. break;
  4071. case Builtin::BI__builtin_umul_overflow:
  4072. case Builtin::BI__builtin_umull_overflow:
  4073. case Builtin::BI__builtin_umulll_overflow:
  4074. IntrinsicId = llvm::Intrinsic::umul_with_overflow;
  4075. break;
  4076. case Builtin::BI__builtin_sadd_overflow:
  4077. case Builtin::BI__builtin_saddl_overflow:
  4078. case Builtin::BI__builtin_saddll_overflow:
  4079. IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
  4080. break;
  4081. case Builtin::BI__builtin_ssub_overflow:
  4082. case Builtin::BI__builtin_ssubl_overflow:
  4083. case Builtin::BI__builtin_ssubll_overflow:
  4084. IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
  4085. break;
  4086. case Builtin::BI__builtin_smul_overflow:
  4087. case Builtin::BI__builtin_smull_overflow:
  4088. case Builtin::BI__builtin_smulll_overflow:
  4089. IntrinsicId = llvm::Intrinsic::smul_with_overflow;
  4090. break;
  4091. }
  4092. llvm::Value *Carry;
  4093. llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
  4094. Builder.CreateStore(Sum, SumOutPtr);
  4095. return RValue::get(Carry);
  4096. }
  4097. case Builtin::BI__builtin_addressof:
  4098. return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
  4099. case Builtin::BI__builtin_function_start:
  4100. return RValue::get(CGM.GetFunctionStart(
  4101. E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
  4102. case Builtin::BI__builtin_operator_new:
  4103. return EmitBuiltinNewDeleteCall(
  4104. E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
  4105. case Builtin::BI__builtin_operator_delete:
  4106. return EmitBuiltinNewDeleteCall(
  4107. E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
  4108. case Builtin::BI__builtin_is_aligned:
  4109. return EmitBuiltinIsAligned(E);
  4110. case Builtin::BI__builtin_align_up:
  4111. return EmitBuiltinAlignTo(E, true);
  4112. case Builtin::BI__builtin_align_down:
  4113. return EmitBuiltinAlignTo(E, false);
  4114. case Builtin::BI__noop:
  4115. // __noop always evaluates to an integer literal zero.
  4116. return RValue::get(ConstantInt::get(IntTy, 0));
  4117. case Builtin::BI__builtin_call_with_static_chain: {
  4118. const CallExpr *Call = cast<CallExpr>(E->getArg(0));
  4119. const Expr *Chain = E->getArg(1);
  4120. return EmitCall(Call->getCallee()->getType(),
  4121. EmitCallee(Call->getCallee()), Call, ReturnValue,
  4122. EmitScalarExpr(Chain));
  4123. }
  4124. case Builtin::BI_InterlockedExchange8:
  4125. case Builtin::BI_InterlockedExchange16:
  4126. case Builtin::BI_InterlockedExchange:
  4127. case Builtin::BI_InterlockedExchangePointer:
  4128. return RValue::get(
  4129. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
  4130. case Builtin::BI_InterlockedCompareExchangePointer:
  4131. case Builtin::BI_InterlockedCompareExchangePointer_nf: {
  4132. llvm::Type *RTy;
  4133. llvm::IntegerType *IntType =
  4134. IntegerType::get(getLLVMContext(),
  4135. getContext().getTypeSize(E->getType()));
  4136. llvm::Type *IntPtrType = IntType->getPointerTo();
  4137. llvm::Value *Destination =
  4138. Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
  4139. llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
  4140. RTy = Exchange->getType();
  4141. Exchange = Builder.CreatePtrToInt(Exchange, IntType);
  4142. llvm::Value *Comparand =
  4143. Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
  4144. auto Ordering =
  4145. BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
  4146. AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
  4147. auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
  4148. Ordering, Ordering);
  4149. Result->setVolatile(true);
  4150. return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
  4151. 0),
  4152. RTy));
  4153. }
  4154. case Builtin::BI_InterlockedCompareExchange8:
  4155. case Builtin::BI_InterlockedCompareExchange16:
  4156. case Builtin::BI_InterlockedCompareExchange:
  4157. case Builtin::BI_InterlockedCompareExchange64:
  4158. return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
  4159. case Builtin::BI_InterlockedIncrement16:
  4160. case Builtin::BI_InterlockedIncrement:
  4161. return RValue::get(
  4162. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
  4163. case Builtin::BI_InterlockedDecrement16:
  4164. case Builtin::BI_InterlockedDecrement:
  4165. return RValue::get(
  4166. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
  4167. case Builtin::BI_InterlockedAnd8:
  4168. case Builtin::BI_InterlockedAnd16:
  4169. case Builtin::BI_InterlockedAnd:
  4170. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
  4171. case Builtin::BI_InterlockedExchangeAdd8:
  4172. case Builtin::BI_InterlockedExchangeAdd16:
  4173. case Builtin::BI_InterlockedExchangeAdd:
  4174. return RValue::get(
  4175. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
  4176. case Builtin::BI_InterlockedExchangeSub8:
  4177. case Builtin::BI_InterlockedExchangeSub16:
  4178. case Builtin::BI_InterlockedExchangeSub:
  4179. return RValue::get(
  4180. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
  4181. case Builtin::BI_InterlockedOr8:
  4182. case Builtin::BI_InterlockedOr16:
  4183. case Builtin::BI_InterlockedOr:
  4184. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
  4185. case Builtin::BI_InterlockedXor8:
  4186. case Builtin::BI_InterlockedXor16:
  4187. case Builtin::BI_InterlockedXor:
  4188. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
  4189. case Builtin::BI_bittest64:
  4190. case Builtin::BI_bittest:
  4191. case Builtin::BI_bittestandcomplement64:
  4192. case Builtin::BI_bittestandcomplement:
  4193. case Builtin::BI_bittestandreset64:
  4194. case Builtin::BI_bittestandreset:
  4195. case Builtin::BI_bittestandset64:
  4196. case Builtin::BI_bittestandset:
  4197. case Builtin::BI_interlockedbittestandreset:
  4198. case Builtin::BI_interlockedbittestandreset64:
  4199. case Builtin::BI_interlockedbittestandset64:
  4200. case Builtin::BI_interlockedbittestandset:
  4201. case Builtin::BI_interlockedbittestandset_acq:
  4202. case Builtin::BI_interlockedbittestandset_rel:
  4203. case Builtin::BI_interlockedbittestandset_nf:
  4204. case Builtin::BI_interlockedbittestandreset_acq:
  4205. case Builtin::BI_interlockedbittestandreset_rel:
  4206. case Builtin::BI_interlockedbittestandreset_nf:
  4207. return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
  4208. // These builtins exist to emit regular volatile loads and stores not
  4209. // affected by the -fms-volatile setting.
  4210. case Builtin::BI__iso_volatile_load8:
  4211. case Builtin::BI__iso_volatile_load16:
  4212. case Builtin::BI__iso_volatile_load32:
  4213. case Builtin::BI__iso_volatile_load64:
  4214. return RValue::get(EmitISOVolatileLoad(*this, E));
  4215. case Builtin::BI__iso_volatile_store8:
  4216. case Builtin::BI__iso_volatile_store16:
  4217. case Builtin::BI__iso_volatile_store32:
  4218. case Builtin::BI__iso_volatile_store64:
  4219. return RValue::get(EmitISOVolatileStore(*this, E));
  4220. case Builtin::BI__exception_code:
  4221. case Builtin::BI_exception_code:
  4222. return RValue::get(EmitSEHExceptionCode());
  4223. case Builtin::BI__exception_info:
  4224. case Builtin::BI_exception_info:
  4225. return RValue::get(EmitSEHExceptionInfo());
  4226. case Builtin::BI__abnormal_termination:
  4227. case Builtin::BI_abnormal_termination:
  4228. return RValue::get(EmitSEHAbnormalTermination());
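// On MSVCRT targets, _setjmp and _setjmpex are lowered to the matching CRT
// entry point: _setjmpex always uses _setjmpex, while _setjmp uses _setjmp3 on
// x86, _setjmpex on AArch64, and _setjmp elsewhere.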
  4229. case Builtin::BI_setjmpex:
  4230. if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
  4231. E->getArg(0)->getType()->isPointerType())
  4232. return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
  4233. break;
  4234. case Builtin::BI_setjmp:
  4235. if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
  4236. E->getArg(0)->getType()->isPointerType()) {
  4237. if (getTarget().getTriple().getArch() == llvm::Triple::x86)
  4238. return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
  4239. else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
  4240. return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
  4241. return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
  4242. }
  4243. break;
  4244. case Builtin::BI__GetExceptionInfo: {
  4245. if (llvm::GlobalVariable *GV =
  4246. CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
  4247. return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
  4248. break;
  4249. }
  4250. case Builtin::BI__fastfail:
  4251. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
  4252. case Builtin::BI__builtin_coro_size: {
  4253. auto & Context = getContext();
  4254. auto SizeTy = Context.getSizeType();
  4255. auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
  4256. Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
  4257. return RValue::get(Builder.CreateCall(F));
  4258. }
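// The remaining coroutine builtins map one-to-one onto the corresponding
// llvm.coro.* intrinsics.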
  4259. case Builtin::BI__builtin_coro_id:
  4260. return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  4261. case Builtin::BI__builtin_coro_promise:
  4262. return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  4263. case Builtin::BI__builtin_coro_resume:
  4264. return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
  4265. case Builtin::BI__builtin_coro_frame:
  4266. return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  4267. case Builtin::BI__builtin_coro_noop:
  4268. return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
  4269. case Builtin::BI__builtin_coro_free:
  4270. return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  4271. case Builtin::BI__builtin_coro_destroy:
  4272. return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
  4273. case Builtin::BI__builtin_coro_done:
  4274. return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  4275. case Builtin::BI__builtin_coro_alloc:
  4276. return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  4277. case Builtin::BI__builtin_coro_begin:
  4278. return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  4279. case Builtin::BI__builtin_coro_end:
  4280. return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  4281. case Builtin::BI__builtin_coro_suspend:
  4282. return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  4283. // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
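// read_pipe/write_pipe with two arguments lower to __read_pipe_2/__write_pipe_2,
// and with four arguments to __read_pipe_4/__write_pipe_4; the packet pointer is
// cast to a generic i8* and the packet size/alignment are appended, so a call
// looks roughly like __read_pipe_2(pipe, (generic i8*)ptr, size, align).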
  4284. case Builtin::BIread_pipe:
  4285. case Builtin::BIwrite_pipe: {
  4286. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  4287. *Arg1 = EmitScalarExpr(E->getArg(1));
  4288. CGOpenCLRuntime OpenCLRT(CGM);
  4289. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  4290. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  4291. // Type of the generic packet parameter.
  4292. unsigned GenericAS =
  4293. getContext().getTargetAddressSpace(LangAS::opencl_generic);
  4294. llvm::Type *I8PTy = llvm::PointerType::get(
  4295. llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
4296. // Determine which overloaded version we should generate the call for.
  4297. if (2U == E->getNumArgs()) {
  4298. const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
  4299. : "__write_pipe_2";
4300. // Create a generic function type so the call works with any builtin or
4301. // user-defined type.
  4302. llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
  4303. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4304. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4305. Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
  4306. return RValue::get(
  4307. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4308. {Arg0, BCast, PacketSize, PacketAlign}));
  4309. } else {
  4310. assert(4 == E->getNumArgs() &&
  4311. "Illegal number of parameters to pipe function");
  4312. const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
  4313. : "__write_pipe_4";
  4314. llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
  4315. Int32Ty, Int32Ty};
  4316. Value *Arg2 = EmitScalarExpr(E->getArg(2)),
  4317. *Arg3 = EmitScalarExpr(E->getArg(3));
  4318. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4319. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4320. Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
  4321. // We know the third argument is an integer type, but we may need to cast
  4322. // it to i32.
  4323. if (Arg2->getType() != Int32Ty)
  4324. Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
  4325. return RValue::get(
  4326. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4327. {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
  4328. }
  4329. }
4330. // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
4331. // functions
  4332. case Builtin::BIreserve_read_pipe:
  4333. case Builtin::BIreserve_write_pipe:
  4334. case Builtin::BIwork_group_reserve_read_pipe:
  4335. case Builtin::BIwork_group_reserve_write_pipe:
  4336. case Builtin::BIsub_group_reserve_read_pipe:
  4337. case Builtin::BIsub_group_reserve_write_pipe: {
4338. // Compose the mangled name for the function.
  4339. const char *Name;
  4340. if (BuiltinID == Builtin::BIreserve_read_pipe)
  4341. Name = "__reserve_read_pipe";
  4342. else if (BuiltinID == Builtin::BIreserve_write_pipe)
  4343. Name = "__reserve_write_pipe";
  4344. else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
  4345. Name = "__work_group_reserve_read_pipe";
  4346. else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
  4347. Name = "__work_group_reserve_write_pipe";
  4348. else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
  4349. Name = "__sub_group_reserve_read_pipe";
  4350. else
  4351. Name = "__sub_group_reserve_write_pipe";
  4352. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  4353. *Arg1 = EmitScalarExpr(E->getArg(1));
  4354. llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
  4355. CGOpenCLRuntime OpenCLRT(CGM);
  4356. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  4357. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  4358. // Building the generic function prototype.
  4359. llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
  4360. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4361. ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4362. // We know the second argument is an integer type, but we may need to cast
  4363. // it to i32.
  4364. if (Arg1->getType() != Int32Ty)
  4365. Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
  4366. return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4367. {Arg0, Arg1, PacketSize, PacketAlign}));
  4368. }
  4369. // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
  4370. // functions
  4371. case Builtin::BIcommit_read_pipe:
  4372. case Builtin::BIcommit_write_pipe:
  4373. case Builtin::BIwork_group_commit_read_pipe:
  4374. case Builtin::BIwork_group_commit_write_pipe:
  4375. case Builtin::BIsub_group_commit_read_pipe:
  4376. case Builtin::BIsub_group_commit_write_pipe: {
  4377. const char *Name;
  4378. if (BuiltinID == Builtin::BIcommit_read_pipe)
  4379. Name = "__commit_read_pipe";
  4380. else if (BuiltinID == Builtin::BIcommit_write_pipe)
  4381. Name = "__commit_write_pipe";
  4382. else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
  4383. Name = "__work_group_commit_read_pipe";
  4384. else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
  4385. Name = "__work_group_commit_write_pipe";
  4386. else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
  4387. Name = "__sub_group_commit_read_pipe";
  4388. else
  4389. Name = "__sub_group_commit_write_pipe";
  4390. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  4391. *Arg1 = EmitScalarExpr(E->getArg(1));
  4392. CGOpenCLRuntime OpenCLRT(CGM);
  4393. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  4394. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  4395. // Building the generic function prototype.
  4396. llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
  4397. llvm::FunctionType *FTy =
  4398. llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
  4399. llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4400. return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4401. {Arg0, Arg1, PacketSize, PacketAlign}));
  4402. }
  4403. // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
  4404. case Builtin::BIget_pipe_num_packets:
  4405. case Builtin::BIget_pipe_max_packets: {
  4406. const char *BaseName;
  4407. const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
  4408. if (BuiltinID == Builtin::BIget_pipe_num_packets)
  4409. BaseName = "__get_pipe_num_packets";
  4410. else
  4411. BaseName = "__get_pipe_max_packets";
  4412. std::string Name = std::string(BaseName) +
  4413. std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
  4414. // Building the generic function prototype.
  4415. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  4416. CGOpenCLRuntime OpenCLRT(CGM);
  4417. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  4418. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  4419. llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
  4420. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4421. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4422. return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4423. {Arg0, PacketSize, PacketAlign}));
  4424. }
  4425. // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
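// to_global/to_local/to_private lower to calls to __to_global/__to_local/
// __to_private that take and return i8 pointers, with address-space and
// pointer casts inserted around the call as needed.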
  4426. case Builtin::BIto_global:
  4427. case Builtin::BIto_local:
  4428. case Builtin::BIto_private: {
  4429. auto Arg0 = EmitScalarExpr(E->getArg(0));
  4430. auto NewArgT = llvm::PointerType::get(Int8Ty,
  4431. CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4432. auto NewRetT = llvm::PointerType::get(Int8Ty,
  4433. CGM.getContext().getTargetAddressSpace(
  4434. E->getType()->getPointeeType().getAddressSpace()));
  4435. auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
  4436. llvm::Value *NewArg;
  4437. if (Arg0->getType()->getPointerAddressSpace() !=
  4438. NewArgT->getPointerAddressSpace())
  4439. NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
  4440. else
  4441. NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
  4442. auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
  4443. auto NewCall =
  4444. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
  4445. return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
  4446. ConvertType(E->getType())));
  4447. }
  4448. // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
  4449. // It contains four different overload formats specified in Table 6.13.17.1.
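// The four forms map onto the runtime entry points __enqueue_kernel_basic,
// __enqueue_kernel_varargs, __enqueue_kernel_basic_events and
// __enqueue_kernel_events_varargs, selected below by the argument count and by
// whether the fourth argument is the block itself or an event count.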
  4450. case Builtin::BIenqueue_kernel: {
  4451. StringRef Name; // Generated function call name
  4452. unsigned NumArgs = E->getNumArgs();
  4453. llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
  4454. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  4455. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4456. llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
  4457. llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
  4458. LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
  4459. llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
  4460. llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
  4461. if (NumArgs == 4) {
  4462. // The most basic form of the call with parameters:
  4463. // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
  4464. Name = "__enqueue_kernel_basic";
  4465. llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
  4466. GenericVoidPtrTy};
  4467. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4468. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4469. auto Info =
  4470. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
  4471. llvm::Value *Kernel =
  4472. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4473. llvm::Value *Block =
  4474. Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4475. AttrBuilder B(Builder.getContext());
  4476. B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
  4477. llvm::AttributeList ByValAttrSet =
  4478. llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
  4479. auto RTCall =
  4480. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
  4481. {Queue, Flags, Range, Kernel, Block});
  4482. RTCall->setAttributes(ByValAttrSet);
  4483. return RValue::get(RTCall);
  4484. }
  4485. assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
  4486. // Create a temporary array to hold the sizes of local pointer arguments
  4487. // for the block. \p First is the position of the first size argument.
  4488. auto CreateArrayForSizeVar = [=](unsigned First)
  4489. -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
  4490. llvm::APInt ArraySize(32, NumArgs - First);
  4491. QualType SizeArrayTy = getContext().getConstantArrayType(
  4492. getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
  4493. /*IndexTypeQuals=*/0);
  4494. auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
  4495. llvm::Value *TmpPtr = Tmp.getPointer();
  4496. llvm::Value *TmpSize = EmitLifetimeStart(
  4497. CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
  4498. llvm::Value *ElemPtr;
  4499. // Each of the following arguments specifies the size of the corresponding
  4500. // argument passed to the enqueued block.
  4501. auto *Zero = llvm::ConstantInt::get(IntTy, 0);
  4502. for (unsigned I = First; I < NumArgs; ++I) {
  4503. auto *Index = llvm::ConstantInt::get(IntTy, I - First);
  4504. auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
  4505. {Zero, Index});
  4506. if (I == First)
  4507. ElemPtr = GEP;
  4508. auto *V =
  4509. Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
  4510. Builder.CreateAlignedStore(
  4511. V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
  4512. }
  4513. return std::tie(ElemPtr, TmpSize, TmpPtr);
  4514. };
  4515. // Could have events and/or varargs.
  4516. if (E->getArg(3)->getType()->isBlockPointerType()) {
  4517. // No events passed, but has variadic arguments.
  4518. Name = "__enqueue_kernel_varargs";
  4519. auto Info =
  4520. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
  4521. llvm::Value *Kernel =
  4522. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4523. auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4524. llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
  4525. std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
  4526. // Create a vector of the arguments, as well as a constant value to
  4527. // express to the runtime the number of variadic arguments.
  4528. llvm::Value *const Args[] = {Queue, Flags,
  4529. Range, Kernel,
  4530. Block, ConstantInt::get(IntTy, NumArgs - 4),
  4531. ElemPtr};
  4532. llvm::Type *const ArgTys[] = {
  4533. QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
  4534. GenericVoidPtrTy, IntTy, ElemPtr->getType()};
  4535. llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
  4536. auto Call = RValue::get(
  4537. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
  4538. if (TmpSize)
  4539. EmitLifetimeEnd(TmpSize, TmpPtr);
  4540. return Call;
  4541. }
4542. // Any call that reaches this point has event arguments.
  4543. if (NumArgs >= 7) {
  4544. llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
  4545. llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
  4546. CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4547. llvm::Value *NumEvents =
  4548. Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
4549. // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
4550. // to be a null pointer constant (including a `0` literal), we can take this
4551. // into account and emit a null pointer directly.
  4552. llvm::Value *EventWaitList = nullptr;
  4553. if (E->getArg(4)->isNullPointerConstant(
  4554. getContext(), Expr::NPC_ValueDependentIsNotNull)) {
  4555. EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
  4556. } else {
  4557. EventWaitList = E->getArg(4)->getType()->isArrayType()
  4558. ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
  4559. : EmitScalarExpr(E->getArg(4));
  4560. // Convert to generic address space.
  4561. EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
  4562. }
  4563. llvm::Value *EventRet = nullptr;
  4564. if (E->getArg(5)->isNullPointerConstant(
  4565. getContext(), Expr::NPC_ValueDependentIsNotNull)) {
  4566. EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
  4567. } else {
  4568. EventRet =
  4569. Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
  4570. }
  4571. auto Info =
  4572. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
  4573. llvm::Value *Kernel =
  4574. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4575. llvm::Value *Block =
  4576. Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4577. std::vector<llvm::Type *> ArgTys = {
  4578. QueueTy, Int32Ty, RangeTy, Int32Ty,
  4579. EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
  4580. std::vector<llvm::Value *> Args = {Queue, Flags, Range,
  4581. NumEvents, EventWaitList, EventRet,
  4582. Kernel, Block};
  4583. if (NumArgs == 7) {
  4584. // Has events but no variadics.
  4585. Name = "__enqueue_kernel_basic_events";
  4586. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4587. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4588. return RValue::get(
  4589. EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4590. llvm::ArrayRef<llvm::Value *>(Args)));
  4591. }
4592. // Has event info and variadic arguments.
  4593. // Pass the number of variadics to the runtime function too.
  4594. Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
  4595. ArgTys.push_back(Int32Ty);
  4596. Name = "__enqueue_kernel_events_varargs";
  4597. llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
  4598. std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
  4599. Args.push_back(ElemPtr);
  4600. ArgTys.push_back(ElemPtr->getType());
  4601. llvm::FunctionType *FTy = llvm::FunctionType::get(
  4602. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  4603. auto Call =
  4604. RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
  4605. llvm::ArrayRef<llvm::Value *>(Args)));
  4606. if (TmpSize)
  4607. EmitLifetimeEnd(TmpSize, TmpPtr);
  4608. return Call;
  4609. }
  4610. LLVM_FALLTHROUGH;
  4611. }
  4612. // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
  4613. // parameter.
  4614. case Builtin::BIget_kernel_work_group_size: {
  4615. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  4616. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4617. auto Info =
  4618. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
  4619. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4620. Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4621. return RValue::get(EmitRuntimeCall(
  4622. CGM.CreateRuntimeFunction(
  4623. llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
  4624. false),
  4625. "__get_kernel_work_group_size_impl"),
  4626. {Kernel, Arg}));
  4627. }
  4628. case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
  4629. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  4630. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4631. auto Info =
  4632. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
  4633. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4634. Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4635. return RValue::get(EmitRuntimeCall(
  4636. CGM.CreateRuntimeFunction(
  4637. llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
  4638. false),
  4639. "__get_kernel_preferred_work_group_size_multiple_impl"),
  4640. {Kernel, Arg}));
  4641. }
  4642. case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
  4643. case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
  4644. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  4645. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  4646. LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
  4647. llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
  4648. auto Info =
  4649. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
  4650. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  4651. Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  4652. const char *Name =
  4653. BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
  4654. ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
  4655. : "__get_kernel_sub_group_count_for_ndrange_impl";
  4656. return RValue::get(EmitRuntimeCall(
  4657. CGM.CreateRuntimeFunction(
  4658. llvm::FunctionType::get(
  4659. IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
  4660. false),
  4661. Name),
  4662. {NDRange, Kernel, Block}));
  4663. }
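// __builtin_store_half(f) truncates a float/double value to half and stores
// it; __builtin_load_half(f) loads a half and extends it to double or float.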
  4664. case Builtin::BI__builtin_store_half:
  4665. case Builtin::BI__builtin_store_halff: {
  4666. Value *Val = EmitScalarExpr(E->getArg(0));
  4667. Address Address = EmitPointerWithAlignment(E->getArg(1));
  4668. Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
  4669. return RValue::get(Builder.CreateStore(HalfVal, Address));
  4670. }
  4671. case Builtin::BI__builtin_load_half: {
  4672. Address Address = EmitPointerWithAlignment(E->getArg(0));
  4673. Value *HalfVal = Builder.CreateLoad(Address);
  4674. return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
  4675. }
  4676. case Builtin::BI__builtin_load_halff: {
  4677. Address Address = EmitPointerWithAlignment(E->getArg(0));
  4678. Value *HalfVal = Builder.CreateLoad(Address);
  4679. return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
  4680. }
  4681. case Builtin::BIprintf:
  4682. if (getTarget().getTriple().isNVPTX() ||
  4683. getTarget().getTriple().isAMDGCN()) {
  4684. if (getLangOpts().OpenMPIsDevice)
  4685. return EmitOpenMPDevicePrintfCallExpr(E);
  4686. if (getTarget().getTriple().isNVPTX())
  4687. return EmitNVPTXDevicePrintfCallExpr(E);
  4688. if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
  4689. return EmitAMDGPUDevicePrintfCallExpr(E);
  4690. }
  4691. break;
  4692. case Builtin::BI__builtin_canonicalize:
  4693. case Builtin::BI__builtin_canonicalizef:
  4694. case Builtin::BI__builtin_canonicalizef16:
  4695. case Builtin::BI__builtin_canonicalizel:
  4696. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
  4697. case Builtin::BI__builtin_thread_pointer: {
  4698. if (!getContext().getTargetInfo().isTLSSupported())
  4699. CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
  4700. // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
  4701. break;
  4702. }
  4703. case Builtin::BI__builtin_os_log_format:
  4704. return emitBuiltinOSLogFormat(*E);
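// __xray_customevent and __xray_typedevent lower to the llvm.xray.customevent
// and llvm.xray.typedevent intrinsics, but only when XRay instrumentation is
// enabled for this function and the corresponding instrumentation bundle
// (custom/typed events) is requested; otherwise the calls are ignored.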
  4705. case Builtin::BI__xray_customevent: {
  4706. if (!ShouldXRayInstrumentFunction())
  4707. return RValue::getIgnored();
  4708. if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
  4709. XRayInstrKind::Custom))
  4710. return RValue::getIgnored();
  4711. if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
  4712. if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
  4713. return RValue::getIgnored();
  4714. Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
  4715. auto FTy = F->getFunctionType();
  4716. auto Arg0 = E->getArg(0);
  4717. auto Arg0Val = EmitScalarExpr(Arg0);
  4718. auto Arg0Ty = Arg0->getType();
  4719. auto PTy0 = FTy->getParamType(0);
  4720. if (PTy0 != Arg0Val->getType()) {
  4721. if (Arg0Ty->isArrayType())
  4722. Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
  4723. else
  4724. Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
  4725. }
  4726. auto Arg1 = EmitScalarExpr(E->getArg(1));
  4727. auto PTy1 = FTy->getParamType(1);
  4728. if (PTy1 != Arg1->getType())
  4729. Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
  4730. return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
  4731. }
  4732. case Builtin::BI__xray_typedevent: {
  4733. // TODO: There should be a way to always emit events even if the current
  4734. // function is not instrumented. Losing events in a stream can cripple
  4735. // a trace.
  4736. if (!ShouldXRayInstrumentFunction())
  4737. return RValue::getIgnored();
  4738. if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
  4739. XRayInstrKind::Typed))
  4740. return RValue::getIgnored();
  4741. if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
  4742. if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
  4743. return RValue::getIgnored();
  4744. Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
  4745. auto FTy = F->getFunctionType();
  4746. auto Arg0 = EmitScalarExpr(E->getArg(0));
  4747. auto PTy0 = FTy->getParamType(0);
  4748. if (PTy0 != Arg0->getType())
  4749. Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
  4750. auto Arg1 = E->getArg(1);
  4751. auto Arg1Val = EmitScalarExpr(Arg1);
  4752. auto Arg1Ty = Arg1->getType();
  4753. auto PTy1 = FTy->getParamType(1);
  4754. if (PTy1 != Arg1Val->getType()) {
  4755. if (Arg1Ty->isArrayType())
  4756. Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
  4757. else
  4758. Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
  4759. }
  4760. auto Arg2 = EmitScalarExpr(E->getArg(2));
  4761. auto PTy2 = FTy->getParamType(2);
  4762. if (PTy2 != Arg2->getType())
  4763. Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
  4764. return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
  4765. }
  4766. case Builtin::BI__builtin_ms_va_start:
  4767. case Builtin::BI__builtin_ms_va_end:
  4768. return RValue::get(
  4769. EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
  4770. BuiltinID == Builtin::BI__builtin_ms_va_start));
  4771. case Builtin::BI__builtin_ms_va_copy: {
  4772. // Lower this manually. We can't reliably determine whether or not any
  4773. // given va_copy() is for a Win64 va_list from the calling convention
  4774. // alone, because it's legal to do this from a System V ABI function.
  4775. // With opaque pointer types, we won't have enough information in LLVM
  4776. // IR to determine this from the argument types, either. Best to do it
  4777. // now, while we have enough information.
  4778. Address DestAddr = EmitMSVAListRef(E->getArg(0));
  4779. Address SrcAddr = EmitMSVAListRef(E->getArg(1));
  4780. llvm::Type *BPP = Int8PtrPtrTy;
  4781. DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
  4782. Int8PtrTy, DestAddr.getAlignment());
  4783. SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
  4784. Int8PtrTy, SrcAddr.getAlignment());
  4785. Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
  4786. return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
  4787. }
  4788. case Builtin::BI__builtin_get_device_side_mangled_name: {
  4789. auto Name = CGM.getCUDARuntime().getDeviceSideName(
  4790. cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
  4791. auto Str = CGM.GetAddrOfConstantCString(Name, "");
  4792. llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
  4793. llvm::ConstantInt::get(SizeTy, 0)};
  4794. auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
  4795. Str.getPointer(), Zeros);
  4796. return RValue::get(Ptr);
  4797. }
  4798. }
  4799. // If this is an alias for a lib function (e.g. __builtin_sin), emit
  4800. // the call using the normal call path, but using the unmangled
  4801. // version of the function name.
  4802. if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
  4803. return emitLibraryCall(*this, FD, E,
  4804. CGM.getBuiltinLibFunction(FD, BuiltinID));
  4805. // If this is a predefined lib function (e.g. malloc), emit the call
  4806. // using exactly the normal call path.
  4807. if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
  4808. return emitLibraryCall(*this, FD, E,
  4809. cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
  4810. // Check that a call to a target specific builtin has the correct target
  4811. // features.
4812. // This check is done down here so it is skipped for non-target-specific
4813. // builtins; however, if generic builtins start to require generic target
4814. // features, we can move this up to the beginning of the function.
  4815. checkTargetFeatures(E, FD);
  4816. if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
  4817. LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
  4818. // See if we have a target specific intrinsic.
  4819. const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
  4820. Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  4821. StringRef Prefix =
  4822. llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
  4823. if (!Prefix.empty()) {
  4824. IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
4825. // NOTE: we don't need to perform a compatibility flag check here since the
4826. // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
4827. // MS builtins via ALL_MS_LANGUAGES and have already been filtered earlier.
  4828. if (IntrinsicID == Intrinsic::not_intrinsic)
  4829. IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
  4830. }
  4831. if (IntrinsicID != Intrinsic::not_intrinsic) {
  4832. SmallVector<Value*, 16> Args;
  4833. // Find out if any arguments are required to be integer constant
  4834. // expressions.
  4835. unsigned ICEArguments = 0;
  4836. ASTContext::GetBuiltinTypeError Error;
  4837. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  4838. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  4839. Function *F = CGM.getIntrinsic(IntrinsicID);
  4840. llvm::FunctionType *FTy = F->getFunctionType();
  4841. for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
  4842. Value *ArgValue;
  4843. // If this is a normal argument, just emit it as a scalar.
  4844. if ((ICEArguments & (1 << i)) == 0) {
  4845. ArgValue = EmitScalarExpr(E->getArg(i));
  4846. } else {
  4847. // If this is required to be a constant, constant fold it so that we
  4848. // know that the generated intrinsic gets a ConstantInt.
  4849. ArgValue = llvm::ConstantInt::get(
  4850. getLLVMContext(),
  4851. *E->getArg(i)->getIntegerConstantExpr(getContext()));
  4852. }
  4853. // If the intrinsic arg type is different from the builtin arg type
  4854. // we need to do a bit cast.
  4855. llvm::Type *PTy = FTy->getParamType(i);
  4856. if (PTy != ArgValue->getType()) {
  4857. // XXX - vector of pointers?
  4858. if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
  4859. if (PtrTy->getAddressSpace() !=
  4860. ArgValue->getType()->getPointerAddressSpace()) {
  4861. ArgValue = Builder.CreateAddrSpaceCast(
  4862. ArgValue,
  4863. ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
  4864. }
  4865. }
4866. assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
4867. "Must be able to losslessly bit cast to param");
  4868. ArgValue = Builder.CreateBitCast(ArgValue, PTy);
  4869. }
  4870. Args.push_back(ArgValue);
  4871. }
  4872. Value *V = Builder.CreateCall(F, Args);
  4873. QualType BuiltinRetType = E->getType();
  4874. llvm::Type *RetTy = VoidTy;
  4875. if (!BuiltinRetType->isVoidType())
  4876. RetTy = ConvertType(BuiltinRetType);
  4877. if (RetTy != V->getType()) {
  4878. // XXX - vector of pointers?
  4879. if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
  4880. if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
  4881. V = Builder.CreateAddrSpaceCast(
  4882. V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
  4883. }
  4884. }
  4885. assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
  4886. "Must be able to losslessly bit cast result type");
  4887. V = Builder.CreateBitCast(V, RetTy);
  4888. }
  4889. return RValue::get(V);
  4890. }
  4891. // Some target-specific builtins can have aggregate return values, e.g.
  4892. // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
  4893. // ReturnValue to be non-null, so that the target-specific emission code can
  4894. // always just emit into it.
  4895. TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
  4896. if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
  4897. Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
  4898. ReturnValue = ReturnValueSlot(DestPtr, false);
  4899. }
  4900. // Now see if we can emit a target-specific builtin.
  4901. if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
  4902. switch (EvalKind) {
  4903. case TEK_Scalar:
  4904. return RValue::get(V);
  4905. case TEK_Aggregate:
  4906. return RValue::getAggregate(ReturnValue.getValue(),
  4907. ReturnValue.isVolatile());
  4908. case TEK_Complex:
  4909. llvm_unreachable("No current target builtin returns complex");
  4910. }
  4911. llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
  4912. }
  4913. ErrorUnsupported(E, "builtin function");
  4914. // Unknown builtin, for now just dump it out and return undef.
  4915. return GetUndefRValue(E->getType());
  4916. }
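// Dispatch a target-specific builtin to the per-architecture emitter for the
// given arch; unknown architectures return nullptr.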
  4917. static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
  4918. unsigned BuiltinID, const CallExpr *E,
  4919. ReturnValueSlot ReturnValue,
  4920. llvm::Triple::ArchType Arch) {
  4921. switch (Arch) {
  4922. case llvm::Triple::arm:
  4923. case llvm::Triple::armeb:
  4924. case llvm::Triple::thumb:
  4925. case llvm::Triple::thumbeb:
  4926. return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
  4927. case llvm::Triple::aarch64:
  4928. case llvm::Triple::aarch64_32:
  4929. case llvm::Triple::aarch64_be:
  4930. return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
  4931. case llvm::Triple::bpfeb:
  4932. case llvm::Triple::bpfel:
  4933. return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
  4934. case llvm::Triple::x86:
  4935. case llvm::Triple::x86_64:
  4936. return CGF->EmitX86BuiltinExpr(BuiltinID, E);
  4937. case llvm::Triple::ppc:
  4938. case llvm::Triple::ppcle:
  4939. case llvm::Triple::ppc64:
  4940. case llvm::Triple::ppc64le:
  4941. return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
  4942. case llvm::Triple::r600:
  4943. case llvm::Triple::amdgcn:
  4944. return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
  4945. case llvm::Triple::systemz:
  4946. return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
  4947. case llvm::Triple::nvptx:
  4948. case llvm::Triple::nvptx64:
  4949. return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
  4950. case llvm::Triple::wasm32:
  4951. case llvm::Triple::wasm64:
  4952. return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
  4953. case llvm::Triple::hexagon:
  4954. return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
  4955. case llvm::Triple::riscv32:
  4956. case llvm::Triple::riscv64:
  4957. return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
  4958. default:
  4959. return nullptr;
  4960. }
  4961. }
  4962. Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
  4963. const CallExpr *E,
  4964. ReturnValueSlot ReturnValue) {
  4965. if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
  4966. assert(getContext().getAuxTargetInfo() && "Missing aux target info");
  4967. return EmitTargetArchBuiltinExpr(
  4968. this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
  4969. ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
  4970. }
  4971. return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
  4972. getTarget().getTriple().getArch());
  4973. }
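// Map a NeonTypeFlags descriptor onto the corresponding fixed-width LLVM
// vector type; the quad flag doubles the element count (128-bit vs 64-bit
// vectors), and half/bfloat elements fall back to i16 when the target lacks
// legal half/bfloat support.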
  4974. static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
  4975. NeonTypeFlags TypeFlags,
  4976. bool HasLegalHalfType = true,
  4977. bool V1Ty = false,
  4978. bool AllowBFloatArgsAndRet = true) {
  4979. int IsQuad = TypeFlags.isQuad();
  4980. switch (TypeFlags.getEltType()) {
  4981. case NeonTypeFlags::Int8:
  4982. case NeonTypeFlags::Poly8:
  4983. return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  4984. case NeonTypeFlags::Int16:
  4985. case NeonTypeFlags::Poly16:
  4986. return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  4987. case NeonTypeFlags::BFloat16:
  4988. if (AllowBFloatArgsAndRet)
  4989. return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
  4990. else
  4991. return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  4992. case NeonTypeFlags::Float16:
  4993. if (HasLegalHalfType)
  4994. return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
  4995. else
  4996. return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  4997. case NeonTypeFlags::Int32:
  4998. return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  4999. case NeonTypeFlags::Int64:
  5000. case NeonTypeFlags::Poly64:
  5001. return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  5002. case NeonTypeFlags::Poly128:
5003. // FIXME: i128 and f128 don't get full support in Clang and LLVM;
5004. // a lot of the i128 and f128 API is missing,
5005. // so we use v16i8 to represent poly128 and rely on pattern matching.
  5006. return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  5007. case NeonTypeFlags::Float32:
  5008. return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  5009. case NeonTypeFlags::Float64:
  5010. return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  5011. }
  5012. llvm_unreachable("Unknown vector element type!");
  5013. }
  5014. static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
  5015. NeonTypeFlags IntTypeFlags) {
  5016. int IsQuad = IntTypeFlags.isQuad();
  5017. switch (IntTypeFlags.getEltType()) {
  5018. case NeonTypeFlags::Int16:
  5019. return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  5020. case NeonTypeFlags::Int32:
  5021. return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  5022. case NeonTypeFlags::Int64:
  5023. return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  5024. default:
  5025. llvm_unreachable("Type can't be converted to floating-point!");
  5026. }
  5027. }
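// Splat lane C of vector V across every lane of the result vector.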
  5028. Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
  5029. const ElementCount &Count) {
  5030. Value *SV = llvm::ConstantVector::getSplat(Count, C);
  5031. return Builder.CreateShuffleVector(V, V, SV, "lane");
  5032. }
  5033. Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  5034. ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  5035. return EmitNeonSplat(V, C, EC);
  5036. }
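// Bitcast each operand to the intrinsic's expected parameter type (building a
// shift-amount splat for the designated shift operand, and skipping metadata
// arguments of constrained intrinsics), then emit the call, using the
// constrained-FP call path when the intrinsic is constrained.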
  5037. Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
  5038. const char *name,
  5039. unsigned shift, bool rightshift) {
  5040. unsigned j = 0;
  5041. for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
  5042. ai != ae; ++ai, ++j) {
  5043. if (F->isConstrainedFPIntrinsic())
  5044. if (ai->getType()->isMetadataTy())
  5045. continue;
  5046. if (shift > 0 && shift == j)
  5047. Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
  5048. else
  5049. Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  5050. }
  5051. if (F->isConstrainedFPIntrinsic())
  5052. return Builder.CreateConstrainedFPCall(F, Ops, name);
  5053. else
  5054. return Builder.CreateCall(F, Ops, name);
  5055. }
  5056. Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
  5057. bool neg) {
  5058. int SV = cast<ConstantInt>(V)->getSExtValue();
  5059. return ConstantInt::get(Ty, neg ? -SV : SV);
  5060. }
  5061. // Right-shift a vector by a constant.
  5062. Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
  5063. llvm::Type *Ty, bool usgn,
  5064. const char *name) {
  5065. llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  5066. int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  5067. int EltSize = VTy->getScalarSizeInBits();
  5068. Vec = Builder.CreateBitCast(Vec, Ty);
  5069. // lshr/ashr are undefined when the shift amount is equal to the vector
  5070. // element size.
  5071. if (ShiftAmt == EltSize) {
  5072. if (usgn) {
  5073. // Right-shifting an unsigned value by its size yields 0.
  5074. return llvm::ConstantAggregateZero::get(VTy);
  5075. } else {
  5076. // Right-shifting a signed value by its size is equivalent
  5077. // to a shift of size-1.
  5078. --ShiftAmt;
  5079. Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
  5080. }
  5081. }
  5082. Shift = EmitNeonShiftVector(Shift, Ty, false);
  5083. if (usgn)
  5084. return Builder.CreateLShr(Vec, Shift, name);
  5085. else
  5086. return Builder.CreateAShr(Vec, Shift, name);
  5087. }
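// Flags describing how a NEON builtin's LLVM intrinsic signature is derived
// from its return/argument types (used as the TypeModifier field in
// ARMVectorIntrinsicInfo below).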
  5088. enum {
  5089. AddRetType = (1 << 0),
  5090. Add1ArgType = (1 << 1),
  5091. Add2ArgTypes = (1 << 2),
  5092. VectorizeRetType = (1 << 3),
  5093. VectorizeArgTypes = (1 << 4),
  5094. InventFloatType = (1 << 5),
  5095. UnsignedAlts = (1 << 6),
  5096. Use64BitVectors = (1 << 7),
  5097. Use128BitVectors = (1 << 8),
  5098. Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  5099. VectorRet = AddRetType | VectorizeRetType,
  5100. VectorRetGetArgs01 =
  5101. AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  5102. FpCmpzModifiers =
  5103. AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
  5104. };
  5105. namespace {
  5106. struct ARMVectorIntrinsicInfo {
  5107. const char *NameHint;
  5108. unsigned BuiltinID;
  5109. unsigned LLVMIntrinsic;
  5110. unsigned AltLLVMIntrinsic;
  5111. uint64_t TypeModifier;
  5112. bool operator<(unsigned RHSBuiltinID) const {
  5113. return BuiltinID < RHSBuiltinID;
  5114. }
  5115. bool operator<(const ARMVectorIntrinsicInfo &TE) const {
  5116. return BuiltinID < TE.BuiltinID;
  5117. }
  5118. };
  5119. } // end anonymous namespace
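// The NEONMAPn macros build ARMVectorIntrinsicInfo entries: NEONMAP0 has no
// LLVM intrinsic (the builtin gets custom handling), NEONMAP1 maps to a single
// intrinsic, and NEONMAP2 supplies a primary and an alternate intrinsic
// (typically unsigned/signed variants) selected via the type modifier flags.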
  5120. #define NEONMAP0(NameBase) \
  5121. { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
  5122. #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  5123. { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
  5124. Intrinsic::LLVMIntrinsic, 0, TypeModifier }
  5125. #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  5126. { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
  5127. Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
  5128. TypeModifier }
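// This table is kept sorted by BuiltinID so lookups can use binary search (see
// the operator< overloads on ARMVectorIntrinsicInfo above).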
  5129. static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  5130. NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0),
  5131. NEONMAP0(splat_lane_v),
  5132. NEONMAP0(splat_laneq_v),
  5133. NEONMAP0(splatq_lane_v),
  5134. NEONMAP0(splatq_laneq_v),
  5135. NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  5136. NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  5137. NEONMAP1(vabs_v, arm_neon_vabs, 0),
  5138. NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  5139. NEONMAP0(vadd_v),
  5140. NEONMAP0(vaddhn_v),
  5141. NEONMAP0(vaddq_v),
  5142. NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  5143. NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  5144. NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  5145. NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  5146. NEONMAP1(vbfdot_v, arm_neon_bfdot, 0),
  5147. NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0),
  5148. NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0),
  5149. NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0),
  5150. NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0),
  5151. NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  5152. NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  5153. NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
  5154. NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
  5155. NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
  5156. NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
  5157. NEONMAP1(vcage_v, arm_neon_vacge, 0),
  5158. NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  5159. NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  5160. NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  5161. NEONMAP1(vcale_v, arm_neon_vacge, 0),
  5162. NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  5163. NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  5164. NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  5165. NEONMAP0(vceqz_v),
  5166. NEONMAP0(vceqzq_v),
  5167. NEONMAP0(vcgez_v),
  5168. NEONMAP0(vcgezq_v),
  5169. NEONMAP0(vcgtz_v),
  5170. NEONMAP0(vcgtzq_v),
  5171. NEONMAP0(vclez_v),
  5172. NEONMAP0(vclezq_v),
  5173. NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  5174. NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  5175. NEONMAP0(vcltz_v),
  5176. NEONMAP0(vcltzq_v),
  5177. NEONMAP1(vclz_v, ctlz, Add1ArgType),
  5178. NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  5179. NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  5180. NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  5181. NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  5182. NEONMAP0(vcvt_f16_v),
  5183. NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  5184. NEONMAP0(vcvt_f32_v),
  5185. NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  5186. NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  5187. NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  5188. NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  5189. NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  5190. NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  5191. NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  5192. NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  5193. NEONMAP0(vcvt_s16_v),
  5194. NEONMAP0(vcvt_s32_v),
  5195. NEONMAP0(vcvt_s64_v),
  5196. NEONMAP0(vcvt_u16_v),
  5197. NEONMAP0(vcvt_u32_v),
  5198. NEONMAP0(vcvt_u64_v),
  5199. NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
  5200. NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  5201. NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  5202. NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
  5203. NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  5204. NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  5205. NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
  5206. NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  5207. NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  5208. NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
  5209. NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  5210. NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  5211. NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  5212. NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
  5213. NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  5214. NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  5215. NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
  5216. NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  5217. NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  5218. NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
  5219. NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  5220. NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  5221. NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
  5222. NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  5223. NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  5224. NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
  5225. NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  5226. NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  5227. NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
  5228. NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  5229. NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  5230. NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
  5231. NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  5232. NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  5233. NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
  5234. NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  5235. NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  5236. NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
  5237. NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  5238. NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  5239. NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
  5240. NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  5241. NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  5242. NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
  5243. NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  5244. NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  5245. NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
  5246. NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  5247. NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  5248. NEONMAP0(vcvtq_f16_v),
  5249. NEONMAP0(vcvtq_f32_v),
  5250. NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  5251. NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  5252. NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  5253. NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  5254. NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  5255. NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  5256. NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  5257. NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  5258. NEONMAP0(vcvtq_s16_v),
  5259. NEONMAP0(vcvtq_s32_v),
  5260. NEONMAP0(vcvtq_s64_v),
  5261. NEONMAP0(vcvtq_u16_v),
  5262. NEONMAP0(vcvtq_u32_v),
  5263. NEONMAP0(vcvtq_u64_v),
  5264. NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
  5265. NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
  5266. NEONMAP0(vext_v),
  5267. NEONMAP0(vextq_v),
  5268. NEONMAP0(vfma_v),
  5269. NEONMAP0(vfmaq_v),
  5270. NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  5271. NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  5272. NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  5273. NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  5274. NEONMAP0(vld1_dup_v),
  5275. NEONMAP1(vld1_v, arm_neon_vld1, 0),
  5276. NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  5277. NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  5278. NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  5279. NEONMAP0(vld1q_dup_v),
  5280. NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  5281. NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  5282. NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  5283. NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  5284. NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  5285. NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  5286. NEONMAP1(vld2_v, arm_neon_vld2, 0),
  5287. NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  5288. NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  5289. NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  5290. NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  5291. NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  5292. NEONMAP1(vld3_v, arm_neon_vld3, 0),
  5293. NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  5294. NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  5295. NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  5296. NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  5297. NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  5298. NEONMAP1(vld4_v, arm_neon_vld4, 0),
  5299. NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  5300. NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  5301. NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  5302. NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  5303. NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  5304. NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  5305. NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  5306. NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  5307. NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  5308. NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  5309. NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  5310. NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
  5311. NEONMAP0(vmovl_v),
  5312. NEONMAP0(vmovn_v),
  5313. NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  5314. NEONMAP0(vmull_v),
  5315. NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  5316. NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  5317. NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  5318. NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  5319. NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  5320. NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  5321. NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  5322. NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  5323. NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  5324. NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  5325. NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  5326. NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  5327. NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  5328. NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  5329. NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  5330. NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  5331. NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  5332. NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  5333. NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  5334. NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  5335. NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  5336. NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  5337. NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
  5338. NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
  5339. NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
  5340. NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
  5341. NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  5342. NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  5343. NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  5344. NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  5345. NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  5346. NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  5347. NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  5348. NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  5349. NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  5350. NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  5351. NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  5352. NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  5353. NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  5354. NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  5355. NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  5356. NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  5357. NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  5358. NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  5359. NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  5360. NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  5361. NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  5362. NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  5363. NEONMAP0(vrndi_v),
  5364. NEONMAP0(vrndiq_v),
  5365. NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  5366. NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  5367. NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  5368. NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  5369. NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  5370. NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  5371. NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  5372. NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  5373. NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  5374. NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  5375. NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  5376. NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  5377. NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  5378. NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  5379. NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  5380. NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  5381. NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  5382. NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  5383. NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  5384. NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  5385. NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  5386. NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  5387. NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  5388. NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  5389. NEONMAP0(vshl_n_v),
  5390. NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  5391. NEONMAP0(vshll_n_v),
  5392. NEONMAP0(vshlq_n_v),
  5393. NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  5394. NEONMAP0(vshr_n_v),
  5395. NEONMAP0(vshrn_n_v),
  5396. NEONMAP0(vshrq_n_v),
  5397. NEONMAP1(vst1_v, arm_neon_vst1, 0),
  5398. NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  5399. NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  5400. NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  5401. NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  5402. NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  5403. NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  5404. NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  5405. NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  5406. NEONMAP1(vst2_v, arm_neon_vst2, 0),
  5407. NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  5408. NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  5409. NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  5410. NEONMAP1(vst3_v, arm_neon_vst3, 0),
  5411. NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  5412. NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  5413. NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  5414. NEONMAP1(vst4_v, arm_neon_vst4, 0),
  5415. NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  5416. NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  5417. NEONMAP0(vsubhn_v),
  5418. NEONMAP0(vtrn_v),
  5419. NEONMAP0(vtrnq_v),
  5420. NEONMAP0(vtst_v),
  5421. NEONMAP0(vtstq_v),
  5422. NEONMAP1(vusdot_v, arm_neon_usdot, 0),
  5423. NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
  5424. NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
  5425. NEONMAP0(vuzp_v),
  5426. NEONMAP0(vuzpq_v),
  5427. NEONMAP0(vzip_v),
  5428. NEONMAP0(vzipq_v)
  5429. };
  5430. static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  5431. NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0),
  5432. NEONMAP0(splat_lane_v),
  5433. NEONMAP0(splat_laneq_v),
  5434. NEONMAP0(splatq_lane_v),
  5435. NEONMAP0(splatq_laneq_v),
  5436. NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  5437. NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  5438. NEONMAP0(vadd_v),
  5439. NEONMAP0(vaddhn_v),
  5440. NEONMAP0(vaddq_p128),
  5441. NEONMAP0(vaddq_v),
  5442. NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  5443. NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  5444. NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  5445. NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  5446. NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  5447. NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
  5448. NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
  5449. NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
  5450. NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0),
  5451. NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0),
  5452. NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
  5453. NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
  5454. NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
  5455. NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
  5456. NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  5457. NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  5458. NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  5459. NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  5460. NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  5461. NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  5462. NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  5463. NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  5464. NEONMAP0(vceqz_v),
  5465. NEONMAP0(vceqzq_v),
  5466. NEONMAP0(vcgez_v),
  5467. NEONMAP0(vcgezq_v),
  5468. NEONMAP0(vcgtz_v),
  5469. NEONMAP0(vcgtzq_v),
  5470. NEONMAP0(vclez_v),
  5471. NEONMAP0(vclezq_v),
  5472. NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  5473. NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  5474. NEONMAP0(vcltz_v),
  5475. NEONMAP0(vcltzq_v),
  5476. NEONMAP1(vclz_v, ctlz, Add1ArgType),
  5477. NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  5478. NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
  5479. NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
  5480. NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
  5481. NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
  5482. NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
  5483. NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
  5484. NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
  5485. NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
  5486. NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  5487. NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  5488. NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  5489. NEONMAP0(vcvt_f16_v),
  5490. NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  5491. NEONMAP0(vcvt_f32_v),
  5492. NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5493. NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5494. NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5495. NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  5496. NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  5497. NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  5498. NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  5499. NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  5500. NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  5501. NEONMAP0(vcvtq_f16_v),
  5502. NEONMAP0(vcvtq_f32_v),
  5503. NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0),
  5504. NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5505. NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5506. NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  5507. NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  5508. NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  5509. NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  5510. NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  5511. NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  5512. NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  5513. NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  5514. NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  5515. NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  5516. NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  5517. NEONMAP0(vext_v),
  5518. NEONMAP0(vextq_v),
  5519. NEONMAP0(vfma_v),
  5520. NEONMAP0(vfmaq_v),
  5521. NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
  5522. NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
  5523. NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
  5524. NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
  5525. NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
  5526. NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
  5527. NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
  5528. NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
  5529. NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  5530. NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  5531. NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  5532. NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  5533. NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  5534. NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  5535. NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  5536. NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  5537. NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  5538. NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  5539. NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0),
  5540. NEONMAP0(vmovl_v),
  5541. NEONMAP0(vmovn_v),
  5542. NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  5543. NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  5544. NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  5545. NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  5546. NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  5547. NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  5548. NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  5549. NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  5550. NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  5551. NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  5552. NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  5553. NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  5554. NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  5555. NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  5556. NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  5557. NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  5558. NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  5559. NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  5560. NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  5561. NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  5562. NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  5563. NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  5564. NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  5565. NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
  5566. NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
  5567. NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
  5568. NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
  5569. NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  5570. NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  5571. NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  5572. NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  5573. NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  5574. NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  5575. NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  5576. NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  5577. NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  5578. NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  5580. NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  5581. NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  5582. NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  5583. NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  5584. NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  5585. NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  5586. NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
  5587. NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  5588. NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  5589. NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  5590. NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  5591. NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  5592. NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  5593. NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
  5594. NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
  5595. NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
  5596. NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
  5597. NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
  5598. NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
  5599. NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
  5600. NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
  5601. NEONMAP0(vrndi_v),
  5602. NEONMAP0(vrndiq_v),
  5603. NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  5604. NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  5605. NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  5606. NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  5607. NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  5608. NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  5609. NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  5610. NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  5611. NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  5612. NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  5613. NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  5614. NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  5615. NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  5616. NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  5617. NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  5618. NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
  5619. NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
  5620. NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
  5621. NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
  5622. NEONMAP0(vshl_n_v),
  5623. NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  5624. NEONMAP0(vshll_n_v),
  5625. NEONMAP0(vshlq_n_v),
  5626. NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  5627. NEONMAP0(vshr_n_v),
  5628. NEONMAP0(vshrn_n_v),
  5629. NEONMAP0(vshrq_n_v),
  5630. NEONMAP1(vsm3partw1q_v, aarch64_crypto_sm3partw1, 0),
  5631. NEONMAP1(vsm3partw2q_v, aarch64_crypto_sm3partw2, 0),
  5632. NEONMAP1(vsm3ss1q_v, aarch64_crypto_sm3ss1, 0),
  5633. NEONMAP1(vsm3tt1aq_v, aarch64_crypto_sm3tt1a, 0),
  5634. NEONMAP1(vsm3tt1bq_v, aarch64_crypto_sm3tt1b, 0),
  5635. NEONMAP1(vsm3tt2aq_v, aarch64_crypto_sm3tt2a, 0),
  5636. NEONMAP1(vsm3tt2bq_v, aarch64_crypto_sm3tt2b, 0),
  5637. NEONMAP1(vsm4ekeyq_v, aarch64_crypto_sm4ekey, 0),
  5638. NEONMAP1(vsm4eq_v, aarch64_crypto_sm4e, 0),
  5639. NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  5640. NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  5641. NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  5642. NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  5643. NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  5644. NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  5645. NEONMAP0(vsubhn_v),
  5646. NEONMAP0(vtst_v),
  5647. NEONMAP0(vtstq_v),
  5648. NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
  5649. NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
  5650. NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
  5651. NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
  5652. };
  5653. static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  5654. NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  5655. NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  5656. NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  5657. NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  5658. NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  5659. NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  5660. NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  5661. NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  5662. NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  5663. NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  5664. NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  5665. NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  5666. NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  5667. NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  5668. NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  5669. NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  5670. NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  5671. NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  5672. NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  5673. NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  5674. NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  5675. NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  5676. NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  5677. NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  5678. NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  5679. NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  5680. NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  5681. NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  5682. NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  5683. NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  5684. NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  5685. NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  5686. NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  5687. NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  5688. NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
  5689. NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  5690. NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  5691. NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  5692. NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  5693. NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  5694. NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  5695. NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  5696. NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  5697. NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  5698. NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  5699. NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  5700. NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  5701. NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  5702. NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  5703. NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  5704. NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  5705. NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  5706. NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  5707. NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  5708. NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  5709. NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  5710. NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  5711. NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  5712. NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  5713. NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  5714. NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  5715. NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  5716. NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  5717. NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  5718. NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  5719. NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  5720. NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  5721. NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  5722. NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  5723. NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  5724. NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  5725. NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  5726. NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  5727. NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  5728. NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  5729. NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  5730. NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  5731. NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  5732. NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  5733. NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  5734. NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  5735. NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  5736. NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  5737. NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  5738. NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  5739. NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  5740. NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  5741. NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  5742. NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  5743. NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  5744. NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  5745. NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  5746. NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  5747. NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  5748. NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  5749. NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  5750. NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  5751. NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  5752. NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  5753. NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  5754. NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  5755. NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  5756. NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  5757. NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  5758. NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  5759. NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  5760. NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  5761. NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  5762. NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  5763. NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  5764. NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  5765. NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  5766. NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  5767. NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  5768. NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  5769. NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  5770. NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
  5771. NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  5772. NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
  5773. NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  5774. NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  5775. NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  5776. NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  5777. NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  5778. NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  5779. NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  5780. NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  5781. NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  5782. NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  5783. NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  5784. NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  5785. NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  5786. NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  5787. NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  5788. NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  5789. NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  5790. NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  5791. NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  5792. NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  5793. NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  5794. NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  5795. NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  5796. NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  5797. NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  5798. NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  5799. NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  5800. NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  5801. NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  5802. NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  5803. NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  5804. NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  5805. NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  5806. NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  5807. NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  5808. NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  5809. NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  5810. NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  5811. NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  5812. NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  5813. NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  5814. NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  5815. NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  5816. NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  5817. NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  5818. NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  5819. NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  5820. NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  5821. NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  5822. NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  5823. NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  5824. NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  5825. NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  5826. NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  5827. NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  5828. NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  5829. NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  5830. NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  5831. NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  5832. NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  5833. NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  5834. NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  5835. NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  5836. NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  5837. NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  5838. NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  5839. NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  5840. NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  5841. NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  5842. NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  5843. NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  5844. NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  5845. NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  5846. NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  5847. NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  5848. NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  5849. NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  5850. NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  5851. NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  5852. NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  5853. NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  5854. NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
// FP16 scalar intrinsics go here.
  5856. NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  5857. NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  5858. NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  5859. NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  5860. NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  5861. NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  5862. NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  5863. NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  5864. NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  5865. NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  5866. NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  5867. NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  5868. NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  5869. NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  5870. NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  5871. NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  5872. NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  5873. NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  5874. NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  5875. NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  5876. NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  5877. NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  5878. NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  5879. NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  5880. NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  5881. NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  5882. NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  5883. NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  5884. NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  5885. NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  5886. NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  5887. NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  5888. NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  5889. NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
  5890. };
  5891. #undef NEONMAP0
  5892. #undef NEONMAP1
  5893. #undef NEONMAP2
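// The SVEMAP macros defined below mirror the NEONMAP macros above: SVEMAP1
// ties an SVE builtin to an overloaded LLVM intrinsic, while SVEMAP2 records
// a builtin with no direct intrinsic (LLVMIntrinsic == 0) that is typically
// handled by custom codegen. Roughly, for a hypothetical NameBase `foo`,
//   SVEMAP2(foo, Mod)
// expands to
//   { "foo", SVE::BI__builtin_sve_foo, 0, 0, Mod }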
  5894. #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  5895. { \
  5896. #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
  5897. TypeModifier \
  5898. }
  5899. #define SVEMAP2(NameBase, TypeModifier) \
  5900. { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
  5901. static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
  5902. #define GET_SVE_LLVM_INTRINSIC_MAP
  5903. #include "clang/Basic/arm_sve_builtin_cg.inc"
  5904. #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
  5905. #undef GET_SVE_LLVM_INTRINSIC_MAP
  5906. };
  5907. #undef SVEMAP1
  5908. #undef SVEMAP2
  5909. static bool NEONSIMDIntrinsicsProvenSorted = false;
  5910. static bool AArch64SIMDIntrinsicsProvenSorted = false;
  5911. static bool AArch64SISDIntrinsicsProvenSorted = false;
  5912. static bool AArch64SVEIntrinsicsProvenSorted = false;
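// A rough sketch of the lookup contract: each map above is kept sorted by
// BuiltinID so findARMVectorIntrinsicInMap can binary-search it with
// llvm::lower_bound. Sortedness is only checked once per map, and only in
// asserts builds, with the result memoized in the *ProvenSorted flags.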
  5913. static const ARMVectorIntrinsicInfo *
  5914. findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
  5915. unsigned BuiltinID, bool &MapProvenSorted) {
  5916. #ifndef NDEBUG
  5917. if (!MapProvenSorted) {
  5918. assert(llvm::is_sorted(IntrinsicMap));
  5919. MapProvenSorted = true;
  5920. }
  5921. #endif
  5922. const ARMVectorIntrinsicInfo *Builtin =
  5923. llvm::lower_bound(IntrinsicMap, BuiltinID);
  5924. if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
  5925. return Builtin;
  5926. return nullptr;
  5927. }
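// LookupNeonLLVMIntrinsic turns a map entry's TypeModifier bits into the list
// of overload types passed to CGM.getIntrinsic. As an illustrative example
// (assuming Vectorize1ArgType == Add1ArgType | VectorizeArgTypes, as defined
// earlier in this file): vqaddh_s16 is mapped to aarch64_neon_sqadd with
// Vectorize1ArgType | Use64BitVectors, so its i16 argument type is widened to
// <4 x i16> (64 bits / 16 bits) and the selected declaration is
// llvm.aarch64.neon.sqadd.v4i16.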
  5928. Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
  5929. unsigned Modifier,
  5930. llvm::Type *ArgType,
  5931. const CallExpr *E) {
  5932. int VectorSize = 0;
  5933. if (Modifier & Use64BitVectors)
  5934. VectorSize = 64;
  5935. else if (Modifier & Use128BitVectors)
  5936. VectorSize = 128;
  5937. // Return type.
  5938. SmallVector<llvm::Type *, 3> Tys;
  5939. if (Modifier & AddRetType) {
  5940. llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
  5941. if (Modifier & VectorizeRetType)
  5942. Ty = llvm::FixedVectorType::get(
  5943. Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
  5944. Tys.push_back(Ty);
  5945. }
  5946. // Arguments.
  5947. if (Modifier & VectorizeArgTypes) {
  5948. int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
  5949. ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  5950. }
  5951. if (Modifier & (Add1ArgType | Add2ArgTypes))
  5952. Tys.push_back(ArgType);
  5953. if (Modifier & Add2ArgTypes)
  5954. Tys.push_back(ArgType);
  5955. if (Modifier & InventFloatType)
  5956. Tys.push_back(FloatTy);
  5957. return CGM.getIntrinsic(IntrinsicID, Tys);
  5958. }
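// EmitCommonNeonSISDBuiltinExpr handles the scalar ("SISD") builtins by
// promoting scalar operands into lane 0 of a vector, calling the vector
// intrinsic, and extracting lane 0 again when the builtin returns something
// narrower than the intrinsic's result. Roughly, vqaddh_s16(a, b) becomes:
//   %va = insertelement <4 x i16> undef, i16 %a, i64 0
//   %vb = insertelement <4 x i16> undef, i16 %b, i64 0
//   %vr = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %va, <4 x i16> %vb)
//   %r  = extractelement <4 x i16> %vr, i64 0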
  5959. static Value *EmitCommonNeonSISDBuiltinExpr(
  5960. CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
  5961. SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  5962. unsigned BuiltinID = SISDInfo.BuiltinID;
  5963. unsigned int Int = SISDInfo.LLVMIntrinsic;
  5964. unsigned Modifier = SISDInfo.TypeModifier;
  5965. const char *s = SISDInfo.NameHint;
  5966. switch (BuiltinID) {
  5967. case NEON::BI__builtin_neon_vcled_s64:
  5968. case NEON::BI__builtin_neon_vcled_u64:
  5969. case NEON::BI__builtin_neon_vcles_f32:
  5970. case NEON::BI__builtin_neon_vcled_f64:
  5971. case NEON::BI__builtin_neon_vcltd_s64:
  5972. case NEON::BI__builtin_neon_vcltd_u64:
  5973. case NEON::BI__builtin_neon_vclts_f32:
  5974. case NEON::BI__builtin_neon_vcltd_f64:
  5975. case NEON::BI__builtin_neon_vcales_f32:
  5976. case NEON::BI__builtin_neon_vcaled_f64:
  5977. case NEON::BI__builtin_neon_vcalts_f32:
  5978. case NEON::BI__builtin_neon_vcaltd_f64:
// Only one direction of these comparisons actually exists: cmle is a cmge
// with swapped operands. The table gives us the right intrinsic, but we
// still need to do the swap.
  5982. std::swap(Ops[0], Ops[1]);
  5983. break;
  5984. }
  5985. assert(Int && "Generic code assumes a valid intrinsic");
  5986. // Determine the type(s) of this overloaded AArch64 intrinsic.
  5987. const Expr *Arg = E->getArg(0);
  5988. llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  5989. Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
  5990. int j = 0;
  5991. ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  5992. for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
  5993. ai != ae; ++ai, ++j) {
  5994. llvm::Type *ArgTy = ai->getType();
  5995. if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
  5996. ArgTy->getPrimitiveSizeInBits())
  5997. continue;
  5998. assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
  5999. // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
  6000. // it before inserting.
  6001. Ops[j] = CGF.Builder.CreateTruncOrBitCast(
  6002. Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
  6003. Ops[j] =
  6004. CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  6005. }
  6006. Value *Result = CGF.EmitNeonCall(F, Ops, s);
  6007. llvm::Type *ResultType = CGF.ConvertType(E->getType());
  6008. if (ResultType->getPrimitiveSizeInBits().getFixedSize() <
  6009. Result->getType()->getPrimitiveSizeInBits().getFixedSize())
  6010. return CGF.Builder.CreateExtractElement(Result, C0);
  6011. return CGF.Builder.CreateBitCast(Result, ResultType, s);
  6012. }
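// EmitCommonNeonBuiltinExpr is the shared lowering path for the overloaded
// NEON builtins. The last call argument is a NeonTypeFlags constant that
// encodes the element type, signedness, and whether the operation is on a
// 64-bit or 128-bit ("q") vector; for UnsignedAlts entries the
// LLVMIntrinsic/AltLLVMIntrinsic pair is selected based on that signedness.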
  6013. Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
  6014. unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
  6015. const char *NameHint, unsigned Modifier, const CallExpr *E,
  6016. SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
  6017. llvm::Triple::ArchType Arch) {
  6018. // Get the last argument, which specifies the vector type.
  6019. const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  6020. Optional<llvm::APSInt> NeonTypeConst =
  6021. Arg->getIntegerConstantExpr(getContext());
  6022. if (!NeonTypeConst)
  6023. return nullptr;
  6024. // Determine the type of this overloaded NEON intrinsic.
  6025. NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  6026. bool Usgn = Type.isUnsigned();
  6027. bool Quad = Type.isQuad();
  6028. const bool HasLegalHalfType = getTarget().hasLegalHalfType();
  6029. const bool AllowBFloatArgsAndRet =
  6030. getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
  6031. llvm::FixedVectorType *VTy =
  6032. GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
  6033. llvm::Type *Ty = VTy;
  6034. if (!Ty)
  6035. return nullptr;
  6036. auto getAlignmentValue32 = [&](Address addr) -> Value* {
  6037. return Builder.getInt32(addr.getAlignment().getQuantity());
  6038. };
  6039. unsigned Int = LLVMIntrinsic;
  6040. if ((Modifier & UnsignedAlts) && !Usgn)
  6041. Int = AltLLVMIntrinsic;
  6042. switch (BuiltinID) {
  6043. default: break;
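// A splat is emitted as a shufflevector whose mask repeats the selected lane;
// the q/laneq variants only adjust how many result lanes are produced
// relative to the source vector. E.g. splatting lane 1 of a <4 x i16> is
// roughly:
//   shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>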
  6044. case NEON::BI__builtin_neon_splat_lane_v:
  6045. case NEON::BI__builtin_neon_splat_laneq_v:
  6046. case NEON::BI__builtin_neon_splatq_lane_v:
  6047. case NEON::BI__builtin_neon_splatq_laneq_v: {
  6048. auto NumElements = VTy->getElementCount();
  6049. if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
  6050. NumElements = NumElements * 2;
  6051. if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
  6052. NumElements = NumElements.divideCoefficientBy(2);
  6053. Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
  6054. return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  6055. }
  6056. case NEON::BI__builtin_neon_vpadd_v:
  6057. case NEON::BI__builtin_neon_vpaddq_v:
  6058. // We don't allow fp/int overloading of intrinsics.
  6059. if (VTy->getElementType()->isFloatingPointTy() &&
  6060. Int == Intrinsic::aarch64_neon_addp)
  6061. Int = Intrinsic::aarch64_neon_faddp;
  6062. break;
  6063. case NEON::BI__builtin_neon_vabs_v:
  6064. case NEON::BI__builtin_neon_vabsq_v:
  6065. if (VTy->getElementType()->isFloatingPointTy())
  6066. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
  6067. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
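// In practice vadd_v/vaddq_v only reach this path for polynomial element
// types, where addition in GF(2) is carry-less, i.e. a bitwise XOR; hence the
// bitcast to an i8 vector and CreateXor below.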
  6068. case NEON::BI__builtin_neon_vadd_v:
  6069. case NEON::BI__builtin_neon_vaddq_v: {
  6070. llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
  6071. Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
  6072. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  6073. Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
  6074. return Builder.CreateBitCast(Ops[0], Ty);
  6075. }
  6076. case NEON::BI__builtin_neon_vaddhn_v: {
  6077. llvm::FixedVectorType *SrcTy =
  6078. llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  6079. // %sum = add <4 x i32> %lhs, %rhs
  6080. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  6081. Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
  6082. Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
  6083. // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  6084. Constant *ShiftAmt =
  6085. ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
  6086. Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
  6087. // %res = trunc <4 x i32> %high to <4 x i16>
  6088. return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  6089. }
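// vcale/vcalt are emitted with the same absolute-compare intrinsics as
// vcage/vcagt, just with the operands swapped. The intrinsic is overloaded on
// both the integer result vector and the floating-point operand vector, e.g.
// llvm.aarch64.neon.facge.v4i32.v4f32 for a 128-bit f32 compare on AArch64.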
  6090. case NEON::BI__builtin_neon_vcale_v:
  6091. case NEON::BI__builtin_neon_vcaleq_v:
  6092. case NEON::BI__builtin_neon_vcalt_v:
  6093. case NEON::BI__builtin_neon_vcaltq_v:
  6094. std::swap(Ops[0], Ops[1]);
  6095. LLVM_FALLTHROUGH;
  6096. case NEON::BI__builtin_neon_vcage_v:
  6097. case NEON::BI__builtin_neon_vcageq_v:
  6098. case NEON::BI__builtin_neon_vcagt_v:
  6099. case NEON::BI__builtin_neon_vcagtq_v: {
  6100. llvm::Type *Ty;
  6101. switch (VTy->getScalarSizeInBits()) {
  6102. default: llvm_unreachable("unexpected type");
  6103. case 32:
  6104. Ty = FloatTy;
  6105. break;
  6106. case 64:
  6107. Ty = DoubleTy;
  6108. break;
  6109. case 16:
  6110. Ty = HalfTy;
  6111. break;
  6112. }
  6113. auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
  6114. llvm::Type *Tys[] = { VTy, VecFlt };
  6115. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  6116. return EmitNeonCall(F, Ops, NameHint);
  6117. }
  6118. case NEON::BI__builtin_neon_vceqz_v:
  6119. case NEON::BI__builtin_neon_vceqzq_v:
  6120. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
  6121. ICmpInst::ICMP_EQ, "vceqz");
  6122. case NEON::BI__builtin_neon_vcgez_v:
  6123. case NEON::BI__builtin_neon_vcgezq_v:
  6124. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
  6125. ICmpInst::ICMP_SGE, "vcgez");
  6126. case NEON::BI__builtin_neon_vclez_v:
  6127. case NEON::BI__builtin_neon_vclezq_v:
  6128. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
  6129. ICmpInst::ICMP_SLE, "vclez");
  6130. case NEON::BI__builtin_neon_vcgtz_v:
  6131. case NEON::BI__builtin_neon_vcgtzq_v:
  6132. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
  6133. ICmpInst::ICMP_SGT, "vcgtz");
  6134. case NEON::BI__builtin_neon_vcltz_v:
  6135. case NEON::BI__builtin_neon_vcltzq_v:
  6136. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
  6137. ICmpInst::ICMP_SLT, "vcltz");
  6138. case NEON::BI__builtin_neon_vclz_v:
  6139. case NEON::BI__builtin_neon_vclzq_v:
// We generate a target-independent intrinsic, which needs a second argument
// indicating whether clz of zero is undefined; on ARM it isn't.
  6142. Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
  6143. break;
  6144. case NEON::BI__builtin_neon_vcvt_f32_v:
  6145. case NEON::BI__builtin_neon_vcvtq_f32_v:
  6146. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6147. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
  6148. HasLegalHalfType);
  6149. return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  6150. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  6151. case NEON::BI__builtin_neon_vcvt_f16_v:
  6152. case NEON::BI__builtin_neon_vcvtq_f16_v:
  6153. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6154. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
  6155. HasLegalHalfType);
  6156. return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  6157. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
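// The fixed-point converts take the fractional-bit count n as a trailing i32
// operand and are overloaded on both the FP vector and the integer vector.
// E.g. on AArch64, vcvt_n_f32_s32(v, 3) lowers roughly to
//   call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %v, i32 3)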
  6158. case NEON::BI__builtin_neon_vcvt_n_f16_v:
  6159. case NEON::BI__builtin_neon_vcvt_n_f32_v:
  6160. case NEON::BI__builtin_neon_vcvt_n_f64_v:
  6161. case NEON::BI__builtin_neon_vcvtq_n_f16_v:
  6162. case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  6163. case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
  6164. llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
  6165. Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  6166. Function *F = CGM.getIntrinsic(Int, Tys);
  6167. return EmitNeonCall(F, Ops, "vcvt_n");
  6168. }
  6169. case NEON::BI__builtin_neon_vcvt_n_s16_v:
  6170. case NEON::BI__builtin_neon_vcvt_n_s32_v:
  6171. case NEON::BI__builtin_neon_vcvt_n_u16_v:
  6172. case NEON::BI__builtin_neon_vcvt_n_u32_v:
  6173. case NEON::BI__builtin_neon_vcvt_n_s64_v:
  6174. case NEON::BI__builtin_neon_vcvt_n_u64_v:
  6175. case NEON::BI__builtin_neon_vcvtq_n_s16_v:
  6176. case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  6177. case NEON::BI__builtin_neon_vcvtq_n_u16_v:
  6178. case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  6179. case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  6180. case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
  6181. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6182. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  6183. return EmitNeonCall(F, Ops, "vcvt_n");
  6184. }
  6185. case NEON::BI__builtin_neon_vcvt_s32_v:
  6186. case NEON::BI__builtin_neon_vcvt_u32_v:
  6187. case NEON::BI__builtin_neon_vcvt_s64_v:
  6188. case NEON::BI__builtin_neon_vcvt_u64_v:
  6189. case NEON::BI__builtin_neon_vcvt_s16_v:
  6190. case NEON::BI__builtin_neon_vcvt_u16_v:
  6191. case NEON::BI__builtin_neon_vcvtq_s32_v:
  6192. case NEON::BI__builtin_neon_vcvtq_u32_v:
  6193. case NEON::BI__builtin_neon_vcvtq_s64_v:
  6194. case NEON::BI__builtin_neon_vcvtq_u64_v:
  6195. case NEON::BI__builtin_neon_vcvtq_s16_v:
  6196. case NEON::BI__builtin_neon_vcvtq_u16_v: {
  6197. Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
  6198. return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
  6199. : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  6200. }
  6201. case NEON::BI__builtin_neon_vcvta_s16_v:
  6202. case NEON::BI__builtin_neon_vcvta_s32_v:
  6203. case NEON::BI__builtin_neon_vcvta_s64_v:
  6204. case NEON::BI__builtin_neon_vcvta_u16_v:
  6205. case NEON::BI__builtin_neon_vcvta_u32_v:
  6206. case NEON::BI__builtin_neon_vcvta_u64_v:
  6207. case NEON::BI__builtin_neon_vcvtaq_s16_v:
  6208. case NEON::BI__builtin_neon_vcvtaq_s32_v:
  6209. case NEON::BI__builtin_neon_vcvtaq_s64_v:
  6210. case NEON::BI__builtin_neon_vcvtaq_u16_v:
  6211. case NEON::BI__builtin_neon_vcvtaq_u32_v:
  6212. case NEON::BI__builtin_neon_vcvtaq_u64_v:
  6213. case NEON::BI__builtin_neon_vcvtn_s16_v:
  6214. case NEON::BI__builtin_neon_vcvtn_s32_v:
  6215. case NEON::BI__builtin_neon_vcvtn_s64_v:
  6216. case NEON::BI__builtin_neon_vcvtn_u16_v:
  6217. case NEON::BI__builtin_neon_vcvtn_u32_v:
  6218. case NEON::BI__builtin_neon_vcvtn_u64_v:
  6219. case NEON::BI__builtin_neon_vcvtnq_s16_v:
  6220. case NEON::BI__builtin_neon_vcvtnq_s32_v:
  6221. case NEON::BI__builtin_neon_vcvtnq_s64_v:
  6222. case NEON::BI__builtin_neon_vcvtnq_u16_v:
  6223. case NEON::BI__builtin_neon_vcvtnq_u32_v:
  6224. case NEON::BI__builtin_neon_vcvtnq_u64_v:
  6225. case NEON::BI__builtin_neon_vcvtp_s16_v:
  6226. case NEON::BI__builtin_neon_vcvtp_s32_v:
  6227. case NEON::BI__builtin_neon_vcvtp_s64_v:
  6228. case NEON::BI__builtin_neon_vcvtp_u16_v:
  6229. case NEON::BI__builtin_neon_vcvtp_u32_v:
  6230. case NEON::BI__builtin_neon_vcvtp_u64_v:
  6231. case NEON::BI__builtin_neon_vcvtpq_s16_v:
  6232. case NEON::BI__builtin_neon_vcvtpq_s32_v:
  6233. case NEON::BI__builtin_neon_vcvtpq_s64_v:
  6234. case NEON::BI__builtin_neon_vcvtpq_u16_v:
  6235. case NEON::BI__builtin_neon_vcvtpq_u32_v:
  6236. case NEON::BI__builtin_neon_vcvtpq_u64_v:
  6237. case NEON::BI__builtin_neon_vcvtm_s16_v:
  6238. case NEON::BI__builtin_neon_vcvtm_s32_v:
  6239. case NEON::BI__builtin_neon_vcvtm_s64_v:
  6240. case NEON::BI__builtin_neon_vcvtm_u16_v:
  6241. case NEON::BI__builtin_neon_vcvtm_u32_v:
  6242. case NEON::BI__builtin_neon_vcvtm_u64_v:
  6243. case NEON::BI__builtin_neon_vcvtmq_s16_v:
  6244. case NEON::BI__builtin_neon_vcvtmq_s32_v:
  6245. case NEON::BI__builtin_neon_vcvtmq_s64_v:
  6246. case NEON::BI__builtin_neon_vcvtmq_u16_v:
  6247. case NEON::BI__builtin_neon_vcvtmq_u32_v:
  6248. case NEON::BI__builtin_neon_vcvtmq_u64_v: {
  6249. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6250. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  6251. }
  6252. case NEON::BI__builtin_neon_vcvtx_f32_v: {
  6253. llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
  6254. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  6255. }
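// vext concatenates the two sources and extracts one vector's worth of
// elements starting at lane CV, which maps directly onto a shufflevector
// with mask <CV, CV+1, ...>; indices past the first vector select from the
// second operand. E.g. vext with #2 on <4 x i16> uses mask <2, 3, 4, 5>.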
  6256. case NEON::BI__builtin_neon_vext_v:
  6257. case NEON::BI__builtin_neon_vextq_v: {
  6258. int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
  6259. SmallVector<int, 16> Indices;
  6260. for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
  6261. Indices.push_back(i+CV);
  6262. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6263. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6264. return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  6265. }
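// A sketch of the call emitted below for a 128-bit f32 fma (non-strict FP):
//   call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
// where %a is the accumulator; under strict FP the
// llvm.experimental.constrained.fma variant is emitted instead.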
  6266. case NEON::BI__builtin_neon_vfma_v:
  6267. case NEON::BI__builtin_neon_vfmaq_v: {
  6268. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6269. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6270. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
// The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
  6272. return emitCallMaybeConstrainedFPBuiltin(
  6273. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  6274. {Ops[1], Ops[2], Ops[0]});
  6275. }
  6276. case NEON::BI__builtin_neon_vld1_v:
  6277. case NEON::BI__builtin_neon_vld1q_v: {
  6278. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  6279. Ops.push_back(getAlignmentValue32(PtrOp0));
  6280. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  6281. }
  6282. case NEON::BI__builtin_neon_vld1_x2_v:
  6283. case NEON::BI__builtin_neon_vld1q_x2_v:
  6284. case NEON::BI__builtin_neon_vld1_x3_v:
  6285. case NEON::BI__builtin_neon_vld1q_x3_v:
  6286. case NEON::BI__builtin_neon_vld1_x4_v:
  6287. case NEON::BI__builtin_neon_vld1q_x4_v: {
  6288. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
  6289. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  6290. llvm::Type *Tys[2] = { VTy, PTy };
  6291. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  6292. Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
  6293. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  6294. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6295. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  6296. }
  6297. case NEON::BI__builtin_neon_vld2_v:
  6298. case NEON::BI__builtin_neon_vld2q_v:
  6299. case NEON::BI__builtin_neon_vld3_v:
  6300. case NEON::BI__builtin_neon_vld3q_v:
  6301. case NEON::BI__builtin_neon_vld4_v:
  6302. case NEON::BI__builtin_neon_vld4q_v:
  6303. case NEON::BI__builtin_neon_vld2_dup_v:
  6304. case NEON::BI__builtin_neon_vld2q_dup_v:
  6305. case NEON::BI__builtin_neon_vld3_dup_v:
  6306. case NEON::BI__builtin_neon_vld3q_dup_v:
  6307. case NEON::BI__builtin_neon_vld4_dup_v:
  6308. case NEON::BI__builtin_neon_vld4q_dup_v: {
  6309. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  6310. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  6311. Value *Align = getAlignmentValue32(PtrOp1);
  6312. Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
  6313. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  6314. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6315. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  6316. }
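// vld1_dup loads one element and broadcasts it: insert the loaded scalar into
// lane 0 of an undef vector, then splat it across all lanes. Roughly, on
// AArch64:
//   %e = load i16, i16* %p
//   %v = insertelement <4 x i16> undef, i16 %e, i64 0
//   %s = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer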
  6317. case NEON::BI__builtin_neon_vld1_dup_v:
  6318. case NEON::BI__builtin_neon_vld1q_dup_v: {
  6319. Value *V = UndefValue::get(Ty);
  6320. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  6321. PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
  6322. LoadInst *Ld = Builder.CreateLoad(PtrOp0);
  6323. llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  6324. Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
  6325. return EmitNeonSplat(Ops[0], CI);
  6326. }
  6327. case NEON::BI__builtin_neon_vld2_lane_v:
  6328. case NEON::BI__builtin_neon_vld2q_lane_v:
  6329. case NEON::BI__builtin_neon_vld3_lane_v:
  6330. case NEON::BI__builtin_neon_vld3q_lane_v:
  6331. case NEON::BI__builtin_neon_vld4_lane_v:
  6332. case NEON::BI__builtin_neon_vld4q_lane_v: {
  6333. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  6334. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  6335. for (unsigned I = 2; I < Ops.size() - 1; ++I)
  6336. Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
  6337. Ops.push_back(getAlignmentValue32(PtrOp1));
  6338. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
  6339. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  6340. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6341. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  6342. }
  6343. case NEON::BI__builtin_neon_vmovl_v: {
  6344. llvm::FixedVectorType *DTy =
  6345. llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
  6346. Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
  6347. if (Usgn)
  6348. return Builder.CreateZExt(Ops[0], Ty, "vmovl");
  6349. return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  6350. }
  6351. case NEON::BI__builtin_neon_vmovn_v: {
  6352. llvm::FixedVectorType *QTy =
  6353. llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  6354. Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
  6355. return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  6356. }
  6357. case NEON::BI__builtin_neon_vmull_v:
// FIXME: the integer vmull operations could be emitted in terms of pure
// LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
// hoisting the exts outside loops. Until global ISel comes along and can
// see through such movement, this leads to bad CodeGen. So we need an
// intrinsic for now.
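// For reference, the pure-IR form would look roughly like (illustrative):
//   %a.wide = sext <4 x i16> %a to <4 x i32>
//   %b.wide = sext <4 x i16> %b to <4 x i32>
//   %prod   = mul <4 x i32> %a.wide, %b.wide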
  6363. Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
  6364. Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
  6365. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  6366. case NEON::BI__builtin_neon_vpadal_v:
  6367. case NEON::BI__builtin_neon_vpadalq_v: {
  6368. // The source operand type has twice as many elements of half the size.
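// e.g. (illustrative) a vpadal accumulating into <4 x i32> takes its
// pairwise-added source from an <8 x i16> vector.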
  6369. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  6370. llvm::Type *EltTy =
  6371. llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  6372. auto *NarrowTy =
  6373. llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
  6374. llvm::Type *Tys[2] = { Ty, NarrowTy };
  6375. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  6376. }
  6377. case NEON::BI__builtin_neon_vpaddl_v:
  6378. case NEON::BI__builtin_neon_vpaddlq_v: {
  6379. // The source operand type has twice as many elements of half the size.
  6380. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  6381. llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  6382. auto *NarrowTy =
  6383. llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
  6384. llvm::Type *Tys[2] = { Ty, NarrowTy };
  6385. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  6386. }
  6387. case NEON::BI__builtin_neon_vqdmlal_v:
  6388. case NEON::BI__builtin_neon_vqdmlsl_v: {
  6389. SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
  6390. Ops[1] =
  6391. EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
  6392. Ops.resize(2);
  6393. return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  6394. }
  6395. case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  6396. case NEON::BI__builtin_neon_vqdmulh_lane_v:
  6397. case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  6398. case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
  6399. auto *RTy = cast<llvm::FixedVectorType>(Ty);
  6400. if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
  6401. BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
  6402. RTy = llvm::FixedVectorType::get(RTy->getElementType(),
  6403. RTy->getNumElements() * 2);
  6404. llvm::Type *Tys[2] = {
  6405. RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
  6406. /*isQuad*/ false))};
  6407. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  6408. }
  6409. case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  6410. case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  6411. case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  6412. case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
  6413. llvm::Type *Tys[2] = {
  6414. Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
  6415. /*isQuad*/ true))};
  6416. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  6417. }
  6418. case NEON::BI__builtin_neon_vqshl_n_v:
  6419. case NEON::BI__builtin_neon_vqshlq_n_v:
  6420. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
  6421. 1, false);
  6422. case NEON::BI__builtin_neon_vqshlu_n_v:
  6423. case NEON::BI__builtin_neon_vqshluq_n_v:
  6424. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
  6425. 1, false);
  6426. case NEON::BI__builtin_neon_vrecpe_v:
  6427. case NEON::BI__builtin_neon_vrecpeq_v:
  6428. case NEON::BI__builtin_neon_vrsqrte_v:
  6429. case NEON::BI__builtin_neon_vrsqrteq_v:
  6430. Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
  6431. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  6432. case NEON::BI__builtin_neon_vrndi_v:
  6433. case NEON::BI__builtin_neon_vrndiq_v:
  6434. Int = Builder.getIsFPConstrained()
  6435. ? Intrinsic::experimental_constrained_nearbyint
  6436. : Intrinsic::nearbyint;
  6437. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  6438. case NEON::BI__builtin_neon_vrshr_n_v:
  6439. case NEON::BI__builtin_neon_vrshrq_n_v:
  6440. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
  6441. 1, true);
  6442. case NEON::BI__builtin_neon_vsha512hq_v:
  6443. case NEON::BI__builtin_neon_vsha512h2q_v:
  6444. case NEON::BI__builtin_neon_vsha512su0q_v:
  6445. case NEON::BI__builtin_neon_vsha512su1q_v: {
  6446. Function *F = CGM.getIntrinsic(Int);
  6447. return EmitNeonCall(F, Ops, "");
  6448. }
  6449. case NEON::BI__builtin_neon_vshl_n_v:
  6450. case NEON::BI__builtin_neon_vshlq_n_v:
  6451. Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1], "vshl_n");
  6454. case NEON::BI__builtin_neon_vshll_n_v: {
  6455. llvm::FixedVectorType *SrcTy =
  6456. llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
  6457. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  6458. if (Usgn)
  6459. Ops[0] = Builder.CreateZExt(Ops[0], VTy);
  6460. else
  6461. Ops[0] = Builder.CreateSExt(Ops[0], VTy);
  6462. Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
  6463. return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  6464. }
  6465. case NEON::BI__builtin_neon_vshrn_n_v: {
  6466. llvm::FixedVectorType *SrcTy =
  6467. llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  6468. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  6469. Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
  6470. if (Usgn)
  6471. Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
  6472. else
  6473. Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
  6474. return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  6475. }
  6476. case NEON::BI__builtin_neon_vshr_n_v:
  6477. case NEON::BI__builtin_neon_vshrq_n_v:
  6478. return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  6479. case NEON::BI__builtin_neon_vst1_v:
  6480. case NEON::BI__builtin_neon_vst1q_v:
  6481. case NEON::BI__builtin_neon_vst2_v:
  6482. case NEON::BI__builtin_neon_vst2q_v:
  6483. case NEON::BI__builtin_neon_vst3_v:
  6484. case NEON::BI__builtin_neon_vst3q_v:
  6485. case NEON::BI__builtin_neon_vst4_v:
  6486. case NEON::BI__builtin_neon_vst4q_v:
  6487. case NEON::BI__builtin_neon_vst2_lane_v:
  6488. case NEON::BI__builtin_neon_vst2q_lane_v:
  6489. case NEON::BI__builtin_neon_vst3_lane_v:
  6490. case NEON::BI__builtin_neon_vst3q_lane_v:
  6491. case NEON::BI__builtin_neon_vst4_lane_v:
  6492. case NEON::BI__builtin_neon_vst4q_lane_v: {
  6493. llvm::Type *Tys[] = {Int8PtrTy, Ty};
  6494. Ops.push_back(getAlignmentValue32(PtrOp0));
  6495. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  6496. }
  6497. case NEON::BI__builtin_neon_vsm3partw1q_v:
  6498. case NEON::BI__builtin_neon_vsm3partw2q_v:
  6499. case NEON::BI__builtin_neon_vsm3ss1q_v:
  6500. case NEON::BI__builtin_neon_vsm4ekeyq_v:
  6501. case NEON::BI__builtin_neon_vsm4eq_v: {
  6502. Function *F = CGM.getIntrinsic(Int);
  6503. return EmitNeonCall(F, Ops, "");
  6504. }
  6505. case NEON::BI__builtin_neon_vsm3tt1aq_v:
  6506. case NEON::BI__builtin_neon_vsm3tt1bq_v:
  6507. case NEON::BI__builtin_neon_vsm3tt2aq_v:
  6508. case NEON::BI__builtin_neon_vsm3tt2bq_v: {
  6509. Function *F = CGM.getIntrinsic(Int);
  6510. Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
  6511. return EmitNeonCall(F, Ops, "");
  6512. }
  6513. case NEON::BI__builtin_neon_vst1_x2_v:
  6514. case NEON::BI__builtin_neon_vst1q_x2_v:
  6515. case NEON::BI__builtin_neon_vst1_x3_v:
  6516. case NEON::BI__builtin_neon_vst1q_x3_v:
  6517. case NEON::BI__builtin_neon_vst1_x4_v:
  6518. case NEON::BI__builtin_neon_vst1q_x4_v: {
  6519. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
// TODO: Currently in AArch32 mode the pointer operand comes first, whereas
// in AArch64 it comes last. We may want to standardize on one or the other.
  6522. if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
  6523. Arch == llvm::Triple::aarch64_32) {
  6524. llvm::Type *Tys[2] = { VTy, PTy };
  6525. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  6526. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  6527. }
  6528. llvm::Type *Tys[2] = { PTy, VTy };
  6529. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  6530. }
  6531. case NEON::BI__builtin_neon_vsubhn_v: {
  6532. llvm::FixedVectorType *SrcTy =
  6533. llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  6534. // %sum = add <4 x i32> %lhs, %rhs
  6535. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  6536. Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
  6537. Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
  6538. // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  6539. Constant *ShiftAmt =
  6540. ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
  6541. Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
  6542. // %res = trunc <4 x i32> %high to <4 x i16>
  6543. return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  6544. }
  6545. case NEON::BI__builtin_neon_vtrn_v:
  6546. case NEON::BI__builtin_neon_vtrnq_v: {
  6547. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  6548. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6549. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  6550. Value *SV = nullptr;
  6551. for (unsigned vi = 0; vi != 2; ++vi) {
  6552. SmallVector<int, 16> Indices;
  6553. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  6554. Indices.push_back(i+vi);
  6555. Indices.push_back(i+e+vi);
  6556. }
  6557. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  6558. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
  6559. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  6560. }
  6561. return SV;
  6562. }
  6563. case NEON::BI__builtin_neon_vtst_v:
  6564. case NEON::BI__builtin_neon_vtstq_v: {
  6565. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6566. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6567. Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  6568. Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
  6569. ConstantAggregateZero::get(Ty));
  6570. return Builder.CreateSExt(Ops[0], Ty, "vtst");
  6571. }
  6572. case NEON::BI__builtin_neon_vuzp_v:
  6573. case NEON::BI__builtin_neon_vuzpq_v: {
  6574. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  6575. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6576. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  6577. Value *SV = nullptr;
  6578. for (unsigned vi = 0; vi != 2; ++vi) {
  6579. SmallVector<int, 16> Indices;
  6580. for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
  6581. Indices.push_back(2*i+vi);
  6582. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  6583. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
  6584. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  6585. }
  6586. return SV;
  6587. }
  6588. case NEON::BI__builtin_neon_vxarq_v: {
  6589. Function *F = CGM.getIntrinsic(Int);
  6590. Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
  6591. return EmitNeonCall(F, Ops, "");
  6592. }
  6593. case NEON::BI__builtin_neon_vzip_v:
  6594. case NEON::BI__builtin_neon_vzipq_v: {
  6595. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  6596. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6597. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  6598. Value *SV = nullptr;
  6599. for (unsigned vi = 0; vi != 2; ++vi) {
  6600. SmallVector<int, 16> Indices;
  6601. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  6602. Indices.push_back((i + vi*e) >> 1);
  6603. Indices.push_back(((i + vi*e) >> 1)+e);
  6604. }
  6605. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  6606. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
  6607. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  6608. }
  6609. return SV;
  6610. }
  6611. case NEON::BI__builtin_neon_vdot_v:
  6612. case NEON::BI__builtin_neon_vdotq_v: {
  6613. auto *InputTy =
  6614. llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  6615. llvm::Type *Tys[2] = { Ty, InputTy };
  6616. Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  6617. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  6618. }
  6619. case NEON::BI__builtin_neon_vfmlal_low_v:
  6620. case NEON::BI__builtin_neon_vfmlalq_low_v: {
  6621. auto *InputTy =
  6622. llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  6623. llvm::Type *Tys[2] = { Ty, InputTy };
  6624. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
  6625. }
  6626. case NEON::BI__builtin_neon_vfmlsl_low_v:
  6627. case NEON::BI__builtin_neon_vfmlslq_low_v: {
  6628. auto *InputTy =
  6629. llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  6630. llvm::Type *Tys[2] = { Ty, InputTy };
  6631. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
  6632. }
  6633. case NEON::BI__builtin_neon_vfmlal_high_v:
  6634. case NEON::BI__builtin_neon_vfmlalq_high_v: {
  6635. auto *InputTy =
  6636. llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  6637. llvm::Type *Tys[2] = { Ty, InputTy };
  6638. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
  6639. }
  6640. case NEON::BI__builtin_neon_vfmlsl_high_v:
  6641. case NEON::BI__builtin_neon_vfmlslq_high_v: {
  6642. auto *InputTy =
  6643. llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  6644. llvm::Type *Tys[2] = { Ty, InputTy };
  6645. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
  6646. }
  6647. case NEON::BI__builtin_neon_vmmlaq_v: {
  6648. auto *InputTy =
  6649. llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  6650. llvm::Type *Tys[2] = { Ty, InputTy };
  6651. Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  6652. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
  6653. }
  6654. case NEON::BI__builtin_neon_vusmmlaq_v: {
  6655. auto *InputTy =
  6656. llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  6657. llvm::Type *Tys[2] = { Ty, InputTy };
  6658. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
  6659. }
  6660. case NEON::BI__builtin_neon_vusdot_v:
  6661. case NEON::BI__builtin_neon_vusdotq_v: {
  6662. auto *InputTy =
  6663. llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  6664. llvm::Type *Tys[2] = { Ty, InputTy };
  6665. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
  6666. }
  6667. case NEON::BI__builtin_neon_vbfdot_v:
  6668. case NEON::BI__builtin_neon_vbfdotq_v: {
  6669. llvm::Type *InputTy =
  6670. llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
  6671. llvm::Type *Tys[2] = { Ty, InputTy };
  6672. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
  6673. }
  6674. case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
  6675. llvm::Type *Tys[1] = { Ty };
  6676. Function *F = CGM.getIntrinsic(Int, Tys);
  6677. return EmitNeonCall(F, Ops, "vcvtfp2bf");
  6678. }
  6679. }
  6680. assert(Int && "Expected valid intrinsic number");
  6681. // Determine the type(s) of this overloaded AArch64 intrinsic.
  6682. Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
  6683. Value *Result = EmitNeonCall(F, Ops, NameHint);
  6684. llvm::Type *ResultType = ConvertType(E->getType());
// Cast the AArch64 intrinsic's one-element vector result back to the
// scalar type expected by the builtin.
  6687. return Builder.CreateBitCast(Result, ResultType, NameHint);
  6688. }
  6689. Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
  6690. Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
  6691. const CmpInst::Predicate Ip, const Twine &Name) {
  6692. llvm::Type *OTy = Op->getType();
  6693. // FIXME: this is utterly horrific. We should not be looking at previous
  6694. // codegen context to find out what needs doing. Unfortunately TableGen
  6695. // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
  6696. // (etc).
  6697. if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
  6698. OTy = BI->getOperand(0)->getType();
  6699. Op = Builder.CreateBitCast(Op, OTy);
  6700. if (OTy->getScalarType()->isFloatingPointTy()) {
  6701. Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
  6702. } else {
  6703. Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  6704. }
  6705. return Builder.CreateSExt(Op, Ty, Name);
  6706. }
  6707. static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
  6708. Value *ExtOp, Value *IndexOp,
  6709. llvm::Type *ResTy, unsigned IntID,
  6710. const char *Name) {
  6711. SmallVector<Value *, 2> TblOps;
  6712. if (ExtOp)
  6713. TblOps.push_back(ExtOp);
// Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  6715. SmallVector<int, 16> Indices;
  6716. auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
  6717. for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
  6718. Indices.push_back(2*i);
  6719. Indices.push_back(2*i+1);
  6720. }
  6721. int PairPos = 0, End = Ops.size() - 1;
  6722. while (PairPos < End) {
  6723. TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
  6724. Ops[PairPos+1], Indices,
  6725. Name));
  6726. PairPos += 2;
  6727. }
// If there's an odd number of 64-bit lookup tables, fill the high 64 bits
// of the final 128-bit lookup table with zero.
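// e.g. (illustrative) vtbl3 pairs table registers 0 and 1 into one 128-bit
// table and widens register 2 with a zero vector to form the second.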
  6730. if (PairPos == End) {
  6731. Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
  6732. TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
  6733. ZeroTbl, Indices, Name));
  6734. }
  6735. Function *TblF;
  6736. TblOps.push_back(IndexOp);
  6737. TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
  6738. return CGF.EmitNeonCall(TblF, TblOps, Name);
  6739. }
  6740. Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  6741. unsigned Value;
  6742. switch (BuiltinID) {
  6743. default:
  6744. return nullptr;
  6745. case ARM::BI__builtin_arm_nop:
  6746. Value = 0;
  6747. break;
  6748. case ARM::BI__builtin_arm_yield:
  6749. case ARM::BI__yield:
  6750. Value = 1;
  6751. break;
  6752. case ARM::BI__builtin_arm_wfe:
  6753. case ARM::BI__wfe:
  6754. Value = 2;
  6755. break;
  6756. case ARM::BI__builtin_arm_wfi:
  6757. case ARM::BI__wfi:
  6758. Value = 3;
  6759. break;
  6760. case ARM::BI__builtin_arm_sev:
  6761. case ARM::BI__sev:
  6762. Value = 4;
  6763. break;
  6764. case ARM::BI__builtin_arm_sevl:
  6765. case ARM::BI__sevl:
  6766. Value = 5;
  6767. break;
  6768. }
  6769. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
  6770. llvm::ConstantInt::get(Int32Ty, Value));
  6771. }
  6772. enum SpecialRegisterAccessKind {
  6773. NormalRead,
  6774. VolatileRead,
  6775. Write,
  6776. };
// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read;
// RegisterType is the type of the register being written to or read from.
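// A 32-bit volatile read, for example, is expected to come out roughly as
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata <reg-name>)
// (illustrative; the metadata node wraps the register-name string).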
  6780. static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
  6781. const CallExpr *E,
  6782. llvm::Type *RegisterType,
  6783. llvm::Type *ValueType,
  6784. SpecialRegisterAccessKind AccessKind,
  6785. StringRef SysReg = "") {
// The read/write register intrinsics only support 32- and 64-bit operations.
  6787. assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
  6788. && "Unsupported size for register.");
  6789. CodeGen::CGBuilderTy &Builder = CGF.Builder;
  6790. CodeGen::CodeGenModule &CGM = CGF.CGM;
  6791. LLVMContext &Context = CGM.getLLVMContext();
  6792. if (SysReg.empty()) {
  6793. const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
  6794. SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  6795. }
  6796. llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  6797. llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  6798. llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  6799. llvm::Type *Types[] = { RegisterType };
  6800. bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  6801. assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
  6802. && "Can't fit 64-bit value in 32-bit register");
  6803. if (AccessKind != Write) {
  6804. assert(AccessKind == NormalRead || AccessKind == VolatileRead);
  6805. llvm::Function *F = CGM.getIntrinsic(
  6806. AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
  6807. : llvm::Intrinsic::read_register,
  6808. Types);
  6809. llvm::Value *Call = Builder.CreateCall(F, Metadata);
  6810. if (MixedTypes)
  6811. // Read into 64 bit register and then truncate result to 32 bit.
  6812. return Builder.CreateTrunc(Call, ValueType);
  6813. if (ValueType->isPointerTy())
  6814. // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
  6815. return Builder.CreateIntToPtr(Call, ValueType);
  6816. return Call;
  6817. }
  6818. llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
  6819. llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  6820. if (MixedTypes) {
  6821. // Extend 32 bit write value to 64 bit to pass to write.
  6822. ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
  6823. return Builder.CreateCall(F, { Metadata, ArgValue });
  6824. }
  6825. if (ValueType->isPointerTy()) {
  6826. // Have VoidPtrTy ArgValue but want to return an i32/i64.
  6827. ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
  6828. return Builder.CreateCall(F, { Metadata, ArgValue });
  6829. }
  6830. return Builder.CreateCall(F, { Metadata, ArgValue });
  6831. }
  6832. /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
  6833. /// argument that specifies the vector type.
  6834. static bool HasExtraNeonArgument(unsigned BuiltinID) {
  6835. switch (BuiltinID) {
  6836. default: break;
  6837. case NEON::BI__builtin_neon_vget_lane_i8:
  6838. case NEON::BI__builtin_neon_vget_lane_i16:
  6839. case NEON::BI__builtin_neon_vget_lane_bf16:
  6840. case NEON::BI__builtin_neon_vget_lane_i32:
  6841. case NEON::BI__builtin_neon_vget_lane_i64:
  6842. case NEON::BI__builtin_neon_vget_lane_f32:
  6843. case NEON::BI__builtin_neon_vgetq_lane_i8:
  6844. case NEON::BI__builtin_neon_vgetq_lane_i16:
  6845. case NEON::BI__builtin_neon_vgetq_lane_bf16:
  6846. case NEON::BI__builtin_neon_vgetq_lane_i32:
  6847. case NEON::BI__builtin_neon_vgetq_lane_i64:
  6848. case NEON::BI__builtin_neon_vgetq_lane_f32:
  6849. case NEON::BI__builtin_neon_vduph_lane_bf16:
  6850. case NEON::BI__builtin_neon_vduph_laneq_bf16:
  6851. case NEON::BI__builtin_neon_vset_lane_i8:
  6852. case NEON::BI__builtin_neon_vset_lane_i16:
  6853. case NEON::BI__builtin_neon_vset_lane_bf16:
  6854. case NEON::BI__builtin_neon_vset_lane_i32:
  6855. case NEON::BI__builtin_neon_vset_lane_i64:
  6856. case NEON::BI__builtin_neon_vset_lane_f32:
  6857. case NEON::BI__builtin_neon_vsetq_lane_i8:
  6858. case NEON::BI__builtin_neon_vsetq_lane_i16:
  6859. case NEON::BI__builtin_neon_vsetq_lane_bf16:
  6860. case NEON::BI__builtin_neon_vsetq_lane_i32:
  6861. case NEON::BI__builtin_neon_vsetq_lane_i64:
  6862. case NEON::BI__builtin_neon_vsetq_lane_f32:
  6863. case NEON::BI__builtin_neon_vsha1h_u32:
  6864. case NEON::BI__builtin_neon_vsha1cq_u32:
  6865. case NEON::BI__builtin_neon_vsha1pq_u32:
  6866. case NEON::BI__builtin_neon_vsha1mq_u32:
  6867. case NEON::BI__builtin_neon_vcvth_bf16_f32:
  6868. case clang::ARM::BI_MoveToCoprocessor:
  6869. case clang::ARM::BI_MoveToCoprocessor2:
  6870. return false;
  6871. }
  6872. return true;
  6873. }
  6874. Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
  6875. const CallExpr *E,
  6876. ReturnValueSlot ReturnValue,
  6877. llvm::Triple::ArchType Arch) {
  6878. if (auto Hint = GetValueForARMHint(BuiltinID))
  6879. return Hint;
  6880. if (BuiltinID == ARM::BI__emit) {
  6881. bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
  6882. llvm::FunctionType *FTy =
  6883. llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
  6884. Expr::EvalResult Result;
  6885. if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
  6886. llvm_unreachable("Sema will ensure that the parameter is constant");
  6887. llvm::APSInt Value = Result.Val.getInt();
  6888. uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
  6889. llvm::InlineAsm *Emit =
  6890. IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
  6891. /*hasSideEffects=*/true)
  6892. : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
  6893. /*hasSideEffects=*/true);
  6894. return Builder.CreateCall(Emit);
  6895. }
  6896. if (BuiltinID == ARM::BI__builtin_arm_dbg) {
  6897. Value *Option = EmitScalarExpr(E->getArg(0));
  6898. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  6899. }
  6900. if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
  6901. Value *Address = EmitScalarExpr(E->getArg(0));
  6902. Value *RW = EmitScalarExpr(E->getArg(1));
  6903. Value *IsData = EmitScalarExpr(E->getArg(2));
// Locality is not supported on the ARM target.
  6905. Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
  6906. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  6907. return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  6908. }
  6909. if (BuiltinID == ARM::BI__builtin_arm_rbit) {
  6910. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  6911. return Builder.CreateCall(
  6912. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  6913. }
  6914. if (BuiltinID == ARM::BI__builtin_arm_cls) {
  6915. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  6916. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
  6917. }
  6918. if (BuiltinID == ARM::BI__builtin_arm_cls64) {
  6919. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  6920. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
  6921. "cls");
  6922. }
  6923. if (BuiltinID == ARM::BI__clear_cache) {
  6924. assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
  6925. const FunctionDecl *FD = E->getDirectCallee();
  6926. Value *Ops[2];
  6927. for (unsigned i = 0; i < 2; i++)
  6928. Ops[i] = EmitScalarExpr(E->getArg(i));
  6929. llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
  6930. llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  6931. StringRef Name = FD->getName();
  6932. return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  6933. }
  6934. if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
  6935. BuiltinID == ARM::BI__builtin_arm_mcrr2) {
  6936. Function *F;
  6937. switch (BuiltinID) {
  6938. default: llvm_unreachable("unexpected builtin");
  6939. case ARM::BI__builtin_arm_mcrr:
  6940. F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
  6941. break;
  6942. case ARM::BI__builtin_arm_mcrr2:
  6943. F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
  6944. break;
  6945. }
// The MCRR{2} instruction has 5 operands, but the builtin has only 4:
// Rt and Rt2 are represented as a single unsigned 64-bit integer in the
// builtin definition, and are split back into two 32-bit integers here.
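// e.g. (illustrative) __builtin_arm_mcrr(cp, opc1, rt_rt2, crm) is lowered by
// splitting rt_rt2 into Rt = trunc(rt_rt2) and Rt2 = trunc(rt_rt2 >> 32).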
  6952. Value *Coproc = EmitScalarExpr(E->getArg(0));
  6953. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  6954. Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
  6955. Value *CRm = EmitScalarExpr(E->getArg(3));
  6956. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  6957. Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
  6958. Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
  6959. Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
  6960. return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  6961. }
  6962. if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
  6963. BuiltinID == ARM::BI__builtin_arm_mrrc2) {
  6964. Function *F;
  6965. switch (BuiltinID) {
  6966. default: llvm_unreachable("unexpected builtin");
  6967. case ARM::BI__builtin_arm_mrrc:
  6968. F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
  6969. break;
  6970. case ARM::BI__builtin_arm_mrrc2:
  6971. F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
  6972. break;
  6973. }
  6974. Value *Coproc = EmitScalarExpr(E->getArg(0));
  6975. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  6976. Value *CRm = EmitScalarExpr(E->getArg(2));
  6977. Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
// The intrinsic returns the unsigned 64-bit value as two 32-bit integers,
// which are reassembled below.
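// i.e. (illustrative) the 64-bit result is rebuilt as
//   ((uint64_t)Rt << 32) | Rt1
// before being bitcast to the builtin's return type.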
  6980. Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
  6981. Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
  6982. Rt = Builder.CreateZExt(Rt, Int64Ty);
  6983. Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
  6984. Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
  6985. RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
  6986. RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
  6987. return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  6988. }
  6989. if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
  6990. ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
  6991. BuiltinID == ARM::BI__builtin_arm_ldaex) &&
  6992. getContext().getTypeSize(E->getType()) == 64) ||
  6993. BuiltinID == ARM::BI__ldrexd) {
  6994. Function *F;
  6995. switch (BuiltinID) {
  6996. default: llvm_unreachable("unexpected builtin");
  6997. case ARM::BI__builtin_arm_ldaex:
  6998. F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
  6999. break;
  7000. case ARM::BI__builtin_arm_ldrexd:
  7001. case ARM::BI__builtin_arm_ldrex:
  7002. case ARM::BI__ldrexd:
  7003. F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
  7004. break;
  7005. }
  7006. Value *LdPtr = EmitScalarExpr(E->getArg(0));
  7007. Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
  7008. "ldrexd");
  7009. Value *Val0 = Builder.CreateExtractValue(Val, 1);
  7010. Value *Val1 = Builder.CreateExtractValue(Val, 0);
  7011. Val0 = Builder.CreateZExt(Val0, Int64Ty);
  7012. Val1 = Builder.CreateZExt(Val1, Int64Ty);
  7013. Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
  7014. Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
  7015. Val = Builder.CreateOr(Val, Val1);
  7016. return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  7017. }
  7018. if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
  7019. BuiltinID == ARM::BI__builtin_arm_ldaex) {
  7020. Value *LoadAddr = EmitScalarExpr(E->getArg(0));
  7021. QualType Ty = E->getType();
  7022. llvm::Type *RealResTy = ConvertType(Ty);
  7023. llvm::Type *PtrTy = llvm::IntegerType::get(
  7024. getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
  7025. LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
  7026. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
  7027. ? Intrinsic::arm_ldaex
  7028. : Intrinsic::arm_ldrex,
  7029. PtrTy);
  7030. Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
  7031. if (RealResTy->isPointerTy())
  7032. return Builder.CreateIntToPtr(Val, RealResTy);
  7033. else {
  7034. llvm::Type *IntResTy = llvm::IntegerType::get(
  7035. getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
  7036. Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
  7037. return Builder.CreateBitCast(Val, RealResTy);
  7038. }
  7039. }
  7040. if (BuiltinID == ARM::BI__builtin_arm_strexd ||
  7041. ((BuiltinID == ARM::BI__builtin_arm_stlex ||
  7042. BuiltinID == ARM::BI__builtin_arm_strex) &&
  7043. getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
  7044. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
  7045. ? Intrinsic::arm_stlexd
  7046. : Intrinsic::arm_strexd);
  7047. llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
  7048. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  7049. Value *Val = EmitScalarExpr(E->getArg(0));
  7050. Builder.CreateStore(Val, Tmp);
Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
  7052. Val = Builder.CreateLoad(LdPtr);
  7053. Value *Arg0 = Builder.CreateExtractValue(Val, 0);
  7054. Value *Arg1 = Builder.CreateExtractValue(Val, 1);
  7055. Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
  7056. return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  7057. }
  7058. if (BuiltinID == ARM::BI__builtin_arm_strex ||
  7059. BuiltinID == ARM::BI__builtin_arm_stlex) {
  7060. Value *StoreVal = EmitScalarExpr(E->getArg(0));
  7061. Value *StoreAddr = EmitScalarExpr(E->getArg(1));
  7062. QualType Ty = E->getArg(0)->getType();
  7063. llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
  7064. getContext().getTypeSize(Ty));
  7065. StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
  7066. if (StoreVal->getType()->isPointerTy())
  7067. StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
  7068. else {
  7069. llvm::Type *IntTy = llvm::IntegerType::get(
  7070. getLLVMContext(),
  7071. CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
  7072. StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
  7073. StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
  7074. }
  7075. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
  7076. ? Intrinsic::arm_stlex
  7077. : Intrinsic::arm_strex,
  7078. StoreAddr->getType());
  7079. return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
  7080. }
  7081. if (BuiltinID == ARM::BI__builtin_arm_clrex) {
  7082. Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
  7083. return Builder.CreateCall(F);
  7084. }
  7085. // CRC32
  7086. Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  7087. switch (BuiltinID) {
  7088. case ARM::BI__builtin_arm_crc32b:
  7089. CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  7090. case ARM::BI__builtin_arm_crc32cb:
  7091. CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  7092. case ARM::BI__builtin_arm_crc32h:
  7093. CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  7094. case ARM::BI__builtin_arm_crc32ch:
  7095. CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  7096. case ARM::BI__builtin_arm_crc32w:
  7097. case ARM::BI__builtin_arm_crc32d:
  7098. CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  7099. case ARM::BI__builtin_arm_crc32cw:
  7100. case ARM::BI__builtin_arm_crc32cd:
  7101. CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  7102. }
  7103. if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
  7104. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  7105. Value *Arg1 = EmitScalarExpr(E->getArg(1));
// The crc32{c,}d intrinsics are implemented as two calls to the crc32{c,}w
// intrinsics, hence we need different codegen for these cases.
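// e.g. (illustrative) crc32d(crc, x) is emitted as
//   crc32w(crc32w(crc, lo32(x)), hi32(x))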
  7108. if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
  7109. BuiltinID == ARM::BI__builtin_arm_crc32cd) {
  7110. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  7111. Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
  7112. Value *Arg1b = Builder.CreateLShr(Arg1, C1);
  7113. Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
  7114. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  7115. Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
  7116. return Builder.CreateCall(F, {Res, Arg1b});
  7117. } else {
  7118. Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
  7119. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  7120. return Builder.CreateCall(F, {Arg0, Arg1});
  7121. }
  7122. }
  7123. if (BuiltinID == ARM::BI__builtin_arm_rsr ||
  7124. BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  7125. BuiltinID == ARM::BI__builtin_arm_rsrp ||
  7126. BuiltinID == ARM::BI__builtin_arm_wsr ||
  7127. BuiltinID == ARM::BI__builtin_arm_wsr64 ||
  7128. BuiltinID == ARM::BI__builtin_arm_wsrp) {
  7129. SpecialRegisterAccessKind AccessKind = Write;
  7130. if (BuiltinID == ARM::BI__builtin_arm_rsr ||
  7131. BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  7132. BuiltinID == ARM::BI__builtin_arm_rsrp)
  7133. AccessKind = VolatileRead;
  7134. bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
  7135. BuiltinID == ARM::BI__builtin_arm_wsrp;
  7136. bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  7137. BuiltinID == ARM::BI__builtin_arm_wsr64;
  7138. llvm::Type *ValueType;
  7139. llvm::Type *RegisterType;
  7140. if (IsPointerBuiltin) {
  7141. ValueType = VoidPtrTy;
  7142. RegisterType = Int32Ty;
  7143. } else if (Is64Bit) {
  7144. ValueType = RegisterType = Int64Ty;
  7145. } else {
  7146. ValueType = RegisterType = Int32Ty;
  7147. }
  7148. return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
  7149. AccessKind);
  7150. }
  7151. // Handle MSVC intrinsics before argument evaluation to prevent double
  7152. // evaluation.
  7153. if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
  7154. return EmitMSVCBuiltinExpr(*MsvcIntId, E);
  7155. // Deal with MVE builtins
  7156. if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
  7157. return Result;
  7158. // Handle CDE builtins
  7159. if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
  7160. return Result;
  7161. // Find out if any arguments are required to be integer constant
  7162. // expressions.
  7163. unsigned ICEArguments = 0;
  7164. ASTContext::GetBuiltinTypeError Error;
  7165. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  7166. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  7167. auto getAlignmentValue32 = [&](Address addr) -> Value* {
  7168. return Builder.getInt32(addr.getAlignment().getQuantity());
  7169. };
  7170. Address PtrOp0 = Address::invalid();
  7171. Address PtrOp1 = Address::invalid();
  7172. SmallVector<Value*, 4> Ops;
  7173. bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  7174. unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  7175. for (unsigned i = 0, e = NumArgs; i != e; i++) {
  7176. if (i == 0) {
  7177. switch (BuiltinID) {
  7178. case NEON::BI__builtin_neon_vld1_v:
  7179. case NEON::BI__builtin_neon_vld1q_v:
  7180. case NEON::BI__builtin_neon_vld1q_lane_v:
  7181. case NEON::BI__builtin_neon_vld1_lane_v:
  7182. case NEON::BI__builtin_neon_vld1_dup_v:
  7183. case NEON::BI__builtin_neon_vld1q_dup_v:
  7184. case NEON::BI__builtin_neon_vst1_v:
  7185. case NEON::BI__builtin_neon_vst1q_v:
  7186. case NEON::BI__builtin_neon_vst1q_lane_v:
  7187. case NEON::BI__builtin_neon_vst1_lane_v:
  7188. case NEON::BI__builtin_neon_vst2_v:
  7189. case NEON::BI__builtin_neon_vst2q_v:
  7190. case NEON::BI__builtin_neon_vst2_lane_v:
  7191. case NEON::BI__builtin_neon_vst2q_lane_v:
  7192. case NEON::BI__builtin_neon_vst3_v:
  7193. case NEON::BI__builtin_neon_vst3q_v:
  7194. case NEON::BI__builtin_neon_vst3_lane_v:
  7195. case NEON::BI__builtin_neon_vst3q_lane_v:
  7196. case NEON::BI__builtin_neon_vst4_v:
  7197. case NEON::BI__builtin_neon_vst4q_v:
  7198. case NEON::BI__builtin_neon_vst4_lane_v:
  7199. case NEON::BI__builtin_neon_vst4q_lane_v:
  7200. // Get the alignment for the argument in addition to the value;
  7201. // we'll use it later.
  7202. PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
  7203. Ops.push_back(PtrOp0.getPointer());
  7204. continue;
  7205. }
  7206. }
  7207. if (i == 1) {
  7208. switch (BuiltinID) {
  7209. case NEON::BI__builtin_neon_vld2_v:
  7210. case NEON::BI__builtin_neon_vld2q_v:
  7211. case NEON::BI__builtin_neon_vld3_v:
  7212. case NEON::BI__builtin_neon_vld3q_v:
  7213. case NEON::BI__builtin_neon_vld4_v:
  7214. case NEON::BI__builtin_neon_vld4q_v:
  7215. case NEON::BI__builtin_neon_vld2_lane_v:
  7216. case NEON::BI__builtin_neon_vld2q_lane_v:
  7217. case NEON::BI__builtin_neon_vld3_lane_v:
  7218. case NEON::BI__builtin_neon_vld3q_lane_v:
  7219. case NEON::BI__builtin_neon_vld4_lane_v:
  7220. case NEON::BI__builtin_neon_vld4q_lane_v:
  7221. case NEON::BI__builtin_neon_vld2_dup_v:
  7222. case NEON::BI__builtin_neon_vld2q_dup_v:
  7223. case NEON::BI__builtin_neon_vld3_dup_v:
  7224. case NEON::BI__builtin_neon_vld3q_dup_v:
  7225. case NEON::BI__builtin_neon_vld4_dup_v:
  7226. case NEON::BI__builtin_neon_vld4q_dup_v:
  7227. // Get the alignment for the argument in addition to the value;
  7228. // we'll use it later.
  7229. PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
  7230. Ops.push_back(PtrOp1.getPointer());
  7231. continue;
  7232. }
  7233. }
  7234. if ((ICEArguments & (1 << i)) == 0) {
  7235. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  7236. } else {
  7237. // If this is required to be a constant, constant fold it so that we know
  7238. // that the generated intrinsic gets a ConstantInt.
  7239. Ops.push_back(llvm::ConstantInt::get(
  7240. getLLVMContext(),
  7241. *E->getArg(i)->getIntegerConstantExpr(getContext())));
  7242. }
  7243. }
  7244. switch (BuiltinID) {
  7245. default: break;
  7246. case NEON::BI__builtin_neon_vget_lane_i8:
  7247. case NEON::BI__builtin_neon_vget_lane_i16:
  7248. case NEON::BI__builtin_neon_vget_lane_i32:
  7249. case NEON::BI__builtin_neon_vget_lane_i64:
  7250. case NEON::BI__builtin_neon_vget_lane_bf16:
  7251. case NEON::BI__builtin_neon_vget_lane_f32:
  7252. case NEON::BI__builtin_neon_vgetq_lane_i8:
  7253. case NEON::BI__builtin_neon_vgetq_lane_i16:
  7254. case NEON::BI__builtin_neon_vgetq_lane_i32:
  7255. case NEON::BI__builtin_neon_vgetq_lane_i64:
  7256. case NEON::BI__builtin_neon_vgetq_lane_bf16:
  7257. case NEON::BI__builtin_neon_vgetq_lane_f32:
  7258. case NEON::BI__builtin_neon_vduph_lane_bf16:
  7259. case NEON::BI__builtin_neon_vduph_laneq_bf16:
  7260. return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  7261. case NEON::BI__builtin_neon_vrndns_f32: {
  7262. Value *Arg = EmitScalarExpr(E->getArg(0));
  7263. llvm::Type *Tys[] = {Arg->getType()};
  7264. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
return Builder.CreateCall(F, {Arg}, "vrndn");
}
  7266. case NEON::BI__builtin_neon_vset_lane_i8:
  7267. case NEON::BI__builtin_neon_vset_lane_i16:
  7268. case NEON::BI__builtin_neon_vset_lane_i32:
  7269. case NEON::BI__builtin_neon_vset_lane_i64:
  7270. case NEON::BI__builtin_neon_vset_lane_bf16:
  7271. case NEON::BI__builtin_neon_vset_lane_f32:
  7272. case NEON::BI__builtin_neon_vsetq_lane_i8:
  7273. case NEON::BI__builtin_neon_vsetq_lane_i16:
  7274. case NEON::BI__builtin_neon_vsetq_lane_i32:
  7275. case NEON::BI__builtin_neon_vsetq_lane_i64:
  7276. case NEON::BI__builtin_neon_vsetq_lane_bf16:
  7277. case NEON::BI__builtin_neon_vsetq_lane_f32:
  7278. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  7279. case NEON::BI__builtin_neon_vsha1h_u32:
  7280. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
  7281. "vsha1h");
  7282. case NEON::BI__builtin_neon_vsha1cq_u32:
  7283. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
  7284. "vsha1h");
  7285. case NEON::BI__builtin_neon_vsha1pq_u32:
  7286. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
  7287. "vsha1h");
  7288. case NEON::BI__builtin_neon_vsha1mq_u32:
  7289. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
  7290. "vsha1h");
  7291. case NEON::BI__builtin_neon_vcvth_bf16_f32: {
  7292. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
  7293. "vcvtbfp2bf");
  7294. }
  7295. // The ARM _MoveToCoprocessor builtins put the input register value as
  7296. // the first argument, but the LLVM intrinsic expects it as the third one.
  7297. case ARM::BI_MoveToCoprocessor:
  7298. case ARM::BI_MoveToCoprocessor2: {
  7299. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
  7300. Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
  7301. return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
  7302. Ops[3], Ops[4], Ops[5]});
  7303. }
  7304. }
  7305. // Get the last argument, which specifies the vector type.
  7306. assert(HasExtraArg);
  7307. const Expr *Arg = E->getArg(E->getNumArgs()-1);
  7308. Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext());
  7309. if (!Result)
  7310. return nullptr;
  7311. if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
  7312. BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
  7313. // Determine the overloaded type of this builtin.
  7314. llvm::Type *Ty;
  7315. if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
  7316. Ty = FloatTy;
  7317. else
  7318. Ty = DoubleTy;
  7319. // Determine whether this is an unsigned conversion or not.
  7320. bool usgn = Result->getZExtValue() == 1;
  7321. unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
  7322. // Call the appropriate intrinsic.
  7323. Function *F = CGM.getIntrinsic(Int, Ty);
  7324. return Builder.CreateCall(F, Ops, "vcvtr");
  7325. }
  7326. // Determine the type of this overloaded NEON intrinsic.
  7327. NeonTypeFlags Type = Result->getZExtValue();
  7328. bool usgn = Type.isUnsigned();
  7329. bool rightShift = false;
  7330. llvm::FixedVectorType *VTy =
  7331. GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
  7332. getTarget().hasBFloat16Type());
  7333. llvm::Type *Ty = VTy;
  7334. if (!Ty)
  7335. return nullptr;
  7336. // Many NEON builtins have identical semantics and uses in ARM and
  7337. // AArch64. Emit these in a single function.
  7338. auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  7339. const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
  7340. IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  7341. if (Builtin)
  7342. return EmitCommonNeonBuiltinExpr(
  7343. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  7344. Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
  7345. unsigned Int;
  7346. switch (BuiltinID) {
  7347. default: return nullptr;
  7348. case NEON::BI__builtin_neon_vld1q_lane_v:
  7349. // Handle 64-bit integer elements as a special case. Use shuffles of
  7350. // one-element vectors to avoid poor code for i64 in the backend.
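// Sketch (illustrative): keep lane 1-Lane of the existing vector via a
// one-element shuffle, load the new value as a <1 x i64> through
// @llvm.arm.neon.vld1, then shuffle the two single-lane vectors back together.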
  7351. if (VTy->getElementType()->isIntegerTy(64)) {
  7352. // Extract the other lane.
  7353. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7354. int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
  7355. Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
  7356. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  7357. // Load the value as a one-element vector.
  7358. Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
  7359. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  7360. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
  7361. Value *Align = getAlignmentValue32(PtrOp0);
  7362. Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
  7363. // Combine them.
  7364. int Indices[] = {1 - Lane, Lane};
  7365. return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
  7366. }
  7367. LLVM_FALLTHROUGH;
  7368. case NEON::BI__builtin_neon_vld1_lane_v: {
  7369. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7370. PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
  7371. Value *Ld = Builder.CreateLoad(PtrOp0);
  7372. return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  7373. }
  7374. case NEON::BI__builtin_neon_vqrshrn_n_v:
  7375. Int =
  7376. usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
  7377. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
  7378. 1, true);
  7379. case NEON::BI__builtin_neon_vqrshrun_n_v:
  7380. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
  7381. Ops, "vqrshrun_n", 1, true);
  7382. case NEON::BI__builtin_neon_vqshrn_n_v:
  7383. Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
  7384. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
  7385. 1, true);
  7386. case NEON::BI__builtin_neon_vqshrun_n_v:
  7387. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
  7388. Ops, "vqshrun_n", 1, true);
  7389. case NEON::BI__builtin_neon_vrecpe_v:
  7390. case NEON::BI__builtin_neon_vrecpeq_v:
  7391. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
  7392. Ops, "vrecpe");
  7393. case NEON::BI__builtin_neon_vrshrn_n_v:
  7394. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
  7395. Ops, "vrshrn_n", 1, true);
  7396. case NEON::BI__builtin_neon_vrsra_n_v:
  7397. case NEON::BI__builtin_neon_vrsraq_n_v:
  7398. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7399. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7400. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
  7401. Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
  7402. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
  7403. return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  7404. case NEON::BI__builtin_neon_vsri_n_v:
  7405. case NEON::BI__builtin_neon_vsriq_n_v:
  7406. rightShift = true;
  7407. LLVM_FALLTHROUGH;
  7408. case NEON::BI__builtin_neon_vsli_n_v:
  7409. case NEON::BI__builtin_neon_vsliq_n_v:
  7410. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
  7411. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
  7412. Ops, "vsli_n");
  7413. case NEON::BI__builtin_neon_vsra_n_v:
  7414. case NEON::BI__builtin_neon_vsraq_n_v:
  7415. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7416. Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
  7417. return Builder.CreateAdd(Ops[0], Ops[1]);
  7418. case NEON::BI__builtin_neon_vst1q_lane_v:
  7419. // Handle 64-bit integer elements as a special case. Use a shuffle to get
  7420. // a one-element vector and avoid poor code for i64 in the backend.
  7421. if (VTy->getElementType()->isIntegerTy(64)) {
  7422. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7423. Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
  7424. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  7425. Ops[2] = getAlignmentValue32(PtrOp0);
  7426. llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
  7427. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
  7428. Tys), Ops);
  7429. }
  7430. LLVM_FALLTHROUGH;
  7431. case NEON::BI__builtin_neon_vst1_lane_v: {
  7432. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7433. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
  7434. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7435. auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
  7436. return St;
  7437. }
  7438. case NEON::BI__builtin_neon_vtbl1_v:
  7439. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
  7440. Ops, "vtbl1");
  7441. case NEON::BI__builtin_neon_vtbl2_v:
  7442. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
  7443. Ops, "vtbl2");
  7444. case NEON::BI__builtin_neon_vtbl3_v:
  7445. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
  7446. Ops, "vtbl3");
  7447. case NEON::BI__builtin_neon_vtbl4_v:
  7448. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
  7449. Ops, "vtbl4");
  7450. case NEON::BI__builtin_neon_vtbx1_v:
  7451. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
  7452. Ops, "vtbx1");
  7453. case NEON::BI__builtin_neon_vtbx2_v:
  7454. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
  7455. Ops, "vtbx2");
  7456. case NEON::BI__builtin_neon_vtbx3_v:
  7457. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
  7458. Ops, "vtbx3");
  7459. case NEON::BI__builtin_neon_vtbx4_v:
  7460. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
  7461. Ops, "vtbx4");
  7462. }
  7463. }
  7464. template<typename Integer>
  7465. static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
  7466. return E->getIntegerConstantExpr(Context)->getExtValue();
  7467. }
  7468. static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
  7469. llvm::Type *T, bool Unsigned) {
  7470. // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
  7471. // which finds it convenient to specify signed/unsigned as a boolean flag.
  7472. return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
  7473. }
  7474. static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
  7475. uint32_t Shift, bool Unsigned) {
  7476. // MVE helper function for integer shift right. This must handle signed vs
  7477. // unsigned, and also deal specially with the case where the shift count is
  7478. // equal to the lane size. In LLVM IR, an LShr with that parameter would be
  7479. // undefined behavior, but in MVE it's legal, so we must convert it to code
  7480. // that is not undefined in IR.
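// e.g. (illustrative) an unsigned <8 x i16> shift right by 16 becomes a zero
// vector, while a signed one is emitted as an ashr by 15.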
  7481. unsigned LaneBits = cast<llvm::VectorType>(V->getType())
  7482. ->getElementType()
  7483. ->getPrimitiveSizeInBits();
  7484. if (Shift == LaneBits) {
  7485. // An unsigned shift of the full lane size always generates zero, so we can
  7486. // simply emit a zero vector. A signed shift of the full lane size does the
  7487. // same thing as shifting by one bit fewer.
  7488. if (Unsigned)
  7489. return llvm::Constant::getNullValue(V->getType());
  7490. else
  7491. --Shift;
  7492. }
  7493. return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
  7494. }
  7495. static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
  7496. // MVE-specific helper function for a vector splat, which infers the element
  7497. // count of the output vector by knowing that MVE vectors are all 128 bits
  7498. // wide.
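// e.g. (illustrative) splatting an i16 scalar yields an <8 x i16> vector,
// since 128 / 16 == 8 lanes.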
  7499. unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
  7500. return Builder.CreateVectorSplat(Elements, V);
  7501. }
  7502. static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
  7503. CodeGenFunction *CGF,
  7504. llvm::Value *V,
  7505. llvm::Type *DestType) {
  7506. // Convert one MVE vector type into another by reinterpreting its in-register
  7507. // format.
  7508. //
// In little-endian mode this is identical to a bitcast (which reinterprets
// the memory format). In big-endian mode the two are not necessarily the
// same, because the register and memory formats map to each other
// differently depending on the lane size.
  7513. //
  7514. // We generate a bitcast whenever we can (if we're little-endian, or if the
  7515. // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
  7516. // that performs the different kind of reinterpretation.
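// e.g. (illustrative) a big-endian v8i16 -> v4i32 reinterpret goes through
// @llvm.arm.mve.vreinterpretq, while same-lane-size or little-endian
// conversions are plain bitcasts.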
  7517. if (CGF->getTarget().isBigEndian() &&
  7518. V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
  7519. return Builder.CreateCall(
  7520. CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
  7521. {DestType, V->getType()}),
  7522. V);
  7523. } else {
  7524. return Builder.CreateBitCast(V, DestType);
  7525. }
  7526. }
  7527. static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
  7528. // Make a shufflevector that extracts every other element of a vector (evens
  7529. // or odds, as desired).
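  // E.g. for an 8-element input this selects lanes <0,2,4,6> when Odd is
  // false, or <1,3,5,7> when Odd is true.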
  7530. SmallVector<int, 16> Indices;
  7531. unsigned InputElements =
  7532. cast<llvm::FixedVectorType>(V->getType())->getNumElements();
  7533. for (unsigned i = 0; i < InputElements; i += 2)
  7534. Indices.push_back(i + Odd);
  7535. return Builder.CreateShuffleVector(V, Indices);
  7536. }
  7537. static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
  7538. llvm::Value *V1) {
  7539. // Make a shufflevector that interleaves two vectors element by element.
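  // E.g. zipping <a0,a1,a2,a3> with <b0,b1,b2,b3> produces
  // <a0,b0,a1,b1,a2,b2,a3,b3>, i.e. shuffle indices <0,4,1,5,2,6,3,7>.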
  7540. assert(V0->getType() == V1->getType() && "Can't zip different vector types");
  7541. SmallVector<int, 16> Indices;
  7542. unsigned InputElements =
  7543. cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
  7544. for (unsigned i = 0; i < InputElements; i++) {
  7545. Indices.push_back(i);
  7546. Indices.push_back(i + InputElements);
  7547. }
  7548. return Builder.CreateShuffleVector(V0, V1, Indices);
  7549. }
  7550. template<unsigned HighBit, unsigned OtherBits>
  7551. static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
  7552. // MVE-specific helper function to make a vector splat of a constant such as
  7553. // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
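  // E.g. <HighBit=0, OtherBits=1> yields INT_MAX, <1,0> yields INT_MIN, and
  // <1,1> yields UINT_MAX for the lane type.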
  7554. llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
  7555. unsigned LaneBits = T->getPrimitiveSizeInBits();
  7556. uint32_t Value = HighBit << (LaneBits - 1);
  7557. if (OtherBits)
  7558. Value |= (1UL << (LaneBits - 1)) - 1;
  7559. llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
  7560. return ARMMVEVectorSplat(Builder, Lane);
  7561. }
  7562. static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
  7563. llvm::Value *V,
  7564. unsigned ReverseWidth) {
  7565. // MVE-specific helper function which reverses the elements of a
  7566. // vector within every (ReverseWidth)-bit collection of lanes.
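  // E.g. with 8-bit lanes and ReverseWidth == 32, lanes are permuted with the
  // index mapping i -> i ^ 3, reversing each group of four bytes.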
  7567. SmallVector<int, 16> Indices;
  7568. unsigned LaneSize = V->getType()->getScalarSizeInBits();
  7569. unsigned Elements = 128 / LaneSize;
  7570. unsigned Mask = ReverseWidth / LaneSize - 1;
  7571. for (unsigned i = 0; i < Elements; i++)
  7572. Indices.push_back(i ^ Mask);
  7573. return Builder.CreateShuffleVector(V, Indices);
  7574. }
  7575. Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
  7576. const CallExpr *E,
  7577. ReturnValueSlot ReturnValue,
  7578. llvm::Triple::ArchType Arch) {
  7579. enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
  7580. Intrinsic::ID IRIntr;
  7581. unsigned NumVectors;
  7582. // Code autogenerated by Tablegen will handle all the simple builtins.
  7583. switch (BuiltinID) {
  7584. #include "clang/Basic/arm_mve_builtin_cg.inc"
  7585. // If we didn't match an MVE builtin id at all, go back to the
  7586. // main EmitARMBuiltinExpr.
  7587. default:
  7588. return nullptr;
  7589. }
  7590. // Anything that breaks from that switch is an MVE builtin that
  7591. // needs handwritten code to generate.
  7592. switch (CustomCodeGenType) {
  7593. case CustomCodeGen::VLD24: {
  7594. llvm::SmallVector<Value *, 4> Ops;
  7595. llvm::SmallVector<llvm::Type *, 4> Tys;
  7596. auto MvecCType = E->getType();
  7597. auto MvecLType = ConvertType(MvecCType);
  7598. assert(MvecLType->isStructTy() &&
  7599. "Return type for vld[24]q should be a struct");
  7600. assert(MvecLType->getStructNumElements() == 1 &&
  7601. "Return-type struct for vld[24]q should have one element");
  7602. auto MvecLTypeInner = MvecLType->getStructElementType(0);
  7603. assert(MvecLTypeInner->isArrayTy() &&
  7604. "Return-type struct for vld[24]q should contain an array");
  7605. assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
  7606. "Array member of return-type struct vld[24]q has wrong length");
  7607. auto VecLType = MvecLTypeInner->getArrayElementType();
  7608. Tys.push_back(VecLType);
  7609. auto Addr = E->getArg(0);
  7610. Ops.push_back(EmitScalarExpr(Addr));
  7611. Tys.push_back(ConvertType(Addr->getType()));
  7612. Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
  7613. Value *LoadResult = Builder.CreateCall(F, Ops);
  7614. Value *MvecOut = UndefValue::get(MvecLType);
  7615. for (unsigned i = 0; i < NumVectors; ++i) {
  7616. Value *Vec = Builder.CreateExtractValue(LoadResult, i);
  7617. MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
  7618. }
  7619. if (ReturnValue.isNull())
  7620. return MvecOut;
  7621. else
  7622. return Builder.CreateStore(MvecOut, ReturnValue.getValue());
  7623. }
  7624. case CustomCodeGen::VST24: {
  7625. llvm::SmallVector<Value *, 4> Ops;
  7626. llvm::SmallVector<llvm::Type *, 4> Tys;
  7627. auto Addr = E->getArg(0);
  7628. Ops.push_back(EmitScalarExpr(Addr));
  7629. Tys.push_back(ConvertType(Addr->getType()));
  7630. auto MvecCType = E->getArg(1)->getType();
  7631. auto MvecLType = ConvertType(MvecCType);
  7632. assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
  7633. assert(MvecLType->getStructNumElements() == 1 &&
  7634. "Data-type struct for vst2q should have one element");
  7635. auto MvecLTypeInner = MvecLType->getStructElementType(0);
  7636. assert(MvecLTypeInner->isArrayTy() &&
  7637. "Data-type struct for vst2q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst2q has wrong length");
  7640. auto VecLType = MvecLTypeInner->getArrayElementType();
  7641. Tys.push_back(VecLType);
  7642. AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
  7643. EmitAggExpr(E->getArg(1), MvecSlot);
  7644. auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
  7645. for (unsigned i = 0; i < NumVectors; i++)
  7646. Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
  7647. Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
  7648. Value *ToReturn = nullptr;
  7649. for (unsigned i = 0; i < NumVectors; i++) {
  7650. Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
  7651. ToReturn = Builder.CreateCall(F, Ops);
  7652. Ops.pop_back();
  7653. }
  7654. return ToReturn;
  7655. }
  7656. }
  7657. llvm_unreachable("unknown custom codegen type.");
  7658. }
  7659. Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
  7660. const CallExpr *E,
  7661. ReturnValueSlot ReturnValue,
  7662. llvm::Triple::ArchType Arch) {
  7663. switch (BuiltinID) {
  7664. default:
  7665. return nullptr;
  7666. #include "clang/Basic/arm_cde_builtin_cg.inc"
  7667. }
  7668. }
  7669. static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
  7670. const CallExpr *E,
  7671. SmallVectorImpl<Value *> &Ops,
  7672. llvm::Triple::ArchType Arch) {
  7673. unsigned int Int = 0;
  7674. const char *s = nullptr;
  7675. switch (BuiltinID) {
  7676. default:
  7677. return nullptr;
  7678. case NEON::BI__builtin_neon_vtbl1_v:
  7679. case NEON::BI__builtin_neon_vqtbl1_v:
  7680. case NEON::BI__builtin_neon_vqtbl1q_v:
  7681. case NEON::BI__builtin_neon_vtbl2_v:
  7682. case NEON::BI__builtin_neon_vqtbl2_v:
  7683. case NEON::BI__builtin_neon_vqtbl2q_v:
  7684. case NEON::BI__builtin_neon_vtbl3_v:
  7685. case NEON::BI__builtin_neon_vqtbl3_v:
  7686. case NEON::BI__builtin_neon_vqtbl3q_v:
  7687. case NEON::BI__builtin_neon_vtbl4_v:
  7688. case NEON::BI__builtin_neon_vqtbl4_v:
  7689. case NEON::BI__builtin_neon_vqtbl4q_v:
  7690. break;
  7691. case NEON::BI__builtin_neon_vtbx1_v:
  7692. case NEON::BI__builtin_neon_vqtbx1_v:
  7693. case NEON::BI__builtin_neon_vqtbx1q_v:
  7694. case NEON::BI__builtin_neon_vtbx2_v:
  7695. case NEON::BI__builtin_neon_vqtbx2_v:
  7696. case NEON::BI__builtin_neon_vqtbx2q_v:
  7697. case NEON::BI__builtin_neon_vtbx3_v:
  7698. case NEON::BI__builtin_neon_vqtbx3_v:
  7699. case NEON::BI__builtin_neon_vqtbx3q_v:
  7700. case NEON::BI__builtin_neon_vtbx4_v:
  7701. case NEON::BI__builtin_neon_vqtbx4_v:
  7702. case NEON::BI__builtin_neon_vqtbx4q_v:
  7703. break;
  7704. }
  7705. assert(E->getNumArgs() >= 3);
  7706. // Get the last argument, which specifies the vector type.
  7707. const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  7708. Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext());
  7709. if (!Result)
  7710. return nullptr;
  7711. // Determine the type of this overloaded NEON intrinsic.
  7712. NeonTypeFlags Type = Result->getZExtValue();
  7713. llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
  7714. if (!Ty)
  7715. return nullptr;
  7716. CodeGen::CGBuilderTy &Builder = CGF.Builder;
  // AArch64 scalar builtins are not overloaded: they do not have an extra
  // argument that specifies the vector type, so each case has to be handled
  // individually.
  7719. switch (BuiltinID) {
  7720. case NEON::BI__builtin_neon_vtbl1_v: {
  7721. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
  7722. Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
  7723. "vtbl1");
  7724. }
  7725. case NEON::BI__builtin_neon_vtbl2_v: {
  7726. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
  7727. Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
  7728. "vtbl1");
  7729. }
  7730. case NEON::BI__builtin_neon_vtbl3_v: {
  7731. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
  7732. Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
  7733. "vtbl2");
  7734. }
  7735. case NEON::BI__builtin_neon_vtbl4_v: {
  7736. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
  7737. Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
  7738. "vtbl2");
  7739. }
  7740. case NEON::BI__builtin_neon_vtbx1_v: {
  7741. Value *TblRes =
  7742. packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
  7743. Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  7744. llvm::Constant *EightV = ConstantInt::get(Ty, 8);
  7745. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
  7746. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  7747. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  7748. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  7749. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  7750. }
  7751. case NEON::BI__builtin_neon_vtbx2_v: {
  7752. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
  7753. Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
  7754. "vtbx1");
  7755. }
  7756. case NEON::BI__builtin_neon_vtbx3_v: {
  7757. Value *TblRes =
  7758. packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
  7759. Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  7760. llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
  7761. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
  7762. TwentyFourV);
  7763. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  7764. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  7765. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  7766. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  7767. }
  7768. case NEON::BI__builtin_neon_vtbx4_v: {
  7769. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
  7770. Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
  7771. "vtbx2");
  7772. }
  7773. case NEON::BI__builtin_neon_vqtbl1_v:
  7774. case NEON::BI__builtin_neon_vqtbl1q_v:
  7775. Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  7776. case NEON::BI__builtin_neon_vqtbl2_v:
  7777. case NEON::BI__builtin_neon_vqtbl2q_v: {
  7778. Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  7779. case NEON::BI__builtin_neon_vqtbl3_v:
  7780. case NEON::BI__builtin_neon_vqtbl3q_v:
  7781. Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  7782. case NEON::BI__builtin_neon_vqtbl4_v:
  7783. case NEON::BI__builtin_neon_vqtbl4q_v:
  7784. Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  7785. case NEON::BI__builtin_neon_vqtbx1_v:
  7786. case NEON::BI__builtin_neon_vqtbx1q_v:
  7787. Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  7788. case NEON::BI__builtin_neon_vqtbx2_v:
  7789. case NEON::BI__builtin_neon_vqtbx2q_v:
  7790. Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  7791. case NEON::BI__builtin_neon_vqtbx3_v:
  7792. case NEON::BI__builtin_neon_vqtbx3q_v:
  7793. Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  7794. case NEON::BI__builtin_neon_vqtbx4_v:
  7795. case NEON::BI__builtin_neon_vqtbx4q_v:
  7796. Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  7797. }
  7798. }
  7799. if (!Int)
  7800. return nullptr;
  7801. Function *F = CGF.CGM.getIntrinsic(Int, Ty);
  7802. return CGF.EmitNeonCall(F, Ops, s);
  7803. }
  7804. Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  7805. auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  7806. Op = Builder.CreateBitCast(Op, Int16Ty);
  7807. Value *V = UndefValue::get(VTy);
  7808. llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  7809. Op = Builder.CreateInsertElement(V, Op, CI);
  7810. return Op;
  7811. }
  7812. /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
  7813. /// access builtin. Only required if it can't be inferred from the base pointer
  7814. /// operand.
  7815. llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
  7816. switch (TypeFlags.getMemEltType()) {
  7817. case SVETypeFlags::MemEltTyDefault:
  7818. return getEltType(TypeFlags);
  7819. case SVETypeFlags::MemEltTyInt8:
  7820. return Builder.getInt8Ty();
  7821. case SVETypeFlags::MemEltTyInt16:
  7822. return Builder.getInt16Ty();
  7823. case SVETypeFlags::MemEltTyInt32:
  7824. return Builder.getInt32Ty();
  7825. case SVETypeFlags::MemEltTyInt64:
  7826. return Builder.getInt64Ty();
  7827. }
  7828. llvm_unreachable("Unknown MemEltType");
  7829. }
  7830. llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
  7831. switch (TypeFlags.getEltType()) {
  7832. default:
  7833. llvm_unreachable("Invalid SVETypeFlag!");
  7834. case SVETypeFlags::EltTyInt8:
  7835. return Builder.getInt8Ty();
  7836. case SVETypeFlags::EltTyInt16:
  7837. return Builder.getInt16Ty();
  7838. case SVETypeFlags::EltTyInt32:
  7839. return Builder.getInt32Ty();
  7840. case SVETypeFlags::EltTyInt64:
  7841. return Builder.getInt64Ty();
  7842. case SVETypeFlags::EltTyFloat16:
  7843. return Builder.getHalfTy();
  7844. case SVETypeFlags::EltTyFloat32:
  7845. return Builder.getFloatTy();
  7846. case SVETypeFlags::EltTyFloat64:
  7847. return Builder.getDoubleTy();
  7848. case SVETypeFlags::EltTyBFloat16:
  7849. return Builder.getBFloatTy();
  7850. case SVETypeFlags::EltTyBool8:
  7851. case SVETypeFlags::EltTyBool16:
  7852. case SVETypeFlags::EltTyBool32:
  7853. case SVETypeFlags::EltTyBool64:
  7854. return Builder.getInt1Ty();
  7855. }
  7856. }
  7857. // Return the llvm predicate vector type corresponding to the specified element
  7858. // TypeFlags.
  7859. llvm::ScalableVectorType *
  7860. CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
  7861. switch (TypeFlags.getEltType()) {
  7862. default: llvm_unreachable("Unhandled SVETypeFlag!");
  7863. case SVETypeFlags::EltTyInt8:
  7864. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  7865. case SVETypeFlags::EltTyInt16:
  7866. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  7867. case SVETypeFlags::EltTyInt32:
  7868. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  7869. case SVETypeFlags::EltTyInt64:
  7870. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  7871. case SVETypeFlags::EltTyBFloat16:
  7872. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  7873. case SVETypeFlags::EltTyFloat16:
  7874. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  7875. case SVETypeFlags::EltTyFloat32:
  7876. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  7877. case SVETypeFlags::EltTyFloat64:
  7878. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  7879. case SVETypeFlags::EltTyBool8:
  7880. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  7881. case SVETypeFlags::EltTyBool16:
  7882. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  7883. case SVETypeFlags::EltTyBool32:
  7884. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  7885. case SVETypeFlags::EltTyBool64:
  7886. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  7887. }
  7888. }
  7889. // Return the llvm vector type corresponding to the specified element TypeFlags.
  7890. llvm::ScalableVectorType *
  7891. CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
  7892. switch (TypeFlags.getEltType()) {
  7893. default:
  7894. llvm_unreachable("Invalid SVETypeFlag!");
  7895. case SVETypeFlags::EltTyInt8:
  7896. return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
  7897. case SVETypeFlags::EltTyInt16:
  7898. return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
  7899. case SVETypeFlags::EltTyInt32:
  7900. return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
  7901. case SVETypeFlags::EltTyInt64:
  7902. return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
  7903. case SVETypeFlags::EltTyFloat16:
  7904. return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
  7905. case SVETypeFlags::EltTyBFloat16:
  7906. return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
  7907. case SVETypeFlags::EltTyFloat32:
  7908. return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
  7909. case SVETypeFlags::EltTyFloat64:
  7910. return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
  7911. case SVETypeFlags::EltTyBool8:
  7912. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  7913. case SVETypeFlags::EltTyBool16:
  7914. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  7915. case SVETypeFlags::EltTyBool32:
  7916. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  7917. case SVETypeFlags::EltTyBool64:
  7918. return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
  7919. }
  7920. }
  7921. llvm::Value *
  7922. CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
  7923. Function *Ptrue =
  7924. CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
  7925. return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
  7926. }
  7927. constexpr unsigned SVEBitsPerBlock = 128;
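// Build the scalable vector type that holds as many elements of EltTy as fit
// in one 128-bit SVE block, e.g. <vscale x 4 x i32> for a 32-bit element type.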
  7928. static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
  7929. unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
  7930. return llvm::ScalableVectorType::get(EltTy, NumElts);
  7931. }
  7932. // Reinterpret the input predicate so that it can be used to correctly isolate
  7933. // the elements of the specified datatype.
  7934. Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
  7935. llvm::ScalableVectorType *VTy) {
  7936. auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
  7937. if (Pred->getType() == RTy)
  7938. return Pred;
  7939. unsigned IntID;
  7940. llvm::Type *IntrinsicTy;
  7941. switch (VTy->getMinNumElements()) {
  7942. default:
  7943. llvm_unreachable("unsupported element count!");
  7944. case 2:
  7945. case 4:
  7946. case 8:
  7947. IntID = Intrinsic::aarch64_sve_convert_from_svbool;
  7948. IntrinsicTy = RTy;
  7949. break;
  7950. case 16:
  7951. IntID = Intrinsic::aarch64_sve_convert_to_svbool;
  7952. IntrinsicTy = Pred->getType();
  7953. break;
  7954. }
  7955. Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
  7956. Value *C = Builder.CreateCall(F, Pred);
  7957. assert(C->getType() == RTy && "Unexpected return type!");
  7958. return C;
  7959. }
  7960. Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
  7961. SmallVectorImpl<Value *> &Ops,
  7962. unsigned IntID) {
  7963. auto *ResultTy = getSVEType(TypeFlags);
  7964. auto *OverloadedTy =
  7965. llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
  7966. // At the ACLE level there's only one predicate type, svbool_t, which is
  7967. // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being loaded. For example, when loading doubles (64-bit
  // elements) the predicate should be <n x 2 x i1> instead. At the IR level
  // the type of the predicate and the data being loaded must match. Cast
  // accordingly.
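  // E.g. a gather of 64-bit elements narrows the <vscale x 16 x i1> svbool_t
  // predicate to <vscale x 2 x i1>.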
  7971. Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
  7972. Function *F = nullptr;
  7973. if (Ops[1]->getType()->isVectorTy())
  7974. // This is the "vector base, scalar offset" case. In order to uniquely
  7975. // map this built-in to an LLVM IR intrinsic, we need both the return type
  7976. // and the type of the vector base.
  7977. F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
  7978. else
    // This is the "scalar base, vector offset" case. The type of the offset
  7980. // is encoded in the name of the intrinsic. We only need to specify the
  7981. // return type in order to uniquely map this built-in to an LLVM IR
  7982. // intrinsic.
  7983. F = CGM.getIntrinsic(IntID, OverloadedTy);
  7984. // Pass 0 when the offset is missing. This can only be applied when using
  7985. // the "vector base" addressing mode for which ACLE allows no offset. The
  7986. // corresponding LLVM IR always requires an offset.
  7987. if (Ops.size() == 2) {
  7988. assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
  7989. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  7990. }
  7991. // For "vector base, scalar index" scale the index so that it becomes a
  7992. // scalar offset.
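  // E.g. when gathering 32-bit elements, an index of 3 becomes a byte offset
  // of 12.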
  7993. if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
  7994. unsigned BytesPerElt =
  7995. OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
  7996. Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
  7997. Ops[2] = Builder.CreateMul(Ops[2], Scale);
  7998. }
  7999. Value *Call = Builder.CreateCall(F, Ops);
  8000. // The following sext/zext is only needed when ResultTy != OverloadedTy. In
  8001. // other cases it's folded into a nop.
  8002. return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
  8003. : Builder.CreateSExt(Call, ResultTy);
  8004. }
  8005. Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
  8006. SmallVectorImpl<Value *> &Ops,
  8007. unsigned IntID) {
  8008. auto *SrcDataTy = getSVEType(TypeFlags);
  8009. auto *OverloadedTy =
  8010. llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
  8011. // In ACLE the source data is passed in the last argument, whereas in LLVM IR
  8012. // it's the first argument. Move it accordingly.
  8013. Ops.insert(Ops.begin(), Ops.pop_back_val());
  8014. Function *F = nullptr;
  8015. if (Ops[2]->getType()->isVectorTy())
  8016. // This is the "vector base, scalar offset" case. In order to uniquely
  8017. // map this built-in to an LLVM IR intrinsic, we need both the return type
  8018. // and the type of the vector base.
  8019. F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
  8020. else
    // This is the "scalar base, vector offset" case. The type of the offset
  8022. // is encoded in the name of the intrinsic. We only need to specify the
  8023. // return type in order to uniquely map this built-in to an LLVM IR
  8024. // intrinsic.
  8025. F = CGM.getIntrinsic(IntID, OverloadedTy);
  8026. // Pass 0 when the offset is missing. This can only be applied when using
  8027. // the "vector base" addressing mode for which ACLE allows no offset. The
  8028. // corresponding LLVM IR always requires an offset.
  8029. if (Ops.size() == 3) {
  8030. assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
  8031. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  8032. }
  8033. // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
  8034. // folded into a nop.
  8035. Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
  8036. // At the ACLE level there's only one predicate type, svbool_t, which is
  8037. // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being stored. For example, when storing doubles (64-bit
  // elements) the predicate should be <n x 2 x i1> instead. At the IR level
  // the type of the predicate and the data being stored must match. Cast
  // accordingly.
  8041. Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
  8042. // For "vector base, scalar index" scale the index so that it becomes a
  8043. // scalar offset.
  8044. if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
  8045. unsigned BytesPerElt =
  8046. OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
  8047. Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
  8048. Ops[3] = Builder.CreateMul(Ops[3], Scale);
  8049. }
  8050. return Builder.CreateCall(F, Ops);
  8051. }
  8052. Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
  8053. SmallVectorImpl<Value *> &Ops,
  8054. unsigned IntID) {
  8055. // The gather prefetches are overloaded on the vector input - this can either
  8056. // be the vector of base addresses or vector of offsets.
  8057. auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
  8058. if (!OverloadedTy)
  8059. OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
  8060. // Cast the predicate from svbool_t to the right number of elements.
  8061. Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
  8062. // vector + imm addressing modes
  8063. if (Ops[1]->getType()->isVectorTy()) {
  8064. if (Ops.size() == 3) {
  8065. // Pass 0 for 'vector+imm' when the index is omitted.
  8066. Ops.push_back(ConstantInt::get(Int64Ty, 0));
  8067. // The sv_prfop is the last operand in the builtin and IR intrinsic.
  8068. std::swap(Ops[2], Ops[3]);
  8069. } else {
  8070. // Index needs to be passed as scaled offset.
  8071. llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
  8072. unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
  8073. Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
  8074. Ops[2] = Builder.CreateMul(Ops[2], Scale);
  8075. }
  8076. }
  8077. Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
  8078. return Builder.CreateCall(F, Ops);
  8079. }
  8080. Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
  8081. SmallVectorImpl<Value*> &Ops,
  8082. unsigned IntID) {
  8083. llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  8084. auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  8085. auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
  8086. unsigned N;
  8087. switch (IntID) {
  8088. case Intrinsic::aarch64_sve_ld2:
  8089. N = 2;
  8090. break;
  8091. case Intrinsic::aarch64_sve_ld3:
  8092. N = 3;
  8093. break;
  8094. case Intrinsic::aarch64_sve_ld4:
  8095. N = 4;
  8096. break;
  8097. default:
  8098. llvm_unreachable("unknown intrinsic!");
  8099. }
  8100. auto RetTy = llvm::VectorType::get(VTy->getElementType(),
  8101. VTy->getElementCount() * N);
  8102. Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
  8104. Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
  8105. BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
  8106. BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  8107. Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
  8108. return Builder.CreateCall(F, { Predicate, BasePtr });
  8109. }
  8110. Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
  8111. SmallVectorImpl<Value*> &Ops,
  8112. unsigned IntID) {
  8113. llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
  8114. auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
  8115. auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
  8116. unsigned N;
  8117. switch (IntID) {
  8118. case Intrinsic::aarch64_sve_st2:
  8119. N = 2;
  8120. break;
  8121. case Intrinsic::aarch64_sve_st3:
  8122. N = 3;
  8123. break;
  8124. case Intrinsic::aarch64_sve_st4:
  8125. N = 4;
  8126. break;
  8127. default:
  8128. llvm_unreachable("unknown intrinsic!");
  8129. }
  8130. auto TupleTy =
  8131. llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
  8132. Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
  8133. Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
  8134. Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
  8135. Value *Val = Ops.back();
  8136. BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
  8137. BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
  8138. // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
  8139. // need to break up the tuple vector.
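  // E.g. an st2 of 16-bit elements receives a single <vscale x 16 x i16>
  // tuple value, which is split here into two <vscale x 8 x i16> parts.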
  8140. SmallVector<llvm::Value*, 5> Operands;
  8141. Function *FExtr =
  8142. CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
  8143. for (unsigned I = 0; I < N; ++I)
  8144. Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
  8145. Operands.append({Predicate, BasePtr});
  8146. Function *F = CGM.getIntrinsic(IntID, { VTy });
  8147. return Builder.CreateCall(F, Operands);
  8148. }
  8149. // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
  8150. // svpmullt_pair intrinsics, with the exception that their results are bitcast
  8151. // to a wider type.
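// For instance, for the 64-bit variants the pair intrinsic is overloaded on
// the narrower 32-bit operand type and its result is then reinterpreted as
// <vscale x 2 x i64>.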
  8152. Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
  8153. SmallVectorImpl<Value *> &Ops,
  8154. unsigned BuiltinID) {
  8155. // Splat scalar operand to vector (intrinsics with _n infix)
  8156. if (TypeFlags.hasSplatOperand()) {
  8157. unsigned OpNo = TypeFlags.getSplatOperand();
  8158. Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
  8159. }
  8160. // The pair-wise function has a narrower overloaded type.
  8161. Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
  8162. Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
  8163. // Now bitcast to the wider result type.
  8164. llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
  8165. return EmitSVEReinterpret(Call, Ty);
  8166. }
  8167. Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
  8168. ArrayRef<Value *> Ops, unsigned BuiltinID) {
  8169. llvm::Type *OverloadedTy = getSVEType(TypeFlags);
  8170. Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
  8171. return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
  8172. }
  8173. Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
  8174. SmallVectorImpl<Value *> &Ops,
  8175. unsigned BuiltinID) {
  8176. auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
  8177. auto *VectorTy = getSVEVectorForElementType(MemEltTy);
  8178. auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
  8179. Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  8180. Value *BasePtr = Ops[1];
  8181. // Implement the index operand if not omitted.
  8182. if (Ops.size() > 3) {
  8183. BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
  8184. BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
  8185. }
  // Prefetch intrinsics always expect an i8*.
  8187. BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
  8188. Value *PrfOp = Ops.back();
  8189. Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
  8190. return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
  8191. }
  8192. Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
  8193. llvm::Type *ReturnTy,
  8194. SmallVectorImpl<Value *> &Ops,
  8195. unsigned BuiltinID,
  8196. bool IsZExtReturn) {
  8197. QualType LangPTy = E->getArg(1)->getType();
  8198. llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
  8199. LangPTy->castAs<PointerType>()->getPointeeType());
  8200. // The vector type that is returned may be different from the
  8201. // eventual type loaded from memory.
  8202. auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
  8203. auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
  8204. Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  8205. Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
  8206. Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
  8207. BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
  8208. BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  8209. Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  8210. Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
  8211. return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
  8212. : Builder.CreateSExt(Load, VectorTy);
  8213. }
  8214. Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
  8215. SmallVectorImpl<Value *> &Ops,
  8216. unsigned BuiltinID) {
  8217. QualType LangPTy = E->getArg(1)->getType();
  8218. llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
  8219. LangPTy->castAs<PointerType>()->getPointeeType());
  8220. // The vector type that is stored may be different from the
  8221. // eventual type stored to memory.
  8222. auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
  8223. auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
  8224. Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
  8225. Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
  8226. Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
  8227. BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
  8228. // Last value is always the data
  8229. llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
  8230. BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
  8231. Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
  8232. return Builder.CreateCall(F, {Val, Predicate, BasePtr});
  8233. }
  8234. // Limit the usage of scalable llvm IR generated by the ACLE by using the
  8235. // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
  8236. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
  8237. auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
  8238. return Builder.CreateCall(F, Scalar);
  8239. }
  8240. Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
  8241. return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
  8242. }
  8243. Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
  8244. // FIXME: For big endian this needs an additional REV, or needs a separate
  8245. // intrinsic that is code-generated as a no-op, because the LLVM bitcast
  8246. // instruction is defined as 'bitwise' equivalent from memory point of
  8247. // view (when storing/reloading), whereas the svreinterpret builtin
  8248. // implements bitwise equivalent cast from register point of view.
  8249. // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
  8250. return Builder.CreateBitCast(Val, Ty);
  8251. }
  8252. static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
  8253. SmallVectorImpl<Value *> &Ops) {
  8254. auto *SplatZero = Constant::getNullValue(Ty);
  8255. Ops.insert(Ops.begin(), SplatZero);
  8256. }
  8257. static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
  8258. SmallVectorImpl<Value *> &Ops) {
  8259. auto *SplatUndef = UndefValue::get(Ty);
  8260. Ops.insert(Ops.begin(), SplatUndef);
  8261. }
  8262. SmallVector<llvm::Type *, 2>
  8263. CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
  8264. llvm::Type *ResultType,
  8265. ArrayRef<Value *> Ops) {
  8266. if (TypeFlags.isOverloadNone())
  8267. return {};
  8268. llvm::Type *DefaultType = getSVEType(TypeFlags);
  8269. if (TypeFlags.isOverloadWhile())
  8270. return {DefaultType, Ops[1]->getType()};
  8271. if (TypeFlags.isOverloadWhileRW())
  8272. return {getSVEPredType(TypeFlags), Ops[0]->getType()};
  8273. if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
  8274. return {Ops[0]->getType(), Ops.back()->getType()};
  8275. if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
  8276. return {ResultType, Ops[0]->getType()};
  8277. assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
  8278. return {DefaultType};
  8279. }
  8280. Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
  8281. const CallExpr *E) {
  8282. // Find out if any arguments are required to be integer constant expressions.
  8283. unsigned ICEArguments = 0;
  8284. ASTContext::GetBuiltinTypeError Error;
  8285. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  8286. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  8287. llvm::Type *Ty = ConvertType(E->getType());
  8288. if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
  8289. BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
  8290. Value *Val = EmitScalarExpr(E->getArg(0));
  8291. return EmitSVEReinterpret(Val, Ty);
  8292. }
  8293. llvm::SmallVector<Value *, 4> Ops;
  8294. for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
  8295. if ((ICEArguments & (1 << i)) == 0)
  8296. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  8297. else {
  8298. // If this is required to be a constant, constant fold it so that we know
  8299. // that the generated intrinsic gets a ConstantInt.
  8300. Optional<llvm::APSInt> Result =
  8301. E->getArg(i)->getIntegerConstantExpr(getContext());
  8302. assert(Result && "Expected argument to be a constant");
  8303. // Immediates for SVE llvm intrinsics are always 32bit. We can safely
  8304. // truncate because the immediate has been range checked and no valid
  8305. // immediate requires more than a handful of bits.
  8306. *Result = Result->extOrTrunc(32);
  8307. Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
  8308. }
  8309. }
  8310. auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
  8311. AArch64SVEIntrinsicsProvenSorted);
  8312. SVETypeFlags TypeFlags(Builtin->TypeModifier);
  8313. if (TypeFlags.isLoad())
  8314. return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
  8315. TypeFlags.isZExtReturn());
  8316. else if (TypeFlags.isStore())
  8317. return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
  8318. else if (TypeFlags.isGatherLoad())
  8319. return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8320. else if (TypeFlags.isScatterStore())
  8321. return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8322. else if (TypeFlags.isPrefetch())
  8323. return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8324. else if (TypeFlags.isGatherPrefetch())
  8325. return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8326. else if (TypeFlags.isStructLoad())
  8327. return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8328. else if (TypeFlags.isStructStore())
  8329. return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  8330. else if (TypeFlags.isUndef())
  8331. return UndefValue::get(Ty);
  8332. else if (Builtin->LLVMIntrinsic != 0) {
  8333. if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
  8334. InsertExplicitZeroOperand(Builder, Ty, Ops);
  8335. if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
  8336. InsertExplicitUndefOperand(Builder, Ty, Ops);
  8337. // Some ACLE builtins leave out the argument to specify the predicate
  8338. // pattern, which is expected to be expanded to an SV_ALL pattern.
  8339. if (TypeFlags.isAppendSVALL())
  8340. Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
  8341. if (TypeFlags.isInsertOp1SVALL())
  8342. Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
  8343. // Predicates must match the main datatype.
  8344. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  8345. if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
  8346. if (PredTy->getElementType()->isIntegerTy(1))
  8347. Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
  8348. // Splat scalar operand to vector (intrinsics with _n infix)
  8349. if (TypeFlags.hasSplatOperand()) {
  8350. unsigned OpNo = TypeFlags.getSplatOperand();
  8351. Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
  8352. }
  8353. if (TypeFlags.isReverseCompare())
  8354. std::swap(Ops[1], Ops[2]);
  8355. if (TypeFlags.isReverseUSDOT())
  8356. std::swap(Ops[1], Ops[2]);
  8357. // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
  8358. if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
  8359. llvm::Type *OpndTy = Ops[1]->getType();
  8360. auto *SplatZero = Constant::getNullValue(OpndTy);
  8361. Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
  8362. Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
  8363. }
  8364. Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
  8365. getSVEOverloadTypes(TypeFlags, Ty, Ops));
  8366. Value *Call = Builder.CreateCall(F, Ops);
  8367. // Predicate results must be converted to svbool_t.
  8368. if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
  8369. if (PredTy->getScalarType()->isIntegerTy(1))
  8370. Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  8371. return Call;
  8372. }
  8373. switch (BuiltinID) {
  8374. default:
  8375. return nullptr;
  8376. case SVE::BI__builtin_sve_svmov_b_z: {
  8377. // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
  8378. SVETypeFlags TypeFlags(Builtin->TypeModifier);
  8379. llvm::Type* OverloadedTy = getSVEType(TypeFlags);
  8380. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
  8381. return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  8382. }
  8383. case SVE::BI__builtin_sve_svnot_b_z: {
  8384. // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
  8385. SVETypeFlags TypeFlags(Builtin->TypeModifier);
  8386. llvm::Type* OverloadedTy = getSVEType(TypeFlags);
  8387. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
  8388. return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  8389. }
  8390. case SVE::BI__builtin_sve_svmovlb_u16:
  8391. case SVE::BI__builtin_sve_svmovlb_u32:
  8392. case SVE::BI__builtin_sve_svmovlb_u64:
  8393. return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
  8394. case SVE::BI__builtin_sve_svmovlb_s16:
  8395. case SVE::BI__builtin_sve_svmovlb_s32:
  8396. case SVE::BI__builtin_sve_svmovlb_s64:
  8397. return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
  8398. case SVE::BI__builtin_sve_svmovlt_u16:
  8399. case SVE::BI__builtin_sve_svmovlt_u32:
  8400. case SVE::BI__builtin_sve_svmovlt_u64:
  8401. return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
  8402. case SVE::BI__builtin_sve_svmovlt_s16:
  8403. case SVE::BI__builtin_sve_svmovlt_s32:
  8404. case SVE::BI__builtin_sve_svmovlt_s64:
  8405. return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
  8406. case SVE::BI__builtin_sve_svpmullt_u16:
  8407. case SVE::BI__builtin_sve_svpmullt_u64:
  8408. case SVE::BI__builtin_sve_svpmullt_n_u16:
  8409. case SVE::BI__builtin_sve_svpmullt_n_u64:
  8410. return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
  8411. case SVE::BI__builtin_sve_svpmullb_u16:
  8412. case SVE::BI__builtin_sve_svpmullb_u64:
  8413. case SVE::BI__builtin_sve_svpmullb_n_u16:
  8414. case SVE::BI__builtin_sve_svpmullb_n_u64:
  8415. return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
  8416. case SVE::BI__builtin_sve_svdup_n_b8:
  8417. case SVE::BI__builtin_sve_svdup_n_b16:
  8418. case SVE::BI__builtin_sve_svdup_n_b32:
  8419. case SVE::BI__builtin_sve_svdup_n_b64: {
  8420. Value *CmpNE =
  8421. Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
  8422. llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
  8423. Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
  8424. return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  8425. }
  8426. case SVE::BI__builtin_sve_svdupq_n_b8:
  8427. case SVE::BI__builtin_sve_svdupq_n_b16:
  8428. case SVE::BI__builtin_sve_svdupq_n_b32:
  8429. case SVE::BI__builtin_sve_svdupq_n_b64:
  8430. case SVE::BI__builtin_sve_svdupq_n_u8:
  8431. case SVE::BI__builtin_sve_svdupq_n_s8:
  8432. case SVE::BI__builtin_sve_svdupq_n_u64:
  8433. case SVE::BI__builtin_sve_svdupq_n_f64:
  8434. case SVE::BI__builtin_sve_svdupq_n_s64:
  8435. case SVE::BI__builtin_sve_svdupq_n_u16:
  8436. case SVE::BI__builtin_sve_svdupq_n_f16:
  8437. case SVE::BI__builtin_sve_svdupq_n_bf16:
  8438. case SVE::BI__builtin_sve_svdupq_n_s16:
  8439. case SVE::BI__builtin_sve_svdupq_n_u32:
  8440. case SVE::BI__builtin_sve_svdupq_n_f32:
  8441. case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar operands, inserting it into the low 128 bits of an SVE
    // vector, and replicating it across the full vector with the dupq_lane
    // intrinsic.
  8444. unsigned NumOpnds = Ops.size();
  8445. bool IsBoolTy =
  8446. cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
    // For svdupq_n_b* the element type is an integer of width 128/NumOpnds
    // bits, so that the compare can use the width that is natural for the
    // expected number of predicate lanes.
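    // E.g. svdupq_n_b16 takes 8 operands, so the compare is performed on
    // 128 / 8 == 16 bit lanes.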
  8450. llvm::Type *EltTy = Ops[0]->getType();
  8451. if (IsBoolTy)
  8452. EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
  8453. SmallVector<llvm::Value *, 16> VecOps;
  8454. for (unsigned I = 0; I < NumOpnds; ++I)
  8455. VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
  8456. Value *Vec = BuildVector(VecOps);
  8457. SVETypeFlags TypeFlags(Builtin->TypeModifier);
  8458. Value *Pred = EmitSVEAllTruePred(TypeFlags);
  8459. llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
  8460. Value *InsertSubVec = Builder.CreateInsertVector(
  8461. OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0));
  8462. Function *F =
  8463. CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
  8464. Value *DupQLane =
  8465. Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
  8466. if (!IsBoolTy)
  8467. return DupQLane;
  8468. // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
  8469. F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
  8470. : Intrinsic::aarch64_sve_cmpne_wide,
  8471. OverloadedTy);
  8472. Value *Call = Builder.CreateCall(
  8473. F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
  8474. return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  8475. }
  8476. case SVE::BI__builtin_sve_svpfalse_b:
  8477. return ConstantInt::getFalse(Ty);
  8478. case SVE::BI__builtin_sve_svlen_bf16:
  8479. case SVE::BI__builtin_sve_svlen_f16:
  8480. case SVE::BI__builtin_sve_svlen_f32:
  8481. case SVE::BI__builtin_sve_svlen_f64:
  8482. case SVE::BI__builtin_sve_svlen_s8:
  8483. case SVE::BI__builtin_sve_svlen_s16:
  8484. case SVE::BI__builtin_sve_svlen_s32:
  8485. case SVE::BI__builtin_sve_svlen_s64:
  8486. case SVE::BI__builtin_sve_svlen_u8:
  8487. case SVE::BI__builtin_sve_svlen_u16:
  8488. case SVE::BI__builtin_sve_svlen_u32:
  8489. case SVE::BI__builtin_sve_svlen_u64: {
  8490. SVETypeFlags TF(Builtin->TypeModifier);
  8491. auto VTy = cast<llvm::VectorType>(getSVEType(TF));
  8492. auto *NumEls =
  8493. llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
  8494. Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
  8495. return Builder.CreateMul(NumEls, Builder.CreateCall(F));
  8496. }
  8497. case SVE::BI__builtin_sve_svtbl2_u8:
  8498. case SVE::BI__builtin_sve_svtbl2_s8:
  8499. case SVE::BI__builtin_sve_svtbl2_u16:
  8500. case SVE::BI__builtin_sve_svtbl2_s16:
  8501. case SVE::BI__builtin_sve_svtbl2_u32:
  8502. case SVE::BI__builtin_sve_svtbl2_s32:
  8503. case SVE::BI__builtin_sve_svtbl2_u64:
  8504. case SVE::BI__builtin_sve_svtbl2_s64:
  8505. case SVE::BI__builtin_sve_svtbl2_f16:
  8506. case SVE::BI__builtin_sve_svtbl2_bf16:
  8507. case SVE::BI__builtin_sve_svtbl2_f32:
  8508. case SVE::BI__builtin_sve_svtbl2_f64: {
  8509. SVETypeFlags TF(Builtin->TypeModifier);
  8510. auto VTy = cast<llvm::VectorType>(getSVEType(TF));
  8511. auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy);
  8512. Function *FExtr =
  8513. CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
  8514. Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
  8515. Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
  8516. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
  8517. return Builder.CreateCall(F, {V0, V1, Ops[1]});
  8518. }
  8519. case SVE::BI__builtin_sve_svset_neonq_s8:
  8520. case SVE::BI__builtin_sve_svset_neonq_s16:
  8521. case SVE::BI__builtin_sve_svset_neonq_s32:
  8522. case SVE::BI__builtin_sve_svset_neonq_s64:
  8523. case SVE::BI__builtin_sve_svset_neonq_u8:
  8524. case SVE::BI__builtin_sve_svset_neonq_u16:
  8525. case SVE::BI__builtin_sve_svset_neonq_u32:
  8526. case SVE::BI__builtin_sve_svset_neonq_u64:
  8527. case SVE::BI__builtin_sve_svset_neonq_f16:
  8528. case SVE::BI__builtin_sve_svset_neonq_f32:
  8529. case SVE::BI__builtin_sve_svset_neonq_f64:
  8530. case SVE::BI__builtin_sve_svset_neonq_bf16: {
  8531. return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
  8532. }
  8533. case SVE::BI__builtin_sve_svget_neonq_s8:
  8534. case SVE::BI__builtin_sve_svget_neonq_s16:
  8535. case SVE::BI__builtin_sve_svget_neonq_s32:
  8536. case SVE::BI__builtin_sve_svget_neonq_s64:
  8537. case SVE::BI__builtin_sve_svget_neonq_u8:
  8538. case SVE::BI__builtin_sve_svget_neonq_u16:
  8539. case SVE::BI__builtin_sve_svget_neonq_u32:
  8540. case SVE::BI__builtin_sve_svget_neonq_u64:
  8541. case SVE::BI__builtin_sve_svget_neonq_f16:
  8542. case SVE::BI__builtin_sve_svget_neonq_f32:
  8543. case SVE::BI__builtin_sve_svget_neonq_f64:
  8544. case SVE::BI__builtin_sve_svget_neonq_bf16: {
  8545. return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
  8546. }
  8547. case SVE::BI__builtin_sve_svdup_neonq_s8:
  8548. case SVE::BI__builtin_sve_svdup_neonq_s16:
  8549. case SVE::BI__builtin_sve_svdup_neonq_s32:
  8550. case SVE::BI__builtin_sve_svdup_neonq_s64:
  8551. case SVE::BI__builtin_sve_svdup_neonq_u8:
  8552. case SVE::BI__builtin_sve_svdup_neonq_u16:
  8553. case SVE::BI__builtin_sve_svdup_neonq_u32:
  8554. case SVE::BI__builtin_sve_svdup_neonq_u64:
  8555. case SVE::BI__builtin_sve_svdup_neonq_f16:
  8556. case SVE::BI__builtin_sve_svdup_neonq_f32:
  8557. case SVE::BI__builtin_sve_svdup_neonq_f64:
  8558. case SVE::BI__builtin_sve_svdup_neonq_bf16: {
  8559. Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
  8560. Builder.getInt64(0));
  8561. return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
  8562. {Insert, Builder.getInt64(0)});
  8563. }
  8564. }
  // Should not happen.
  8566. return nullptr;
  8567. }
  8568. Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  8569. const CallExpr *E,
  8570. llvm::Triple::ArchType Arch) {
  8571. if (BuiltinID >= AArch64::FirstSVEBuiltin &&
  8572. BuiltinID <= AArch64::LastSVEBuiltin)
  8573. return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
  8574. unsigned HintID = static_cast<unsigned>(-1);
  8575. switch (BuiltinID) {
  8576. default: break;
  8577. case AArch64::BI__builtin_arm_nop:
  8578. HintID = 0;
  8579. break;
  8580. case AArch64::BI__builtin_arm_yield:
  8581. case AArch64::BI__yield:
  8582. HintID = 1;
  8583. break;
  8584. case AArch64::BI__builtin_arm_wfe:
  8585. case AArch64::BI__wfe:
  8586. HintID = 2;
  8587. break;
  8588. case AArch64::BI__builtin_arm_wfi:
  8589. case AArch64::BI__wfi:
  8590. HintID = 3;
  8591. break;
  8592. case AArch64::BI__builtin_arm_sev:
  8593. case AArch64::BI__sev:
  8594. HintID = 4;
  8595. break;
  8596. case AArch64::BI__builtin_arm_sevl:
  8597. case AArch64::BI__sevl:
  8598. HintID = 5;
  8599. break;
  8600. }
  8601. if (HintID != static_cast<unsigned>(-1)) {
  8602. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
  8603. return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  8604. }
  8605. if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
  8606. Value *Address = EmitScalarExpr(E->getArg(0));
  8607. Value *RW = EmitScalarExpr(E->getArg(1));
  8608. Value *CacheLevel = EmitScalarExpr(E->getArg(2));
  8609. Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
  8610. Value *IsData = EmitScalarExpr(E->getArg(4));
  8611. Value *Locality = nullptr;
  8612. if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
  8613. // Temporal fetch, needs to convert cache level to locality.
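      // E.g. a CacheLevel operand of 0 maps to locality 3 and a CacheLevel of
      // 2 maps to locality 1.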
  8614. Locality = llvm::ConstantInt::get(Int32Ty,
  8615. -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
  8616. } else {
  8617. // Streaming fetch.
  8618. Locality = llvm::ConstantInt::get(Int32Ty, 0);
  8619. }
  8620. // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
  8621. // PLDL3STRM or PLDL2STRM.
  8622. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  8623. return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  8624. }
  8625. if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
  8626. assert((getContext().getTypeSize(E->getType()) == 32) &&
  8627. "rbit of unusual size!");
  8628. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8629. return Builder.CreateCall(
  8630. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  8631. }
  8632. if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
  8633. assert((getContext().getTypeSize(E->getType()) == 64) &&
  8634. "rbit of unusual size!");
  8635. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8636. return Builder.CreateCall(
  8637. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  8638. }
  8639. if (BuiltinID == AArch64::BI__builtin_arm_cls) {
  8640. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8641. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
  8642. "cls");
  8643. }
  8644. if (BuiltinID == AArch64::BI__builtin_arm_cls64) {
  8645. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8646. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
  8647. "cls");
  8648. }
  8649. if (BuiltinID == AArch64::BI__builtin_arm_frint32zf ||
  8650. BuiltinID == AArch64::BI__builtin_arm_frint32z) {
  8651. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8652. llvm::Type *Ty = Arg->getType();
  8653. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
  8654. Arg, "frint32z");
  8655. }
  8656. if (BuiltinID == AArch64::BI__builtin_arm_frint64zf ||
  8657. BuiltinID == AArch64::BI__builtin_arm_frint64z) {
  8658. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8659. llvm::Type *Ty = Arg->getType();
  8660. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
  8661. Arg, "frint64z");
  8662. }
  8663. if (BuiltinID == AArch64::BI__builtin_arm_frint32xf ||
  8664. BuiltinID == AArch64::BI__builtin_arm_frint32x) {
  8665. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8666. llvm::Type *Ty = Arg->getType();
  8667. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
  8668. Arg, "frint32x");
  8669. }
  8670. if (BuiltinID == AArch64::BI__builtin_arm_frint64xf ||
  8671. BuiltinID == AArch64::BI__builtin_arm_frint64x) {
  8672. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8673. llvm::Type *Ty = Arg->getType();
  8674. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
  8675. Arg, "frint64x");
  8676. }
  8677. if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
  8678. assert((getContext().getTypeSize(E->getType()) == 32) &&
  8679. "__jcvt of unusual size!");
  8680. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  8681. return Builder.CreateCall(
  8682. CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  8683. }
  if (BuiltinID == AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == AArch64::BI__builtin_arm_st64b ||
      BuiltinID == AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    } else {
      // Load 8 i64 words from ValPtr, and store them to the address
      // via an LLVM intrinsic.
      SmallVector<llvm::Value *, 9> Args;
      Args.push_back(MemAddr);
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
        Args.push_back(Builder.CreateLoad(Addr));
      }

      auto Intr = (BuiltinID == AArch64::BI__builtin_arm_st64b
                       ? Intrinsic::aarch64_st64b
                   : BuiltinID == AArch64::BI__builtin_arm_st64bv
                       ? Intrinsic::aarch64_st64bv
                       : Intrinsic::aarch64_st64bv0);
      Function *F = CGM.getIntrinsic(Intr);
      return Builder.CreateCall(F, Args);
    }
  }
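  // Summary of the LS64 lowering above: __builtin_arm_ld64b calls
  // llvm.aarch64.ld64b, which yields eight i64 values, and spills them to the
  // user buffer one word at a time; the st64b/st64bv/st64bv0 variants do the
  // reverse, loading eight i64 words from the buffer and passing them, plus
  // the target address, to the matching intrinsic. The value returned for
  // ld64b is simply the last CreateStore result.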
  if (BuiltinID == AArch64::BI__builtin_arm_rndr ||
      BuiltinID == AArch64::BI__builtin_arm_rndrrs) {
    auto Intr = (BuiltinID == AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }
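  // The RNDR/RNDRRS builtins above unpack the intrinsic's {i64, i1} result:
  // the random value is stored through the pointer argument and the status
  // bit is zero-extended to form the builtin's i32 return value.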
  8737. if (BuiltinID == AArch64::BI__clear_cache) {
  8738. assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
  8739. const FunctionDecl *FD = E->getDirectCallee();
  8740. Value *Ops[2];
  8741. for (unsigned i = 0; i < 2; i++)
  8742. Ops[i] = EmitScalarExpr(E->getArg(i));
  8743. llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
  8744. llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  8745. StringRef Name = FD->getName();
  8746. return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  8747. }
  8748. if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
  8749. BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
  8750. getContext().getTypeSize(E->getType()) == 128) {
  8751. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
  8752. ? Intrinsic::aarch64_ldaxp
  8753. : Intrinsic::aarch64_ldxp);
  8754. Value *LdPtr = EmitScalarExpr(E->getArg(0));
  8755. Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
  8756. "ldxp");
  8757. Value *Val0 = Builder.CreateExtractValue(Val, 1);
  8758. Value *Val1 = Builder.CreateExtractValue(Val, 0);
  8759. llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
  8760. Val0 = Builder.CreateZExt(Val0, Int128Ty);
  8761. Val1 = Builder.CreateZExt(Val1, Int128Ty);
  8762. Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
  8763. Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
  8764. Val = Builder.CreateOr(Val, Val1);
  8765. return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  8766. } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
  8767. BuiltinID == AArch64::BI__builtin_arm_ldaex) {
  8768. Value *LoadAddr = EmitScalarExpr(E->getArg(0));
  8769. QualType Ty = E->getType();
  8770. llvm::Type *RealResTy = ConvertType(Ty);
  8771. llvm::Type *PtrTy = llvm::IntegerType::get(
  8772. getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
  8773. LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
  8774. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
  8775. ? Intrinsic::aarch64_ldaxr
  8776. : Intrinsic::aarch64_ldxr,
  8777. PtrTy);
  8778. Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
  8779. if (RealResTy->isPointerTy())
  8780. return Builder.CreateIntToPtr(Val, RealResTy);
  8781. llvm::Type *IntResTy = llvm::IntegerType::get(
  8782. getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
  8783. Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
  8784. return Builder.CreateBitCast(Val, RealResTy);
  8785. }
  8786. if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
  8787. BuiltinID == AArch64::BI__builtin_arm_stlex) &&
  8788. getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
  8789. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
  8790. ? Intrinsic::aarch64_stlxp
  8791. : Intrinsic::aarch64_stxp);
  8792. llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
  8793. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  8794. EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
  8795. Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
  8796. llvm::Value *Val = Builder.CreateLoad(Tmp);
  8797. Value *Arg0 = Builder.CreateExtractValue(Val, 0);
  8798. Value *Arg1 = Builder.CreateExtractValue(Val, 1);
  8799. Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
  8800. Int8PtrTy);
  8801. return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  8802. }
  8803. if (BuiltinID == AArch64::BI__builtin_arm_strex ||
  8804. BuiltinID == AArch64::BI__builtin_arm_stlex) {
  8805. Value *StoreVal = EmitScalarExpr(E->getArg(0));
  8806. Value *StoreAddr = EmitScalarExpr(E->getArg(1));
  8807. QualType Ty = E->getArg(0)->getType();
  8808. llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
  8809. getContext().getTypeSize(Ty));
  8810. StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
  8811. if (StoreVal->getType()->isPointerTy())
  8812. StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
  8813. else {
  8814. llvm::Type *IntTy = llvm::IntegerType::get(
  8815. getLLVMContext(),
  8816. CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
  8817. StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
  8818. StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
  8819. }
  8820. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
  8821. ? Intrinsic::aarch64_stlxr
  8822. : Intrinsic::aarch64_stxr,
  8823. StoreAddr->getType());
  8824. return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
  8825. }
  if (BuiltinID == AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }
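  // __getReg relies on Sema having folded the register number to a constant;
  // it becomes llvm.read_register on a named register, e.g. (a sketch)
  //   __getReg(29)  ->  call i64 @llvm.read_register.i64(metadata !{!"x29"})
  // with register 31 spelled "sp" rather than "x31".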
  8840. if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
  8841. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
  8842. return Builder.CreateCall(F);
  8843. }
  8844. if (BuiltinID == AArch64::BI_ReadWriteBarrier)
  8845. return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
  8846. llvm::SyncScope::SingleThread);
  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }
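  // All eight CRC32 builtins funnel through the same pattern: the accumulator
  // stays i32 and the data operand is zero-extended (or bitcast) to whatever
  // width the selected llvm.aarch64.crc32* intrinsic expects; the "d"/"cd"
  // variants map to the 64-bit crc32x/crc32cx intrinsics.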
  8875. // Memory Operations (MOPS)
  8876. if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
  8877. Value *Dst = EmitScalarExpr(E->getArg(0));
  8878. Value *Val = EmitScalarExpr(E->getArg(1));
  8879. Value *Size = EmitScalarExpr(E->getArg(2));
  8880. Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
  8881. Val = Builder.CreateTrunc(Val, Int8Ty);
  8882. Size = Builder.CreateIntCast(Size, Int64Ty, false);
  8883. return Builder.CreateCall(
  8884. CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  8885. }
  8886. // Memory Tagging Extensions (MTE) Intrinsics
  8887. Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  8888. switch (BuiltinID) {
  8889. case AArch64::BI__builtin_arm_irg:
  8890. MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  8891. case AArch64::BI__builtin_arm_addg:
  8892. MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  8893. case AArch64::BI__builtin_arm_gmi:
  8894. MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  8895. case AArch64::BI__builtin_arm_ldg:
  8896. MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  8897. case AArch64::BI__builtin_arm_stg:
  8898. MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  8899. case AArch64::BI__builtin_arm_subp:
  8900. MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  8901. }
  8902. if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
  8903. llvm::Type *T = ConvertType(E->getType());
  8904. if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
  8905. Value *Pointer = EmitScalarExpr(E->getArg(0));
  8906. Value *Mask = EmitScalarExpr(E->getArg(1));
  8907. Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
  8908. Mask = Builder.CreateZExt(Mask, Int64Ty);
  8909. Value *RV = Builder.CreateCall(
  8910. CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
  8911. return Builder.CreatePointerCast(RV, T);
  8912. }
  8913. if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
  8914. Value *Pointer = EmitScalarExpr(E->getArg(0));
  8915. Value *TagOffset = EmitScalarExpr(E->getArg(1));
  8916. Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
  8917. TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
  8918. Value *RV = Builder.CreateCall(
  8919. CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
  8920. return Builder.CreatePointerCast(RV, T);
  8921. }
  8922. if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
  8923. Value *Pointer = EmitScalarExpr(E->getArg(0));
  8924. Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
  8925. ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
  8926. Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
  8927. return Builder.CreateCall(
  8928. CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
  8929. }
  8930. // Although it is possible to supply a different return
  8931. // address (first arg) to this intrinsic, for now we set
  8932. // return address same as input address.
  8933. if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
  8934. Value *TagAddress = EmitScalarExpr(E->getArg(0));
  8935. TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
  8936. Value *RV = Builder.CreateCall(
  8937. CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
  8938. return Builder.CreatePointerCast(RV, T);
  8939. }
  8940. // Although it is possible to supply a different tag (to set)
  8941. // to this intrinsic (as first arg), for now we supply
  8942. // the tag that is in input address arg (common use case).
  8943. if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
  8944. Value *TagAddress = EmitScalarExpr(E->getArg(0));
  8945. TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
  8946. return Builder.CreateCall(
  8947. CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
  8948. }
  8949. if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
  8950. Value *PointerA = EmitScalarExpr(E->getArg(0));
  8951. Value *PointerB = EmitScalarExpr(E->getArg(1));
  8952. PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
  8953. PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
  8954. return Builder.CreateCall(
  8955. CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
  8956. }
  8957. }
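  // MTE note: irg/addg/gmi/subp are emitted with their operands cast to i8*
  // and i64 as required, while ldg and stg pass the same pointer for both
  // intrinsic operands (see the comments above), since the builtins do not
  // expose the more general two-address form of the underlying intrinsics.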
  8958. if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
  8959. BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
  8960. BuiltinID == AArch64::BI__builtin_arm_rsrp ||
  8961. BuiltinID == AArch64::BI__builtin_arm_wsr ||
  8962. BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
  8963. BuiltinID == AArch64::BI__builtin_arm_wsrp) {
  8964. SpecialRegisterAccessKind AccessKind = Write;
  8965. if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
  8966. BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
  8967. BuiltinID == AArch64::BI__builtin_arm_rsrp)
  8968. AccessKind = VolatileRead;
  8969. bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
  8970. BuiltinID == AArch64::BI__builtin_arm_wsrp;
  8971. bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
  8972. BuiltinID != AArch64::BI__builtin_arm_wsr;
  8973. llvm::Type *ValueType;
  8974. llvm::Type *RegisterType = Int64Ty;
  8975. if (IsPointerBuiltin) {
  8976. ValueType = VoidPtrTy;
  8977. } else if (Is64Bit) {
  8978. ValueType = Int64Ty;
  8979. } else {
  8980. ValueType = Int32Ty;
  8981. }
  8982. return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
  8983. AccessKind);
  8984. }
  if (BuiltinID == AArch64::BI_ReadStatusReg ||
      BuiltinID == AArch64::BI_WriteStatusReg) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    std::string SysRegStr;
    llvm::raw_string_ostream(SysRegStr) <<
        ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
        ((SysReg >> 11) & 7)              << ":" <<
        ((SysReg >> 7)  & 15)             << ":" <<
        ((SysReg >> 3)  & 15)             << ":" <<
        ( SysReg        & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }
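  // _ReadStatusReg/_WriteStatusReg encode the MRS/MSR operand as the
  // "op0:op1:CRn:CRm:op2" string expected by read_register/write_register;
  // op0 is reconstructed as 2 or 3 from the top bit of the packed 16-bit
  // immediate. For example (a sketch), an immediate of 0x5a10 decodes to
  // "3:3:4:2:0", which is NZCV.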
  9010. if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
  9011. llvm::Function *F =
  9012. CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
  9013. return Builder.CreateCall(F);
  9014. }
  9015. if (BuiltinID == AArch64::BI__builtin_sponentry) {
  9016. llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
  9017. return Builder.CreateCall(F);
  9018. }
  if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
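  // __mulh/__umulh compute the high 64 bits of a 64x64 multiplication by
  // widening both operands to i128, multiplying, and shifting right by 64;
  // the sign of the widening, the multiply flags (nsw vs. nuw) and the final
  // shift (ashr vs. lshr) all follow IsSigned.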
  9038. // Handle MSVC intrinsics before argument evaluation to prevent double
  9039. // evaluation.
  9040. if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
  9041. return EmitMSVCBuiltinExpr(*MsvcIntId, E);
  9042. // Find out if any arguments are required to be integer constant
  9043. // expressions.
  9044. unsigned ICEArguments = 0;
  9045. ASTContext::GetBuiltinTypeError Error;
  9046. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  9047. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  9048. llvm::SmallVector<Value*, 4> Ops;
  9049. Address PtrOp0 = Address::invalid();
  9050. for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
  9051. if (i == 0) {
  9052. switch (BuiltinID) {
  9053. case NEON::BI__builtin_neon_vld1_v:
  9054. case NEON::BI__builtin_neon_vld1q_v:
  9055. case NEON::BI__builtin_neon_vld1_dup_v:
  9056. case NEON::BI__builtin_neon_vld1q_dup_v:
  9057. case NEON::BI__builtin_neon_vld1_lane_v:
  9058. case NEON::BI__builtin_neon_vld1q_lane_v:
  9059. case NEON::BI__builtin_neon_vst1_v:
  9060. case NEON::BI__builtin_neon_vst1q_v:
  9061. case NEON::BI__builtin_neon_vst1_lane_v:
  9062. case NEON::BI__builtin_neon_vst1q_lane_v:
  9063. // Get the alignment for the argument in addition to the value;
  9064. // we'll use it later.
  9065. PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
  9066. Ops.push_back(PtrOp0.getPointer());
  9067. continue;
  9068. }
  9069. }
  9070. if ((ICEArguments & (1 << i)) == 0) {
  9071. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  9072. } else {
  9073. // If this is required to be a constant, constant fold it so that we know
  9074. // that the generated intrinsic gets a ConstantInt.
  9075. Ops.push_back(llvm::ConstantInt::get(
  9076. getLLVMContext(),
  9077. *E->getArg(i)->getIntegerConstantExpr(getContext())));
  9078. }
  9079. }
  9080. auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
  9081. const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
  9082. SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
  9083. if (Builtin) {
  9084. Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
  9085. Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
  9086. assert(Result && "SISD intrinsic should have been handled");
  9087. return Result;
  9088. }
  9089. const Expr *Arg = E->getArg(E->getNumArgs()-1);
  9090. NeonTypeFlags Type(0);
  9091. if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()))
  9092. // Determine the type of this overloaded NEON intrinsic.
  9093. Type = NeonTypeFlags(Result->getZExtValue());
  9094. bool usgn = Type.isUnsigned();
  9095. bool quad = Type.isQuad();
  9096. // Handle non-overloaded intrinsics first.
  9097. switch (BuiltinID) {
  9098. default: break;
  9099. case NEON::BI__builtin_neon_vabsh_f16:
  9100. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9101. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  9102. case NEON::BI__builtin_neon_vaddq_p128: {
  9103. llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
  9104. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9105. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  9106. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  9107. Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
  9108. llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
  9109. return Builder.CreateBitCast(Ops[0], Int128Ty);
  9110. }
  9111. case NEON::BI__builtin_neon_vldrq_p128: {
  9112. llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
  9113. llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
  9114. Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
  9115. return Builder.CreateAlignedLoad(Int128Ty, Ptr,
  9116. CharUnits::fromQuantity(16));
  9117. }
  9118. case NEON::BI__builtin_neon_vstrq_p128: {
  9119. llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
  9120. Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
  9121. return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  9122. }
  9123. case NEON::BI__builtin_neon_vcvts_f32_u32:
  9124. case NEON::BI__builtin_neon_vcvtd_f64_u64:
  9125. usgn = true;
  9126. LLVM_FALLTHROUGH;
  9127. case NEON::BI__builtin_neon_vcvts_f32_s32:
  9128. case NEON::BI__builtin_neon_vcvtd_f64_s64: {
  9129. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9130. bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
  9131. llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
  9132. llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
  9133. Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
  9134. if (usgn)
  9135. return Builder.CreateUIToFP(Ops[0], FTy);
  9136. return Builder.CreateSIToFP(Ops[0], FTy);
  9137. }
  9138. case NEON::BI__builtin_neon_vcvth_f16_u16:
  9139. case NEON::BI__builtin_neon_vcvth_f16_u32:
  9140. case NEON::BI__builtin_neon_vcvth_f16_u64:
  9141. usgn = true;
  9142. LLVM_FALLTHROUGH;
  9143. case NEON::BI__builtin_neon_vcvth_f16_s16:
  9144. case NEON::BI__builtin_neon_vcvth_f16_s32:
  9145. case NEON::BI__builtin_neon_vcvth_f16_s64: {
  9146. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9147. llvm::Type *FTy = HalfTy;
  9148. llvm::Type *InTy;
  9149. if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
  9150. InTy = Int64Ty;
  9151. else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
  9152. InTy = Int32Ty;
  9153. else
  9154. InTy = Int16Ty;
  9155. Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
  9156. if (usgn)
  9157. return Builder.CreateUIToFP(Ops[0], FTy);
  9158. return Builder.CreateSIToFP(Ops[0], FTy);
  9159. }
  9160. case NEON::BI__builtin_neon_vcvtah_u16_f16:
  9161. case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  9162. case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  9163. case NEON::BI__builtin_neon_vcvtph_u16_f16:
  9164. case NEON::BI__builtin_neon_vcvth_u16_f16:
  9165. case NEON::BI__builtin_neon_vcvtah_s16_f16:
  9166. case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  9167. case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  9168. case NEON::BI__builtin_neon_vcvtph_s16_f16:
  9169. case NEON::BI__builtin_neon_vcvth_s16_f16: {
  9170. unsigned Int;
  9171. llvm::Type* InTy = Int32Ty;
  9172. llvm::Type* FTy = HalfTy;
  9173. llvm::Type *Tys[2] = {InTy, FTy};
  9174. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9175. switch (BuiltinID) {
  9176. default: llvm_unreachable("missing builtin ID in switch!");
  9177. case NEON::BI__builtin_neon_vcvtah_u16_f16:
  9178. Int = Intrinsic::aarch64_neon_fcvtau; break;
  9179. case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  9180. Int = Intrinsic::aarch64_neon_fcvtmu; break;
  9181. case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  9182. Int = Intrinsic::aarch64_neon_fcvtnu; break;
  9183. case NEON::BI__builtin_neon_vcvtph_u16_f16:
  9184. Int = Intrinsic::aarch64_neon_fcvtpu; break;
  9185. case NEON::BI__builtin_neon_vcvth_u16_f16:
  9186. Int = Intrinsic::aarch64_neon_fcvtzu; break;
  9187. case NEON::BI__builtin_neon_vcvtah_s16_f16:
  9188. Int = Intrinsic::aarch64_neon_fcvtas; break;
  9189. case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  9190. Int = Intrinsic::aarch64_neon_fcvtms; break;
  9191. case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  9192. Int = Intrinsic::aarch64_neon_fcvtns; break;
  9193. case NEON::BI__builtin_neon_vcvtph_s16_f16:
  9194. Int = Intrinsic::aarch64_neon_fcvtps; break;
  9195. case NEON::BI__builtin_neon_vcvth_s16_f16:
  9196. Int = Intrinsic::aarch64_neon_fcvtzs; break;
  9197. }
  9198. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
  9199. return Builder.CreateTrunc(Ops[0], Int16Ty);
  9200. }
  9201. case NEON::BI__builtin_neon_vcaleh_f16:
  9202. case NEON::BI__builtin_neon_vcalth_f16:
  9203. case NEON::BI__builtin_neon_vcageh_f16:
  9204. case NEON::BI__builtin_neon_vcagth_f16: {
  9205. unsigned Int;
  9206. llvm::Type* InTy = Int32Ty;
  9207. llvm::Type* FTy = HalfTy;
  9208. llvm::Type *Tys[2] = {InTy, FTy};
  9209. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9210. switch (BuiltinID) {
  9211. default: llvm_unreachable("missing builtin ID in switch!");
  9212. case NEON::BI__builtin_neon_vcageh_f16:
  9213. Int = Intrinsic::aarch64_neon_facge; break;
  9214. case NEON::BI__builtin_neon_vcagth_f16:
  9215. Int = Intrinsic::aarch64_neon_facgt; break;
  9216. case NEON::BI__builtin_neon_vcaleh_f16:
  9217. Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
  9218. case NEON::BI__builtin_neon_vcalth_f16:
  9219. Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
  9220. }
  9221. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
  9222. return Builder.CreateTrunc(Ops[0], Int16Ty);
  9223. }
  9224. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  9225. case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
  9226. unsigned Int;
  9227. llvm::Type* InTy = Int32Ty;
  9228. llvm::Type* FTy = HalfTy;
  9229. llvm::Type *Tys[2] = {InTy, FTy};
  9230. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9231. switch (BuiltinID) {
  9232. default: llvm_unreachable("missing builtin ID in switch!");
  9233. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  9234. Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
  9235. case NEON::BI__builtin_neon_vcvth_n_u16_f16:
  9236. Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
  9237. }
  9238. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  9239. return Builder.CreateTrunc(Ops[0], Int16Ty);
  9240. }
  9241. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  9242. case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
  9243. unsigned Int;
  9244. llvm::Type* FTy = HalfTy;
  9245. llvm::Type* InTy = Int32Ty;
  9246. llvm::Type *Tys[2] = {FTy, InTy};
  9247. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9248. switch (BuiltinID) {
  9249. default: llvm_unreachable("missing builtin ID in switch!");
  9250. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  9251. Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
  9252. Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
  9253. break;
  9254. case NEON::BI__builtin_neon_vcvth_n_f16_u16:
  9255. Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
  9256. Ops[0] = Builder.CreateZExt(Ops[0], InTy);
  9257. break;
  9258. }
  9259. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  9260. }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
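  // The scalar pairwise adds here (and the f64/f32 variants below) are
  // open-coded as two extractelement operations plus an integer or FP add,
  // rather than calling a dedicated pairwise-add intrinsic.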
  9273. case NEON::BI__builtin_neon_vpaddd_f64: {
  9274. auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
  9275. Value *Vec = EmitScalarExpr(E->getArg(0));
  9276. // The vector is v2f64, so make sure it's bitcast to that.
  9277. Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
  9278. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  9279. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  9280. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  9281. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  9282. // Pairwise addition of a v2f64 into a scalar f64.
  9283. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  9284. }
  9285. case NEON::BI__builtin_neon_vpadds_f32: {
  9286. auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
  9287. Value *Vec = EmitScalarExpr(E->getArg(0));
  9288. // The vector is v2f32, so make sure it's bitcast to that.
  9289. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
  9290. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  9291. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  9292. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  9293. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  9294. // Pairwise addition of a v2f32 into a scalar f32.
  9295. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  9296. }
  9297. case NEON::BI__builtin_neon_vceqzd_s64:
  9298. case NEON::BI__builtin_neon_vceqzd_f64:
  9299. case NEON::BI__builtin_neon_vceqzs_f32:
  9300. case NEON::BI__builtin_neon_vceqzh_f16:
  9301. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9302. return EmitAArch64CompareBuiltinExpr(
  9303. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9304. ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  9305. case NEON::BI__builtin_neon_vcgezd_s64:
  9306. case NEON::BI__builtin_neon_vcgezd_f64:
  9307. case NEON::BI__builtin_neon_vcgezs_f32:
  9308. case NEON::BI__builtin_neon_vcgezh_f16:
  9309. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9310. return EmitAArch64CompareBuiltinExpr(
  9311. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9312. ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  9313. case NEON::BI__builtin_neon_vclezd_s64:
  9314. case NEON::BI__builtin_neon_vclezd_f64:
  9315. case NEON::BI__builtin_neon_vclezs_f32:
  9316. case NEON::BI__builtin_neon_vclezh_f16:
  9317. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9318. return EmitAArch64CompareBuiltinExpr(
  9319. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9320. ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  9321. case NEON::BI__builtin_neon_vcgtzd_s64:
  9322. case NEON::BI__builtin_neon_vcgtzd_f64:
  9323. case NEON::BI__builtin_neon_vcgtzs_f32:
  9324. case NEON::BI__builtin_neon_vcgtzh_f16:
  9325. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9326. return EmitAArch64CompareBuiltinExpr(
  9327. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9328. ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  9329. case NEON::BI__builtin_neon_vcltzd_s64:
  9330. case NEON::BI__builtin_neon_vcltzd_f64:
  9331. case NEON::BI__builtin_neon_vcltzs_f32:
  9332. case NEON::BI__builtin_neon_vcltzh_f16:
  9333. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9334. return EmitAArch64CompareBuiltinExpr(
  9335. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  9336. ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
  9337. case NEON::BI__builtin_neon_vceqzd_u64: {
  9338. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9339. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9340. Ops[0] =
  9341. Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
  9342. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  9343. }
  9344. case NEON::BI__builtin_neon_vceqd_f64:
  9345. case NEON::BI__builtin_neon_vcled_f64:
  9346. case NEON::BI__builtin_neon_vcltd_f64:
  9347. case NEON::BI__builtin_neon_vcged_f64:
  9348. case NEON::BI__builtin_neon_vcgtd_f64: {
  9349. llvm::CmpInst::Predicate P;
  9350. switch (BuiltinID) {
  9351. default: llvm_unreachable("missing builtin ID in switch!");
  9352. case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
  9353. case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
  9354. case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
  9355. case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
  9356. case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
  9357. }
  9358. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9359. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  9360. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  9361. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9362. return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  9363. }
  9364. case NEON::BI__builtin_neon_vceqs_f32:
  9365. case NEON::BI__builtin_neon_vcles_f32:
  9366. case NEON::BI__builtin_neon_vclts_f32:
  9367. case NEON::BI__builtin_neon_vcges_f32:
  9368. case NEON::BI__builtin_neon_vcgts_f32: {
  9369. llvm::CmpInst::Predicate P;
  9370. switch (BuiltinID) {
  9371. default: llvm_unreachable("missing builtin ID in switch!");
  9372. case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
  9373. case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
  9374. case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
  9375. case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
  9376. case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
  9377. }
  9378. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9379. Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
  9380. Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
  9381. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9382. return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  9383. }
  9384. case NEON::BI__builtin_neon_vceqh_f16:
  9385. case NEON::BI__builtin_neon_vcleh_f16:
  9386. case NEON::BI__builtin_neon_vclth_f16:
  9387. case NEON::BI__builtin_neon_vcgeh_f16:
  9388. case NEON::BI__builtin_neon_vcgth_f16: {
  9389. llvm::CmpInst::Predicate P;
  9390. switch (BuiltinID) {
  9391. default: llvm_unreachable("missing builtin ID in switch!");
  9392. case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
  9393. case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
  9394. case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
  9395. case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
  9396. case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
  9397. }
  9398. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9399. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  9400. Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
  9401. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  9402. return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  9403. }
  9404. case NEON::BI__builtin_neon_vceqd_s64:
  9405. case NEON::BI__builtin_neon_vceqd_u64:
  9406. case NEON::BI__builtin_neon_vcgtd_s64:
  9407. case NEON::BI__builtin_neon_vcgtd_u64:
  9408. case NEON::BI__builtin_neon_vcltd_s64:
  9409. case NEON::BI__builtin_neon_vcltd_u64:
  9410. case NEON::BI__builtin_neon_vcged_u64:
  9411. case NEON::BI__builtin_neon_vcged_s64:
  9412. case NEON::BI__builtin_neon_vcled_u64:
  9413. case NEON::BI__builtin_neon_vcled_s64: {
  9414. llvm::CmpInst::Predicate P;
  9415. switch (BuiltinID) {
  9416. default: llvm_unreachable("missing builtin ID in switch!");
  9417. case NEON::BI__builtin_neon_vceqd_s64:
  9418. case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
  9419. case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
  9420. case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
  9421. case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
  9422. case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
  9423. case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
  9424. case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
  9425. case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
  9426. case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
  9427. }
  9428. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9429. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9430. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9431. Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
  9432. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  9433. }
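  // All of the scalar compare builtins above follow the same shape: bitcast
  // both operands to the scalar type, emit an fcmp/icmp with the selected
  // predicate, then sign-extend the i1 result so the lane becomes an
  // all-ones/all-zeros mask of the original width.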
  9434. case NEON::BI__builtin_neon_vtstd_s64:
  9435. case NEON::BI__builtin_neon_vtstd_u64: {
  9436. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9437. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  9438. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9439. Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  9440. Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
  9441. llvm::Constant::getNullValue(Int64Ty));
  9442. return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  9443. }
  9444. case NEON::BI__builtin_neon_vset_lane_i8:
  9445. case NEON::BI__builtin_neon_vset_lane_i16:
  9446. case NEON::BI__builtin_neon_vset_lane_i32:
  9447. case NEON::BI__builtin_neon_vset_lane_i64:
  9448. case NEON::BI__builtin_neon_vset_lane_bf16:
  9449. case NEON::BI__builtin_neon_vset_lane_f32:
  9450. case NEON::BI__builtin_neon_vsetq_lane_i8:
  9451. case NEON::BI__builtin_neon_vsetq_lane_i16:
  9452. case NEON::BI__builtin_neon_vsetq_lane_i32:
  9453. case NEON::BI__builtin_neon_vsetq_lane_i64:
  9454. case NEON::BI__builtin_neon_vsetq_lane_bf16:
  9455. case NEON::BI__builtin_neon_vsetq_lane_f32:
  9456. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9457. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9458. case NEON::BI__builtin_neon_vset_lane_f64:
  9459. // The vector type needs a cast for the v1f64 variant.
  9460. Ops[1] =
  9461. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
  9462. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9463. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9464. case NEON::BI__builtin_neon_vsetq_lane_f64:
  9465. // The vector type needs a cast for the v2f64 variant.
  9466. Ops[1] =
  9467. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
  9468. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  9469. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  9470. case NEON::BI__builtin_neon_vget_lane_i8:
  9471. case NEON::BI__builtin_neon_vdupb_lane_i8:
  9472. Ops[0] =
  9473. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
  9474. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9475. "vget_lane");
  9476. case NEON::BI__builtin_neon_vgetq_lane_i8:
  9477. case NEON::BI__builtin_neon_vdupb_laneq_i8:
  9478. Ops[0] =
  9479. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
  9480. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9481. "vgetq_lane");
  9482. case NEON::BI__builtin_neon_vget_lane_i16:
  9483. case NEON::BI__builtin_neon_vduph_lane_i16:
  9484. Ops[0] =
  9485. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
  9486. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9487. "vget_lane");
  9488. case NEON::BI__builtin_neon_vgetq_lane_i16:
  9489. case NEON::BI__builtin_neon_vduph_laneq_i16:
  9490. Ops[0] =
  9491. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
  9492. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9493. "vgetq_lane");
  9494. case NEON::BI__builtin_neon_vget_lane_i32:
  9495. case NEON::BI__builtin_neon_vdups_lane_i32:
  9496. Ops[0] =
  9497. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
  9498. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9499. "vget_lane");
  9500. case NEON::BI__builtin_neon_vdups_lane_f32:
  9501. Ops[0] =
  9502. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
  9503. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9504. "vdups_lane");
  9505. case NEON::BI__builtin_neon_vgetq_lane_i32:
  9506. case NEON::BI__builtin_neon_vdups_laneq_i32:
  9507. Ops[0] =
  9508. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
  9509. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9510. "vgetq_lane");
  9511. case NEON::BI__builtin_neon_vget_lane_i64:
  9512. case NEON::BI__builtin_neon_vdupd_lane_i64:
  9513. Ops[0] =
  9514. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
  9515. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9516. "vget_lane");
  9517. case NEON::BI__builtin_neon_vdupd_lane_f64:
  9518. Ops[0] =
  9519. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
  9520. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9521. "vdupd_lane");
  9522. case NEON::BI__builtin_neon_vgetq_lane_i64:
  9523. case NEON::BI__builtin_neon_vdupd_laneq_i64:
  9524. Ops[0] =
  9525. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
  9526. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9527. "vgetq_lane");
  9528. case NEON::BI__builtin_neon_vget_lane_f32:
  9529. Ops[0] =
  9530. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
  9531. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9532. "vget_lane");
  9533. case NEON::BI__builtin_neon_vget_lane_f64:
  9534. Ops[0] =
  9535. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
  9536. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9537. "vget_lane");
  9538. case NEON::BI__builtin_neon_vgetq_lane_f32:
  9539. case NEON::BI__builtin_neon_vdups_laneq_f32:
  9540. Ops[0] =
  9541. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
  9542. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9543. "vgetq_lane");
  9544. case NEON::BI__builtin_neon_vgetq_lane_f64:
  9545. case NEON::BI__builtin_neon_vdupd_laneq_f64:
  9546. Ops[0] =
  9547. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
  9548. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9549. "vgetq_lane");
  9550. case NEON::BI__builtin_neon_vaddh_f16:
  9551. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9552. return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  9553. case NEON::BI__builtin_neon_vsubh_f16:
  9554. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9555. return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  9556. case NEON::BI__builtin_neon_vmulh_f16:
  9557. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9558. return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  9559. case NEON::BI__builtin_neon_vdivh_f16:
  9560. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9561. return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  9562. case NEON::BI__builtin_neon_vfmah_f16:
  9563. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  9564. return emitCallMaybeConstrainedFPBuiltin(
  9565. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
  9566. {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  9567. case NEON::BI__builtin_neon_vfmsh_f16: {
  9568. // FIXME: This should be an fneg instruction:
  9569. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
  9570. Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
  9571. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  9572. return emitCallMaybeConstrainedFPBuiltin(
  9573. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
  9574. {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
  9575. }
  9576. case NEON::BI__builtin_neon_vaddd_s64:
  9577. case NEON::BI__builtin_neon_vaddd_u64:
  9578. return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  9579. case NEON::BI__builtin_neon_vsubd_s64:
  9580. case NEON::BI__builtin_neon_vsubd_u64:
  9581. return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  9582. case NEON::BI__builtin_neon_vqdmlalh_s16:
  9583. case NEON::BI__builtin_neon_vqdmlslh_s16: {
  9584. SmallVector<Value *, 2> ProductOps;
  9585. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  9586. ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
  9587. auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
  9588. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  9589. ProductOps, "vqdmlXl");
  9590. Constant *CI = ConstantInt::get(SizeTy, 0);
  9591. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  9592. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
  9593. ? Intrinsic::aarch64_neon_sqadd
  9594. : Intrinsic::aarch64_neon_sqsub;
  9595. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  9596. }
  9597. case NEON::BI__builtin_neon_vqshlud_n_s64: {
  9598. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9599. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  9600. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
  9601. Ops, "vqshlu_n");
  9602. }
  9603. case NEON::BI__builtin_neon_vqshld_n_u64:
  9604. case NEON::BI__builtin_neon_vqshld_n_s64: {
  9605. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
  9606. ? Intrinsic::aarch64_neon_uqshl
  9607. : Intrinsic::aarch64_neon_sqshl;
  9608. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9609. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  9610. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  9611. }
  9612. case NEON::BI__builtin_neon_vrshrd_n_u64:
  9613. case NEON::BI__builtin_neon_vrshrd_n_s64: {
  9614. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
  9615. ? Intrinsic::aarch64_neon_urshl
  9616. : Intrinsic::aarch64_neon_srshl;
  9617. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9618. int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
  9619. Ops[1] = ConstantInt::get(Int64Ty, -SV);
  9620. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  9621. }
  9622. case NEON::BI__builtin_neon_vrsrad_n_u64:
  9623. case NEON::BI__builtin_neon_vrsrad_n_s64: {
  9624. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
  9625. ? Intrinsic::aarch64_neon_urshl
  9626. : Intrinsic::aarch64_neon_srshl;
  9627. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  9628. Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
  9629. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
  9630. {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
  9631. return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  9632. }
  9633. case NEON::BI__builtin_neon_vshld_n_s64:
  9634. case NEON::BI__builtin_neon_vshld_n_u64: {
  9635. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  9636. return Builder.CreateShl(
  9637. Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  9638. }
  9639. case NEON::BI__builtin_neon_vshrd_n_s64: {
  9640. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  9641. return Builder.CreateAShr(
  9642. Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  9643. Amt->getZExtValue())),
  9644. "shrd_n");
  9645. }
  9646. case NEON::BI__builtin_neon_vshrd_n_u64: {
  9647. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  9648. uint64_t ShiftAmt = Amt->getZExtValue();
  9649. // Right-shifting an unsigned value by its size yields 0.
  9650. if (ShiftAmt == 64)
  9651. return ConstantInt::get(Int64Ty, 0);
  9652. return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
  9653. "shrd_n");
  9654. }
  9655. case NEON::BI__builtin_neon_vsrad_n_s64: {
  9656. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  9657. Ops[1] = Builder.CreateAShr(
  9658. Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  9659. Amt->getZExtValue())),
  9660. "shrd_n");
  9661. return Builder.CreateAdd(Ops[0], Ops[1]);
  9662. }
  9663. case NEON::BI__builtin_neon_vsrad_n_u64: {
  9664. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  9665. uint64_t ShiftAmt = Amt->getZExtValue();
  9666. // Right-shifting an unsigned value by its size yields 0.
  9667. // As Op + 0 = Op, return Ops[0] directly.
  9668. if (ShiftAmt == 64)
  9669. return Ops[0];
  9670. Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
  9671. "shrd_n");
  9672. return Builder.CreateAdd(Ops[0], Ops[1]);
  9673. }
  9674. case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  9675. case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  9676. case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  9677. case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
  9678. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  9679. "lane");
  9680. SmallVector<Value *, 2> ProductOps;
  9681. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  9682. ProductOps.push_back(vectorWrapScalar16(Ops[2]));
  9683. auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
  9684. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  9685. ProductOps, "vqdmlXl");
  9686. Constant *CI = ConstantInt::get(SizeTy, 0);
  9687. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  9688. Ops.pop_back();
  9689. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
  9690. BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
  9691. ? Intrinsic::aarch64_neon_sqadd
  9692. : Intrinsic::aarch64_neon_sqsub;
  9693. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  9694. }
  9695. case NEON::BI__builtin_neon_vqdmlals_s32:
  9696. case NEON::BI__builtin_neon_vqdmlsls_s32: {
  9697. SmallVector<Value *, 2> ProductOps;
  9698. ProductOps.push_back(Ops[1]);
  9699. ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
  9700. Ops[1] =
  9701. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  9702. ProductOps, "vqdmlXl");
  9703. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
  9704. ? Intrinsic::aarch64_neon_sqadd
  9705. : Intrinsic::aarch64_neon_sqsub;
  9706. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  9707. }
  9708. case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  9709. case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  9710. case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  9711. case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
  9712. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  9713. "lane");
  9714. SmallVector<Value *, 2> ProductOps;
  9715. ProductOps.push_back(Ops[1]);
  9716. ProductOps.push_back(Ops[2]);
  9717. Ops[1] =
  9718. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  9719. ProductOps, "vqdmlXl");
  9720. Ops.pop_back();
  9721. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
  9722. BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
  9723. ? Intrinsic::aarch64_neon_sqadd
  9724. : Intrinsic::aarch64_neon_sqsub;
  9725. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  9726. }
  9727. case NEON::BI__builtin_neon_vget_lane_bf16:
  9728. case NEON::BI__builtin_neon_vduph_lane_bf16:
  9729. case NEON::BI__builtin_neon_vduph_lane_f16: {
  9730. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9731. "vget_lane");
  9732. }
  9733. case NEON::BI__builtin_neon_vgetq_lane_bf16:
  9734. case NEON::BI__builtin_neon_vduph_laneq_bf16:
  9735. case NEON::BI__builtin_neon_vduph_laneq_f16: {
  9736. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  9737. "vgetq_lane");
  9738. }
  case AArch64::BI_InterlockedAdd: {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Add, Arg0, Arg1,
        llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateAdd(RMWI, Arg1);
  }
  }
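  // Anything not handled above is an overloaded NEON builtin: from here on the
  // code recovers the element type from the trailing type-flag argument,
  // defers to the shared ARM/AArch64 table where an entry exists, and only
  // falls back to the AArch64-specific expansion below for the remaining cases.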
  9748. llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  9749. llvm::Type *Ty = VTy;
  9750. if (!Ty)
  9751. return nullptr;
  9752. // Not all intrinsics handled by the common case work for AArch64 yet, so only
  9753. // defer to common code if it's been added to our special map.
  9754. Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
  9755. AArch64SIMDIntrinsicsProvenSorted);
  9756. if (Builtin)
  9757. return EmitCommonNeonBuiltinExpr(
  9758. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  9759. Builtin->NameHint, Builtin->TypeModifier, E, Ops,
  9760. /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
  9761. if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
  9762. return V;
  9763. unsigned Int;
  9764. switch (BuiltinID) {
  9765. default: return nullptr;
  9766. case NEON::BI__builtin_neon_vbsl_v:
  9767. case NEON::BI__builtin_neon_vbslq_v: {
  9768. llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
  9769. Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
  9770. Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
  9771. Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
  9772. Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
  9773. Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
  9774. Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
  9775. return Builder.CreateBitCast(Ops[0], Ty);
  9776. }
  9777. case NEON::BI__builtin_neon_vfma_lane_v:
  9778. case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
  9779. // The ARM builtins (and instructions) have the addend as the first
  9780. // operand, but the 'fma' intrinsics have it last. Swap it around here.
  9781. Value *Addend = Ops[0];
  9782. Value *Multiplicand = Ops[1];
  9783. Value *LaneSource = Ops[2];
  9784. Ops[0] = Multiplicand;
  9785. Ops[1] = LaneSource;
  9786. Ops[2] = Addend;
  9787. // Now adjust things to handle the lane access.
  9788. auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
  9789. ? llvm::FixedVectorType::get(VTy->getElementType(),
  9790. VTy->getNumElements() / 2)
  9791. : VTy;
  9792. llvm::Constant *cst = cast<Constant>(Ops[3]);
  9793. Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
  9794. Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
  9795. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
  9796. Ops.pop_back();
  9797. Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
  9798. : Intrinsic::fma;
  9799. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  9800. }
  9801. case NEON::BI__builtin_neon_vfma_laneq_v: {
  9802. auto *VTy = cast<llvm::FixedVectorType>(Ty);
  9803. // v1f64 fma should be mapped to Neon scalar f64 fma
  9804. if (VTy && VTy->getElementType() == DoubleTy) {
  9805. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  9806. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  9807. llvm::FixedVectorType *VTy =
  9808. GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
  9809. Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
  9810. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  9811. Value *Result;
  9812. Result = emitCallMaybeConstrainedFPBuiltin(
  9813. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
  9814. DoubleTy, {Ops[1], Ops[2], Ops[0]});
  9815. return Builder.CreateBitCast(Result, Ty);
  9816. }
  9817. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  9818. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  9819. auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
  9820. VTy->getNumElements() * 2);
  9821. Ops[2] = Builder.CreateBitCast(Ops[2], STy);
  9822. Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
  9823. cast<ConstantInt>(Ops[3]));
  9824. Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
  9825. return emitCallMaybeConstrainedFPBuiltin(
  9826. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  9827. {Ops[2], Ops[1], Ops[0]});
  9828. }
  9829. case NEON::BI__builtin_neon_vfmaq_laneq_v: {
  9830. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  9831. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  9832. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  9833. Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
  9834. return emitCallMaybeConstrainedFPBuiltin(
  9835. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  9836. {Ops[2], Ops[1], Ops[0]});
  9837. }
  9838. case NEON::BI__builtin_neon_vfmah_lane_f16:
  9839. case NEON::BI__builtin_neon_vfmas_lane_f32:
  9840. case NEON::BI__builtin_neon_vfmah_laneq_f16:
  9841. case NEON::BI__builtin_neon_vfmas_laneq_f32:
  9842. case NEON::BI__builtin_neon_vfmad_lane_f64:
  9843. case NEON::BI__builtin_neon_vfmad_laneq_f64: {
  9844. Ops.push_back(EmitScalarExpr(E->getArg(3)));
  9845. llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
  9846. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  9847. return emitCallMaybeConstrainedFPBuiltin(
  9848. *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
  9849. {Ops[1], Ops[2], Ops[0]});
  9850. }
  9851. case NEON::BI__builtin_neon_vmull_v:
  9852. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9853. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
  9854. if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
  9855. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  9856. case NEON::BI__builtin_neon_vmax_v:
  9857. case NEON::BI__builtin_neon_vmaxq_v:
  9858. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9859. Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
  9860. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
  9861. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  9862. case NEON::BI__builtin_neon_vmaxh_f16: {
  9863. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9864. Int = Intrinsic::aarch64_neon_fmax;
  9865. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  9866. }
  9867. case NEON::BI__builtin_neon_vmin_v:
  9868. case NEON::BI__builtin_neon_vminq_v:
  9869. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9870. Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
  9871. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
  9872. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  9873. case NEON::BI__builtin_neon_vminh_f16: {
  9874. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9875. Int = Intrinsic::aarch64_neon_fmin;
  9876. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  9877. }
  9878. case NEON::BI__builtin_neon_vabd_v:
  9879. case NEON::BI__builtin_neon_vabdq_v:
  9880. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9881. Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
  9882. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
  9883. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  9884. case NEON::BI__builtin_neon_vpadal_v:
  9885. case NEON::BI__builtin_neon_vpadalq_v: {
  9886. unsigned ArgElts = VTy->getNumElements();
  9887. llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
  9888. unsigned BitWidth = EltTy->getBitWidth();
  9889. auto *ArgTy = llvm::FixedVectorType::get(
  9890. llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
  9891. llvm::Type* Tys[2] = { VTy, ArgTy };
  9892. Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
  9893. SmallVector<llvm::Value*, 1> TmpOps;
  9894. TmpOps.push_back(Ops[1]);
  9895. Function *F = CGM.getIntrinsic(Int, Tys);
  9896. llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
  9897. llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
  9898. return Builder.CreateAdd(tmp, addend);
  9899. }
  9900. case NEON::BI__builtin_neon_vpmin_v:
  9901. case NEON::BI__builtin_neon_vpminq_v:
  9902. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9903. Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
  9904. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
  9905. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  9906. case NEON::BI__builtin_neon_vpmax_v:
  9907. case NEON::BI__builtin_neon_vpmaxq_v:
  9908. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  9909. Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
  9910. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
  9911. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  9912. case NEON::BI__builtin_neon_vminnm_v:
  9913. case NEON::BI__builtin_neon_vminnmq_v:
  9914. Int = Intrinsic::aarch64_neon_fminnm;
  9915. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  9916. case NEON::BI__builtin_neon_vminnmh_f16:
  9917. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9918. Int = Intrinsic::aarch64_neon_fminnm;
  9919. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  9920. case NEON::BI__builtin_neon_vmaxnm_v:
  9921. case NEON::BI__builtin_neon_vmaxnmq_v:
  9922. Int = Intrinsic::aarch64_neon_fmaxnm;
  9923. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  9924. case NEON::BI__builtin_neon_vmaxnmh_f16:
  9925. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9926. Int = Intrinsic::aarch64_neon_fmaxnm;
  9927. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  9928. case NEON::BI__builtin_neon_vrecpss_f32: {
  9929. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9930. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
  9931. Ops, "vrecps");
  9932. }
  9933. case NEON::BI__builtin_neon_vrecpsd_f64:
  9934. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9935. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
  9936. Ops, "vrecps");
  9937. case NEON::BI__builtin_neon_vrecpsh_f16:
  9938. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  9939. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
  9940. Ops, "vrecps");
  9941. case NEON::BI__builtin_neon_vqshrun_n_v:
  9942. Int = Intrinsic::aarch64_neon_sqshrun;
  9943. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  9944. case NEON::BI__builtin_neon_vqrshrun_n_v:
  9945. Int = Intrinsic::aarch64_neon_sqrshrun;
  9946. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  9947. case NEON::BI__builtin_neon_vqshrn_n_v:
  9948. Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
  9949. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  9950. case NEON::BI__builtin_neon_vrshrn_n_v:
  9951. Int = Intrinsic::aarch64_neon_rshrn;
  9952. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  9953. case NEON::BI__builtin_neon_vqrshrn_n_v:
  9954. Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
  9955. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  9956. case NEON::BI__builtin_neon_vrndah_f16: {
  9957. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9958. Int = Builder.getIsFPConstrained()
  9959. ? Intrinsic::experimental_constrained_round
  9960. : Intrinsic::round;
  9961. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  9962. }
  9963. case NEON::BI__builtin_neon_vrnda_v:
  9964. case NEON::BI__builtin_neon_vrndaq_v: {
  9965. Int = Builder.getIsFPConstrained()
  9966. ? Intrinsic::experimental_constrained_round
  9967. : Intrinsic::round;
  9968. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  9969. }
  9970. case NEON::BI__builtin_neon_vrndih_f16: {
  9971. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9972. Int = Builder.getIsFPConstrained()
  9973. ? Intrinsic::experimental_constrained_nearbyint
  9974. : Intrinsic::nearbyint;
  9975. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  9976. }
  9977. case NEON::BI__builtin_neon_vrndmh_f16: {
  9978. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9979. Int = Builder.getIsFPConstrained()
  9980. ? Intrinsic::experimental_constrained_floor
  9981. : Intrinsic::floor;
  9982. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  9983. }
  9984. case NEON::BI__builtin_neon_vrndm_v:
  9985. case NEON::BI__builtin_neon_vrndmq_v: {
  9986. Int = Builder.getIsFPConstrained()
  9987. ? Intrinsic::experimental_constrained_floor
  9988. : Intrinsic::floor;
  9989. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  9990. }
  9991. case NEON::BI__builtin_neon_vrndnh_f16: {
  9992. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  9993. Int = Builder.getIsFPConstrained()
  9994. ? Intrinsic::experimental_constrained_roundeven
  9995. : Intrinsic::roundeven;
  9996. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  9997. }
  9998. case NEON::BI__builtin_neon_vrndn_v:
  9999. case NEON::BI__builtin_neon_vrndnq_v: {
  10000. Int = Builder.getIsFPConstrained()
  10001. ? Intrinsic::experimental_constrained_roundeven
  10002. : Intrinsic::roundeven;
  10003. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  10004. }
  10005. case NEON::BI__builtin_neon_vrndns_f32: {
  10006. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10007. Int = Builder.getIsFPConstrained()
  10008. ? Intrinsic::experimental_constrained_roundeven
  10009. : Intrinsic::roundeven;
  10010. return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  10011. }
  10012. case NEON::BI__builtin_neon_vrndph_f16: {
  10013. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10014. Int = Builder.getIsFPConstrained()
  10015. ? Intrinsic::experimental_constrained_ceil
  10016. : Intrinsic::ceil;
  10017. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  10018. }
  10019. case NEON::BI__builtin_neon_vrndp_v:
  10020. case NEON::BI__builtin_neon_vrndpq_v: {
  10021. Int = Builder.getIsFPConstrained()
  10022. ? Intrinsic::experimental_constrained_ceil
  10023. : Intrinsic::ceil;
  10024. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  10025. }
  10026. case NEON::BI__builtin_neon_vrndxh_f16: {
  10027. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10028. Int = Builder.getIsFPConstrained()
  10029. ? Intrinsic::experimental_constrained_rint
  10030. : Intrinsic::rint;
  10031. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  10032. }
  10033. case NEON::BI__builtin_neon_vrndx_v:
  10034. case NEON::BI__builtin_neon_vrndxq_v: {
  10035. Int = Builder.getIsFPConstrained()
  10036. ? Intrinsic::experimental_constrained_rint
  10037. : Intrinsic::rint;
  10038. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  10039. }
  10040. case NEON::BI__builtin_neon_vrndh_f16: {
  10041. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10042. Int = Builder.getIsFPConstrained()
  10043. ? Intrinsic::experimental_constrained_trunc
  10044. : Intrinsic::trunc;
  10045. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  10046. }
  10047. case NEON::BI__builtin_neon_vrnd32x_v:
  10048. case NEON::BI__builtin_neon_vrnd32xq_v: {
  10049. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10050. Int = Intrinsic::aarch64_neon_frint32x;
  10051. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  10052. }
  10053. case NEON::BI__builtin_neon_vrnd32z_v:
  10054. case NEON::BI__builtin_neon_vrnd32zq_v: {
  10055. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10056. Int = Intrinsic::aarch64_neon_frint32z;
  10057. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  10058. }
  10059. case NEON::BI__builtin_neon_vrnd64x_v:
  10060. case NEON::BI__builtin_neon_vrnd64xq_v: {
  10061. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10062. Int = Intrinsic::aarch64_neon_frint64x;
  10063. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  10064. }
  10065. case NEON::BI__builtin_neon_vrnd64z_v:
  10066. case NEON::BI__builtin_neon_vrnd64zq_v: {
  10067. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10068. Int = Intrinsic::aarch64_neon_frint64z;
  10069. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  10070. }
  10071. case NEON::BI__builtin_neon_vrnd_v:
  10072. case NEON::BI__builtin_neon_vrndq_v: {
  10073. Int = Builder.getIsFPConstrained()
  10074. ? Intrinsic::experimental_constrained_trunc
  10075. : Intrinsic::trunc;
  10076. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  10077. }
  10078. case NEON::BI__builtin_neon_vcvt_f64_v:
  10079. case NEON::BI__builtin_neon_vcvtq_f64_v:
  10080. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10081. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
  10082. return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  10083. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  10084. case NEON::BI__builtin_neon_vcvt_f64_f32: {
  10085. assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
  10086. "unexpected vcvt_f64_f32 builtin");
  10087. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
  10088. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  10089. return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  10090. }
  10091. case NEON::BI__builtin_neon_vcvt_f32_f64: {
  10092. assert(Type.getEltType() == NeonTypeFlags::Float32 &&
  10093. "unexpected vcvt_f32_f64 builtin");
  10094. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
  10095. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  10096. return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  10097. }
  10098. case NEON::BI__builtin_neon_vcvt_s32_v:
  10099. case NEON::BI__builtin_neon_vcvt_u32_v:
  10100. case NEON::BI__builtin_neon_vcvt_s64_v:
  10101. case NEON::BI__builtin_neon_vcvt_u64_v:
  10102. case NEON::BI__builtin_neon_vcvt_s16_v:
  10103. case NEON::BI__builtin_neon_vcvt_u16_v:
  10104. case NEON::BI__builtin_neon_vcvtq_s32_v:
  10105. case NEON::BI__builtin_neon_vcvtq_u32_v:
  10106. case NEON::BI__builtin_neon_vcvtq_s64_v:
  10107. case NEON::BI__builtin_neon_vcvtq_u64_v:
  10108. case NEON::BI__builtin_neon_vcvtq_s16_v:
  10109. case NEON::BI__builtin_neon_vcvtq_u16_v: {
  10110. Int =
  10111. usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
  10112. llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
  10113. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
  10114. }
  10115. case NEON::BI__builtin_neon_vcvta_s16_v:
  10116. case NEON::BI__builtin_neon_vcvta_u16_v:
  10117. case NEON::BI__builtin_neon_vcvta_s32_v:
  10118. case NEON::BI__builtin_neon_vcvtaq_s16_v:
  10119. case NEON::BI__builtin_neon_vcvtaq_s32_v:
  10120. case NEON::BI__builtin_neon_vcvta_u32_v:
  10121. case NEON::BI__builtin_neon_vcvtaq_u16_v:
  10122. case NEON::BI__builtin_neon_vcvtaq_u32_v:
  10123. case NEON::BI__builtin_neon_vcvta_s64_v:
  10124. case NEON::BI__builtin_neon_vcvtaq_s64_v:
  10125. case NEON::BI__builtin_neon_vcvta_u64_v:
  10126. case NEON::BI__builtin_neon_vcvtaq_u64_v: {
  10127. Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
  10128. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10129. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  10130. }
  10131. case NEON::BI__builtin_neon_vcvtm_s16_v:
  10132. case NEON::BI__builtin_neon_vcvtm_s32_v:
  10133. case NEON::BI__builtin_neon_vcvtmq_s16_v:
  10134. case NEON::BI__builtin_neon_vcvtmq_s32_v:
  10135. case NEON::BI__builtin_neon_vcvtm_u16_v:
  10136. case NEON::BI__builtin_neon_vcvtm_u32_v:
  10137. case NEON::BI__builtin_neon_vcvtmq_u16_v:
  10138. case NEON::BI__builtin_neon_vcvtmq_u32_v:
  10139. case NEON::BI__builtin_neon_vcvtm_s64_v:
  10140. case NEON::BI__builtin_neon_vcvtmq_s64_v:
  10141. case NEON::BI__builtin_neon_vcvtm_u64_v:
  10142. case NEON::BI__builtin_neon_vcvtmq_u64_v: {
  10143. Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
  10144. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10145. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  10146. }
  10147. case NEON::BI__builtin_neon_vcvtn_s16_v:
  10148. case NEON::BI__builtin_neon_vcvtn_s32_v:
  10149. case NEON::BI__builtin_neon_vcvtnq_s16_v:
  10150. case NEON::BI__builtin_neon_vcvtnq_s32_v:
  10151. case NEON::BI__builtin_neon_vcvtn_u16_v:
  10152. case NEON::BI__builtin_neon_vcvtn_u32_v:
  10153. case NEON::BI__builtin_neon_vcvtnq_u16_v:
  10154. case NEON::BI__builtin_neon_vcvtnq_u32_v:
  10155. case NEON::BI__builtin_neon_vcvtn_s64_v:
  10156. case NEON::BI__builtin_neon_vcvtnq_s64_v:
  10157. case NEON::BI__builtin_neon_vcvtn_u64_v:
  10158. case NEON::BI__builtin_neon_vcvtnq_u64_v: {
  10159. Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
  10160. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10161. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  10162. }
  10163. case NEON::BI__builtin_neon_vcvtp_s16_v:
  10164. case NEON::BI__builtin_neon_vcvtp_s32_v:
  10165. case NEON::BI__builtin_neon_vcvtpq_s16_v:
  10166. case NEON::BI__builtin_neon_vcvtpq_s32_v:
  10167. case NEON::BI__builtin_neon_vcvtp_u16_v:
  10168. case NEON::BI__builtin_neon_vcvtp_u32_v:
  10169. case NEON::BI__builtin_neon_vcvtpq_u16_v:
  10170. case NEON::BI__builtin_neon_vcvtpq_u32_v:
  10171. case NEON::BI__builtin_neon_vcvtp_s64_v:
  10172. case NEON::BI__builtin_neon_vcvtpq_s64_v:
  10173. case NEON::BI__builtin_neon_vcvtp_u64_v:
  10174. case NEON::BI__builtin_neon_vcvtpq_u64_v: {
  10175. Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
  10176. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  10177. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  10178. }
  10179. case NEON::BI__builtin_neon_vmulx_v:
  10180. case NEON::BI__builtin_neon_vmulxq_v: {
  10181. Int = Intrinsic::aarch64_neon_fmulx;
  10182. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  10183. }
  10184. case NEON::BI__builtin_neon_vmulxh_lane_f16:
  10185. case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
  10186. // vmulx_lane should be mapped to Neon scalar mulx after
  10187. // extracting the scalar element
  10188. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  10189. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  10190. Ops.pop_back();
  10191. Int = Intrinsic::aarch64_neon_fmulx;
  10192. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  10193. }
  10194. case NEON::BI__builtin_neon_vmul_lane_v:
  10195. case NEON::BI__builtin_neon_vmul_laneq_v: {
  10196. // v1f64 vmul_lane should be mapped to Neon scalar mul lane
  10197. bool Quad = false;
  10198. if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
  10199. Quad = true;
  10200. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  10201. llvm::FixedVectorType *VTy =
  10202. GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
  10203. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  10204. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  10205. Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
  10206. return Builder.CreateBitCast(Result, Ty);
  10207. }
  10208. case NEON::BI__builtin_neon_vnegd_s64:
  10209. return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  10210. case NEON::BI__builtin_neon_vnegh_f16:
  10211. return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  10212. case NEON::BI__builtin_neon_vpmaxnm_v:
  10213. case NEON::BI__builtin_neon_vpmaxnmq_v: {
  10214. Int = Intrinsic::aarch64_neon_fmaxnmp;
  10215. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  10216. }
  10217. case NEON::BI__builtin_neon_vpminnm_v:
  10218. case NEON::BI__builtin_neon_vpminnmq_v: {
  10219. Int = Intrinsic::aarch64_neon_fminnmp;
  10220. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  10221. }
  10222. case NEON::BI__builtin_neon_vsqrth_f16: {
  10223. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10224. Int = Builder.getIsFPConstrained()
  10225. ? Intrinsic::experimental_constrained_sqrt
  10226. : Intrinsic::sqrt;
  10227. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  10228. }
  10229. case NEON::BI__builtin_neon_vsqrt_v:
  10230. case NEON::BI__builtin_neon_vsqrtq_v: {
  10231. Int = Builder.getIsFPConstrained()
  10232. ? Intrinsic::experimental_constrained_sqrt
  10233. : Intrinsic::sqrt;
  10234. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10235. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  10236. }
  10237. case NEON::BI__builtin_neon_vrbit_v:
  10238. case NEON::BI__builtin_neon_vrbitq_v: {
  10239. Int = Intrinsic::bitreverse;
  10240. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  10241. }
  10242. case NEON::BI__builtin_neon_vaddv_u8:
  10243. // FIXME: These are handled by the AArch64 scalar code.
  10244. usgn = true;
  10245. LLVM_FALLTHROUGH;
  10246. case NEON::BI__builtin_neon_vaddv_s8: {
  10247. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10248. Ty = Int32Ty;
  10249. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10250. llvm::Type *Tys[2] = { Ty, VTy };
  10251. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10252. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10253. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10254. }
  10255. case NEON::BI__builtin_neon_vaddv_u16:
  10256. usgn = true;
  10257. LLVM_FALLTHROUGH;
  10258. case NEON::BI__builtin_neon_vaddv_s16: {
  10259. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10260. Ty = Int32Ty;
  10261. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10262. llvm::Type *Tys[2] = { Ty, VTy };
  10263. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10264. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10265. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10266. }
  10267. case NEON::BI__builtin_neon_vaddvq_u8:
  10268. usgn = true;
  10269. LLVM_FALLTHROUGH;
  10270. case NEON::BI__builtin_neon_vaddvq_s8: {
  10271. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10272. Ty = Int32Ty;
  10273. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10274. llvm::Type *Tys[2] = { Ty, VTy };
  10275. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10276. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10277. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10278. }
  10279. case NEON::BI__builtin_neon_vaddvq_u16:
  10280. usgn = true;
  10281. LLVM_FALLTHROUGH;
  10282. case NEON::BI__builtin_neon_vaddvq_s16: {
  10283. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  10284. Ty = Int32Ty;
  10285. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10286. llvm::Type *Tys[2] = { Ty, VTy };
  10287. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10288. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  10289. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10290. }
  10291. case NEON::BI__builtin_neon_vmaxv_u8: {
  10292. Int = Intrinsic::aarch64_neon_umaxv;
  10293. Ty = Int32Ty;
  10294. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10295. llvm::Type *Tys[2] = { Ty, VTy };
  10296. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10297. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10298. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10299. }
  10300. case NEON::BI__builtin_neon_vmaxv_u16: {
  10301. Int = Intrinsic::aarch64_neon_umaxv;
  10302. Ty = Int32Ty;
  10303. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10304. llvm::Type *Tys[2] = { Ty, VTy };
  10305. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10306. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10307. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10308. }
  10309. case NEON::BI__builtin_neon_vmaxvq_u8: {
  10310. Int = Intrinsic::aarch64_neon_umaxv;
  10311. Ty = Int32Ty;
  10312. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10313. llvm::Type *Tys[2] = { Ty, VTy };
  10314. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10315. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10316. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10317. }
  10318. case NEON::BI__builtin_neon_vmaxvq_u16: {
  10319. Int = Intrinsic::aarch64_neon_umaxv;
  10320. Ty = Int32Ty;
  10321. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10322. llvm::Type *Tys[2] = { Ty, VTy };
  10323. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10324. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10325. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10326. }
  10327. case NEON::BI__builtin_neon_vmaxv_s8: {
  10328. Int = Intrinsic::aarch64_neon_smaxv;
  10329. Ty = Int32Ty;
  10330. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10331. llvm::Type *Tys[2] = { Ty, VTy };
  10332. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10333. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10334. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10335. }
  10336. case NEON::BI__builtin_neon_vmaxv_s16: {
  10337. Int = Intrinsic::aarch64_neon_smaxv;
  10338. Ty = Int32Ty;
  10339. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10340. llvm::Type *Tys[2] = { Ty, VTy };
  10341. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10342. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10343. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10344. }
  10345. case NEON::BI__builtin_neon_vmaxvq_s8: {
  10346. Int = Intrinsic::aarch64_neon_smaxv;
  10347. Ty = Int32Ty;
  10348. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10349. llvm::Type *Tys[2] = { Ty, VTy };
  10350. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10351. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10352. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10353. }
  10354. case NEON::BI__builtin_neon_vmaxvq_s16: {
  10355. Int = Intrinsic::aarch64_neon_smaxv;
  10356. Ty = Int32Ty;
  10357. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10358. llvm::Type *Tys[2] = { Ty, VTy };
  10359. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10360. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10361. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10362. }
  10363. case NEON::BI__builtin_neon_vmaxv_f16: {
  10364. Int = Intrinsic::aarch64_neon_fmaxv;
  10365. Ty = HalfTy;
  10366. VTy = llvm::FixedVectorType::get(HalfTy, 4);
  10367. llvm::Type *Tys[2] = { Ty, VTy };
  10368. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10369. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10370. return Builder.CreateTrunc(Ops[0], HalfTy);
  10371. }
  10372. case NEON::BI__builtin_neon_vmaxvq_f16: {
  10373. Int = Intrinsic::aarch64_neon_fmaxv;
  10374. Ty = HalfTy;
  10375. VTy = llvm::FixedVectorType::get(HalfTy, 8);
  10376. llvm::Type *Tys[2] = { Ty, VTy };
  10377. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10378. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  10379. return Builder.CreateTrunc(Ops[0], HalfTy);
  10380. }
  10381. case NEON::BI__builtin_neon_vminv_u8: {
  10382. Int = Intrinsic::aarch64_neon_uminv;
  10383. Ty = Int32Ty;
  10384. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10385. llvm::Type *Tys[2] = { Ty, VTy };
  10386. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10387. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10388. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10389. }
  10390. case NEON::BI__builtin_neon_vminv_u16: {
  10391. Int = Intrinsic::aarch64_neon_uminv;
  10392. Ty = Int32Ty;
  10393. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10394. llvm::Type *Tys[2] = { Ty, VTy };
  10395. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10396. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10397. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10398. }
  10399. case NEON::BI__builtin_neon_vminvq_u8: {
  10400. Int = Intrinsic::aarch64_neon_uminv;
  10401. Ty = Int32Ty;
  10402. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10403. llvm::Type *Tys[2] = { Ty, VTy };
  10404. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10405. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10406. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10407. }
  10408. case NEON::BI__builtin_neon_vminvq_u16: {
  10409. Int = Intrinsic::aarch64_neon_uminv;
  10410. Ty = Int32Ty;
  10411. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10412. llvm::Type *Tys[2] = { Ty, VTy };
  10413. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10414. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10415. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10416. }
  10417. case NEON::BI__builtin_neon_vminv_s8: {
  10418. Int = Intrinsic::aarch64_neon_sminv;
  10419. Ty = Int32Ty;
  10420. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10421. llvm::Type *Tys[2] = { Ty, VTy };
  10422. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10423. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10424. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10425. }
  10426. case NEON::BI__builtin_neon_vminv_s16: {
  10427. Int = Intrinsic::aarch64_neon_sminv;
  10428. Ty = Int32Ty;
  10429. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10430. llvm::Type *Tys[2] = { Ty, VTy };
  10431. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10432. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10433. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10434. }
  10435. case NEON::BI__builtin_neon_vminvq_s8: {
  10436. Int = Intrinsic::aarch64_neon_sminv;
  10437. Ty = Int32Ty;
  10438. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10439. llvm::Type *Tys[2] = { Ty, VTy };
  10440. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10441. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10442. return Builder.CreateTrunc(Ops[0], Int8Ty);
  10443. }
  10444. case NEON::BI__builtin_neon_vminvq_s16: {
  10445. Int = Intrinsic::aarch64_neon_sminv;
  10446. Ty = Int32Ty;
  10447. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10448. llvm::Type *Tys[2] = { Ty, VTy };
  10449. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10450. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10451. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10452. }
  10453. case NEON::BI__builtin_neon_vminv_f16: {
  10454. Int = Intrinsic::aarch64_neon_fminv;
  10455. Ty = HalfTy;
  10456. VTy = llvm::FixedVectorType::get(HalfTy, 4);
  10457. llvm::Type *Tys[2] = { Ty, VTy };
  10458. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10459. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10460. return Builder.CreateTrunc(Ops[0], HalfTy);
  10461. }
  10462. case NEON::BI__builtin_neon_vminvq_f16: {
  10463. Int = Intrinsic::aarch64_neon_fminv;
  10464. Ty = HalfTy;
  10465. VTy = llvm::FixedVectorType::get(HalfTy, 8);
  10466. llvm::Type *Tys[2] = { Ty, VTy };
  10467. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10468. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  10469. return Builder.CreateTrunc(Ops[0], HalfTy);
  10470. }
  10471. case NEON::BI__builtin_neon_vmaxnmv_f16: {
  10472. Int = Intrinsic::aarch64_neon_fmaxnmv;
  10473. Ty = HalfTy;
  10474. VTy = llvm::FixedVectorType::get(HalfTy, 4);
  10475. llvm::Type *Tys[2] = { Ty, VTy };
  10476. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10477. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  10478. return Builder.CreateTrunc(Ops[0], HalfTy);
  10479. }
  10480. case NEON::BI__builtin_neon_vmaxnmvq_f16: {
  10481. Int = Intrinsic::aarch64_neon_fmaxnmv;
  10482. Ty = HalfTy;
  10483. VTy = llvm::FixedVectorType::get(HalfTy, 8);
  10484. llvm::Type *Tys[2] = { Ty, VTy };
  10485. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10486. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  10487. return Builder.CreateTrunc(Ops[0], HalfTy);
  10488. }
  10489. case NEON::BI__builtin_neon_vminnmv_f16: {
  10490. Int = Intrinsic::aarch64_neon_fminnmv;
  10491. Ty = HalfTy;
  10492. VTy = llvm::FixedVectorType::get(HalfTy, 4);
  10493. llvm::Type *Tys[2] = { Ty, VTy };
  10494. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10495. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  10496. return Builder.CreateTrunc(Ops[0], HalfTy);
  10497. }
  10498. case NEON::BI__builtin_neon_vminnmvq_f16: {
  10499. Int = Intrinsic::aarch64_neon_fminnmv;
  10500. Ty = HalfTy;
  10501. VTy = llvm::FixedVectorType::get(HalfTy, 8);
  10502. llvm::Type *Tys[2] = { Ty, VTy };
  10503. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10504. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  10505. return Builder.CreateTrunc(Ops[0], HalfTy);
  10506. }
  10507. case NEON::BI__builtin_neon_vmul_n_f64: {
  10508. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  10509. Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
  10510. return Builder.CreateFMul(Ops[0], RHS);
  10511. }
  10512. case NEON::BI__builtin_neon_vaddlv_u8: {
  10513. Int = Intrinsic::aarch64_neon_uaddlv;
  10514. Ty = Int32Ty;
  10515. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10516. llvm::Type *Tys[2] = { Ty, VTy };
  10517. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10518. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10519. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10520. }
  10521. case NEON::BI__builtin_neon_vaddlv_u16: {
  10522. Int = Intrinsic::aarch64_neon_uaddlv;
  10523. Ty = Int32Ty;
  10524. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10525. llvm::Type *Tys[2] = { Ty, VTy };
  10526. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10527. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10528. }
  10529. case NEON::BI__builtin_neon_vaddlvq_u8: {
  10530. Int = Intrinsic::aarch64_neon_uaddlv;
  10531. Ty = Int32Ty;
  10532. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10533. llvm::Type *Tys[2] = { Ty, VTy };
  10534. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10535. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10536. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10537. }
  10538. case NEON::BI__builtin_neon_vaddlvq_u16: {
  10539. Int = Intrinsic::aarch64_neon_uaddlv;
  10540. Ty = Int32Ty;
  10541. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10542. llvm::Type *Tys[2] = { Ty, VTy };
  10543. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10544. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10545. }
  10546. case NEON::BI__builtin_neon_vaddlv_s8: {
  10547. Int = Intrinsic::aarch64_neon_saddlv;
  10548. Ty = Int32Ty;
  10549. VTy = llvm::FixedVectorType::get(Int8Ty, 8);
  10550. llvm::Type *Tys[2] = { Ty, VTy };
  10551. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10552. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10553. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10554. }
  10555. case NEON::BI__builtin_neon_vaddlv_s16: {
  10556. Int = Intrinsic::aarch64_neon_saddlv;
  10557. Ty = Int32Ty;
  10558. VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  10559. llvm::Type *Tys[2] = { Ty, VTy };
  10560. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10561. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10562. }
  10563. case NEON::BI__builtin_neon_vaddlvq_s8: {
  10564. Int = Intrinsic::aarch64_neon_saddlv;
  10565. Ty = Int32Ty;
  10566. VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  10567. llvm::Type *Tys[2] = { Ty, VTy };
  10568. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10569. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10570. return Builder.CreateTrunc(Ops[0], Int16Ty);
  10571. }
  10572. case NEON::BI__builtin_neon_vaddlvq_s16: {
  10573. Int = Intrinsic::aarch64_neon_saddlv;
  10574. Ty = Int32Ty;
  10575. VTy = llvm::FixedVectorType::get(Int16Ty, 8);
  10576. llvm::Type *Tys[2] = { Ty, VTy };
  10577. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  10578. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  10579. }
  10580. case NEON::BI__builtin_neon_vsri_n_v:
  10581. case NEON::BI__builtin_neon_vsriq_n_v: {
  10582. Int = Intrinsic::aarch64_neon_vsri;
  10583. llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
  10584. return EmitNeonCall(Intrin, Ops, "vsri_n");
  10585. }
  10586. case NEON::BI__builtin_neon_vsli_n_v:
  10587. case NEON::BI__builtin_neon_vsliq_n_v: {
  10588. Int = Intrinsic::aarch64_neon_vsli;
  10589. llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
  10590. return EmitNeonCall(Intrin, Ops, "vsli_n");
  10591. }
  10592. case NEON::BI__builtin_neon_vsra_n_v:
  10593. case NEON::BI__builtin_neon_vsraq_n_v:
  10594. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10595. Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
  10596. return Builder.CreateAdd(Ops[0], Ops[1]);
  10597. case NEON::BI__builtin_neon_vrsra_n_v:
  10598. case NEON::BI__builtin_neon_vrsraq_n_v: {
  10599. Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
  10600. SmallVector<llvm::Value*,2> TmpOps;
  10601. TmpOps.push_back(Ops[1]);
  10602. TmpOps.push_back(Ops[2]);
  10603. Function* F = CGM.getIntrinsic(Int, Ty);
  10604. llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
  10605. Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
  10606. return Builder.CreateAdd(Ops[0], tmp);
  10607. }
  10608. case NEON::BI__builtin_neon_vld1_v:
  10609. case NEON::BI__builtin_neon_vld1q_v: {
  10610. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
  10611. return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  10612. }
  10613. case NEON::BI__builtin_neon_vst1_v:
  10614. case NEON::BI__builtin_neon_vst1q_v:
  10615. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
  10616. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  10617. return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  10618. case NEON::BI__builtin_neon_vld1_lane_v:
  10619. case NEON::BI__builtin_neon_vld1q_lane_v: {
  10620. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10621. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  10622. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10623. Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
  10624. PtrOp0.getAlignment());
  10625. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  10626. }
  10627. case NEON::BI__builtin_neon_vld1_dup_v:
  10628. case NEON::BI__builtin_neon_vld1q_dup_v: {
  10629. Value *V = UndefValue::get(Ty);
  10630. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  10631. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10632. Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
  10633. PtrOp0.getAlignment());
  10634. llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
  10635. Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
  10636. return EmitNeonSplat(Ops[0], CI);
  10637. }
  10638. case NEON::BI__builtin_neon_vst1_lane_v:
  10639. case NEON::BI__builtin_neon_vst1q_lane_v:
  10640. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10641. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
  10642. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  10643. return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
  10644. PtrOp0.getAlignment());
  10645. case NEON::BI__builtin_neon_vld2_v:
  10646. case NEON::BI__builtin_neon_vld2q_v: {
  10647. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  10648. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10649. llvm::Type *Tys[2] = { VTy, PTy };
  10650. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
  10651. Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
  10652. Ops[0] = Builder.CreateBitCast(Ops[0],
  10653. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10654. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10655. }
  10656. case NEON::BI__builtin_neon_vld3_v:
  10657. case NEON::BI__builtin_neon_vld3q_v: {
  10658. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  10659. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10660. llvm::Type *Tys[2] = { VTy, PTy };
  10661. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
  10662. Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
  10663. Ops[0] = Builder.CreateBitCast(Ops[0],
  10664. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10665. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10666. }
  10667. case NEON::BI__builtin_neon_vld4_v:
  10668. case NEON::BI__builtin_neon_vld4q_v: {
  10669. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  10670. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10671. llvm::Type *Tys[2] = { VTy, PTy };
  10672. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
  10673. Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
  10674. Ops[0] = Builder.CreateBitCast(Ops[0],
  10675. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10676. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10677. }
  10678. case NEON::BI__builtin_neon_vld2_dup_v:
  10679. case NEON::BI__builtin_neon_vld2q_dup_v: {
  10680. llvm::Type *PTy =
  10681. llvm::PointerType::getUnqual(VTy->getElementType());
  10682. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10683. llvm::Type *Tys[2] = { VTy, PTy };
  10684. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
  10685. Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
  10686. Ops[0] = Builder.CreateBitCast(Ops[0],
  10687. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10688. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10689. }
  10690. case NEON::BI__builtin_neon_vld3_dup_v:
  10691. case NEON::BI__builtin_neon_vld3q_dup_v: {
  10692. llvm::Type *PTy =
  10693. llvm::PointerType::getUnqual(VTy->getElementType());
  10694. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10695. llvm::Type *Tys[2] = { VTy, PTy };
  10696. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
  10697. Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
  10698. Ops[0] = Builder.CreateBitCast(Ops[0],
  10699. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10700. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10701. }
  10702. case NEON::BI__builtin_neon_vld4_dup_v:
  10703. case NEON::BI__builtin_neon_vld4q_dup_v: {
  10704. llvm::Type *PTy =
  10705. llvm::PointerType::getUnqual(VTy->getElementType());
  10706. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  10707. llvm::Type *Tys[2] = { VTy, PTy };
  10708. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
  10709. Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
  10710. Ops[0] = Builder.CreateBitCast(Ops[0],
  10711. llvm::PointerType::getUnqual(Ops[1]->getType()));
  10712. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10713. }
  10714. case NEON::BI__builtin_neon_vld2_lane_v:
  10715. case NEON::BI__builtin_neon_vld2q_lane_v: {
  10716. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  10717. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
  10718. std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
  10719. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10720. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10721. Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
  10722. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
  10723. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  10724. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10725. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10726. }
  10727. case NEON::BI__builtin_neon_vld3_lane_v:
  10728. case NEON::BI__builtin_neon_vld3q_lane_v: {
  10729. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  10730. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
  10731. std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
  10732. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10733. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10734. Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
  10735. Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
  10736. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
  10737. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  10738. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10739. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10740. }
  10741. case NEON::BI__builtin_neon_vld4_lane_v:
  10742. case NEON::BI__builtin_neon_vld4q_lane_v: {
  10743. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  10744. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
  10745. std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
  10746. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10747. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10748. Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
  10749. Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
  10750. Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
  10751. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
  10752. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  10753. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  10754. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  10755. }
  10756. case NEON::BI__builtin_neon_vst2_v:
  10757. case NEON::BI__builtin_neon_vst2q_v: {
  10758. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10759. llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
  10760. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
  10761. Ops, "");
  10762. }
  10763. case NEON::BI__builtin_neon_vst2_lane_v:
  10764. case NEON::BI__builtin_neon_vst2q_lane_v: {
  10765. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10766. Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
  10767. llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
  10768. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
  10769. Ops, "");
  10770. }
  10771. case NEON::BI__builtin_neon_vst3_v:
  10772. case NEON::BI__builtin_neon_vst3q_v: {
  10773. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10774. llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
  10775. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
  10776. Ops, "");
  10777. }
  10778. case NEON::BI__builtin_neon_vst3_lane_v:
  10779. case NEON::BI__builtin_neon_vst3q_lane_v: {
  10780. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10781. Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
  10782. llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
  10783. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
  10784. Ops, "");
  10785. }
  10786. case NEON::BI__builtin_neon_vst4_v:
  10787. case NEON::BI__builtin_neon_vst4q_v: {
  10788. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10789. llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
  10790. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
  10791. Ops, "");
  10792. }
  10793. case NEON::BI__builtin_neon_vst4_lane_v:
  10794. case NEON::BI__builtin_neon_vst4q_lane_v: {
  10795. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  10796. Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
  10797. llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
  10798. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
  10799. Ops, "");
  10800. }
  10801. case NEON::BI__builtin_neon_vtrn_v:
  10802. case NEON::BI__builtin_neon_vtrnq_v: {
  10803. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  10804. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10805. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10806. Value *SV = nullptr;
  10807. for (unsigned vi = 0; vi != 2; ++vi) {
  10808. SmallVector<int, 16> Indices;
  10809. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  10810. Indices.push_back(i+vi);
  10811. Indices.push_back(i+e+vi);
  10812. }
  10813. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  10814. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
  10815. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  10816. }
  10817. return SV;
  10818. }
  10819. case NEON::BI__builtin_neon_vuzp_v:
  10820. case NEON::BI__builtin_neon_vuzpq_v: {
  10821. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  10822. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10823. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10824. Value *SV = nullptr;
  10825. for (unsigned vi = 0; vi != 2; ++vi) {
  10826. SmallVector<int, 16> Indices;
  10827. for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
  10828. Indices.push_back(2*i+vi);
  10829. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  10830. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
  10831. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  10832. }
  10833. return SV;
  10834. }
  10835. case NEON::BI__builtin_neon_vzip_v:
  10836. case NEON::BI__builtin_neon_vzipq_v: {
  10837. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  10838. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  10839. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  10840. Value *SV = nullptr;
  10841. for (unsigned vi = 0; vi != 2; ++vi) {
  10842. SmallVector<int, 16> Indices;
  10843. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  10844. Indices.push_back((i + vi*e) >> 1);
  10845. Indices.push_back(((i + vi*e) >> 1)+e);
  10846. }
  10847. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  10848. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
  10849. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  10850. }
  10851. return SV;
  10852. }
  10853. case NEON::BI__builtin_neon_vqtbl1q_v: {
  10854. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
  10855. Ops, "vtbl1");
  10856. }
  10857. case NEON::BI__builtin_neon_vqtbl2q_v: {
  10858. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
  10859. Ops, "vtbl2");
  10860. }
  10861. case NEON::BI__builtin_neon_vqtbl3q_v: {
  10862. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
  10863. Ops, "vtbl3");
  10864. }
  10865. case NEON::BI__builtin_neon_vqtbl4q_v: {
  10866. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
  10867. Ops, "vtbl4");
  10868. }
  10869. case NEON::BI__builtin_neon_vqtbx1q_v: {
  10870. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
  10871. Ops, "vtbx1");
  10872. }
  10873. case NEON::BI__builtin_neon_vqtbx2q_v: {
  10874. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
  10875. Ops, "vtbx2");
  10876. }
  10877. case NEON::BI__builtin_neon_vqtbx3q_v: {
  10878. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
  10879. Ops, "vtbx3");
  10880. }
  10881. case NEON::BI__builtin_neon_vqtbx4q_v: {
  10882. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
  10883. Ops, "vtbx4");
  10884. }
  10885. case NEON::BI__builtin_neon_vsqadd_v:
  10886. case NEON::BI__builtin_neon_vsqaddq_v: {
  10887. Int = Intrinsic::aarch64_neon_usqadd;
  10888. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  10889. }
  10890. case NEON::BI__builtin_neon_vuqadd_v:
  10891. case NEON::BI__builtin_neon_vuqaddq_v: {
  10892. Int = Intrinsic::aarch64_neon_suqadd;
  10893. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  10894. }
  10895. }
  10896. }
  10897. Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
  10898. const CallExpr *E) {
  10899. assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
  10900. BuiltinID == BPF::BI__builtin_btf_type_id ||
  10901. BuiltinID == BPF::BI__builtin_preserve_type_info ||
  10902. BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
  10903. "unexpected BPF builtin");
  10904. // A sequence number, injected into IR builtin functions, to
  10905. // prevent CSE given the only difference of the funciton
  10906. // may just be the debuginfo metadata.
  10907. static uint32_t BuiltinSeqNum;
  10908. switch (BuiltinID) {
  10909. default:
  10910. llvm_unreachable("Unexpected BPF builtin");
  10911. case BPF::BI__builtin_preserve_field_info: {
  10912. const Expr *Arg = E->getArg(0);
  10913. bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
  10914. if (!getDebugInfo()) {
  10915. CGM.Error(E->getExprLoc(),
  10916. "using __builtin_preserve_field_info() without -g");
  10917. return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
  10918. : EmitLValue(Arg).getPointer(*this);
  10919. }
  10920. // Enable underlying preserve_*_access_index() generation.
  10921. bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
  10922. IsInPreservedAIRegion = true;
  10923. Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
  10924. : EmitLValue(Arg).getPointer(*this);
  10925. IsInPreservedAIRegion = OldIsInPreservedAIRegion;
  10926. ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10927. Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
  10928. // Built the IR for the preserve_field_info intrinsic.
  10929. llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
  10930. &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
  10931. {FieldAddr->getType()});
  10932. return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  10933. }
  10934. case BPF::BI__builtin_btf_type_id:
  10935. case BPF::BI__builtin_preserve_type_info: {
  10936. if (!getDebugInfo()) {
  10937. CGM.Error(E->getExprLoc(), "using builtin function without -g");
  10938. return nullptr;
  10939. }
  10940. const Expr *Arg0 = E->getArg(0);
  10941. llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
  10942. Arg0->getType(), Arg0->getExprLoc());
  10943. ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10944. Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
  10945. Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
  10946. llvm::Function *FnDecl;
  10947. if (BuiltinID == BPF::BI__builtin_btf_type_id)
  10948. FnDecl = llvm::Intrinsic::getDeclaration(
  10949. &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
  10950. else
  10951. FnDecl = llvm::Intrinsic::getDeclaration(
  10952. &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
  10953. CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
  10954. Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
  10955. return Fn;
  10956. }
  10957. case BPF::BI__builtin_preserve_enum_value: {
  10958. if (!getDebugInfo()) {
  10959. CGM.Error(E->getExprLoc(), "using builtin function without -g");
  10960. return nullptr;
  10961. }
  10962. const Expr *Arg0 = E->getArg(0);
  10963. llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
  10964. Arg0->getType(), Arg0->getExprLoc());
  10965. // Find enumerator
  10966. const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
  10967. const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
  10968. const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
  10969. const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
  10970. auto &InitVal = Enumerator->getInitVal();
  10971. std::string InitValStr;
  10972. if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
  10973. InitValStr = std::to_string(InitVal.getSExtValue());
  10974. else
  10975. InitValStr = std::to_string(InitVal.getZExtValue());
  10976. std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
  10977. Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
  10978. ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  10979. Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
  10980. Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
  10981. llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
  10982. &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
  10983. CallInst *Fn =
  10984. Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
  10985. Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
  10986. return Fn;
  10987. }
  10988. }
  10989. }
  10990. llvm::Value *CodeGenFunction::
  10991. BuildVector(ArrayRef<llvm::Value*> Ops) {
  10992. assert((Ops.size() & (Ops.size() - 1)) == 0 &&
  10993. "Not a power-of-two sized vector!");
  10994. bool AllConstants = true;
  10995. for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
  10996. AllConstants &= isa<Constant>(Ops[i]);
  10997. // If this is a constant vector, create a ConstantVector.
  10998. if (AllConstants) {
  10999. SmallVector<llvm::Constant*, 16> CstOps;
  11000. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  11001. CstOps.push_back(cast<Constant>(Ops[i]));
  11002. return llvm::ConstantVector::get(CstOps);
  11003. }
  11004. // Otherwise, insertelement the values to build the vector.
  11005. Value *Result = llvm::UndefValue::get(
  11006. llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
  11007. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  11008. Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
  11009. return Result;
  11010. }
  11011. // Convert the mask from an integer type to a vector of i1.
  11012. static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
  11013. unsigned NumElts) {
  11014. auto *MaskTy = llvm::FixedVectorType::get(
  11015. CGF.Builder.getInt1Ty(),
  11016. cast<IntegerType>(Mask->getType())->getBitWidth());
  11017. Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
11018. // If we have fewer than 8 elements, then the starting mask was an i8 and
11019. // we need to extract down to the right number of elements.
  11020. if (NumElts < 8) {
  11021. int Indices[4];
  11022. for (unsigned i = 0; i != NumElts; ++i)
  11023. Indices[i] = i;
  11024. MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
  11025. makeArrayRef(Indices, NumElts),
  11026. "extract");
  11027. }
  11028. return MaskVec;
  11029. }
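// NOTE (illustrative, not from the original source): getMaskVecValue above
// relies on AVX-512 mask semantics, where bit i of the integer mask controls
// lane i. For example, an 8-bit mask 0b00001011 bitcasts to
// <8 x i1> <1, 1, 0, 1, 0, 0, 0, 0>, enabling lanes 0, 1 and 3; for 2- or
// 4-element vectors the shuffle above then keeps only the low lanes.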
  11030. static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
  11031. Align Alignment) {
11032. // Cast the pointer to the right type.
  11033. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
  11034. llvm::PointerType::getUnqual(Ops[1]->getType()));
  11035. Value *MaskVec = getMaskVecValue(
  11036. CGF, Ops[2],
  11037. cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
  11038. return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
  11039. }
  11040. static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
  11041. Align Alignment) {
11042. // Cast the pointer to the right type.
  11043. llvm::Type *Ty = Ops[1]->getType();
  11044. Value *Ptr =
  11045. CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  11046. Value *MaskVec = getMaskVecValue(
  11047. CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
  11048. return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
  11049. }
  11050. static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
  11051. ArrayRef<Value *> Ops) {
  11052. auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
  11053. llvm::Type *PtrTy = ResultTy->getElementType();
11054. // Cast the pointer to the element type.
  11055. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
  11056. llvm::PointerType::getUnqual(PtrTy));
  11057. Value *MaskVec = getMaskVecValue(
  11058. CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
  11059. llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
  11060. ResultTy);
  11061. return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
  11062. }
  11063. static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
  11064. ArrayRef<Value *> Ops,
  11065. bool IsCompress) {
  11066. auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
  11067. Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
  11068. Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
  11069. : Intrinsic::x86_avx512_mask_expand;
  11070. llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
  11071. return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
  11072. }
  11073. static Value *EmitX86CompressStore(CodeGenFunction &CGF,
  11074. ArrayRef<Value *> Ops) {
  11075. auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
  11076. llvm::Type *PtrTy = ResultTy->getElementType();
11077. // Cast the pointer to the element type.
  11078. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
  11079. llvm::PointerType::getUnqual(PtrTy));
  11080. Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
  11081. llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
  11082. ResultTy);
  11083. return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
  11084. }
  11085. static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
  11086. ArrayRef<Value *> Ops,
  11087. bool InvertLHS = false) {
  11088. unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
  11089. Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  11090. Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
  11091. if (InvertLHS)
  11092. LHS = CGF.Builder.CreateNot(LHS);
  11093. return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
  11094. Ops[0]->getType());
  11095. }
  11096. static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
  11097. Value *Amt, bool IsRight) {
  11098. llvm::Type *Ty = Op0->getType();
11099. // The amount may be a scalar immediate, in which case we create a splat vector.
11100. // Funnel shift amounts are treated modulo the bit width and the types are all
11101. // power-of-2 sized, so we only care about the lowest log2 bits anyway.
  11102. if (Amt->getType() != Ty) {
  11103. unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
  11104. Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  11105. Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
  11106. }
  11107. unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
  11108. Function *F = CGF.CGM.getIntrinsic(IID, Ty);
  11109. return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
  11110. }
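// NOTE (illustrative): llvm.fshl(a, b, s) conceptually concatenates a (high)
// with b (low), shifts the double-width value left by s modulo the bit width,
// and returns the high half; llvm.fshr mirrors this with a right shift and
// returns the low half. When both inputs are the same value the operation
// degenerates into a rotate.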
  11111. static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
  11112. bool IsSigned) {
  11113. Value *Op0 = Ops[0];
  11114. Value *Op1 = Ops[1];
  11115. llvm::Type *Ty = Op0->getType();
  11116. uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
  11117. CmpInst::Predicate Pred;
  11118. switch (Imm) {
  11119. case 0x0:
  11120. Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  11121. break;
  11122. case 0x1:
  11123. Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  11124. break;
  11125. case 0x2:
  11126. Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  11127. break;
  11128. case 0x3:
  11129. Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  11130. break;
  11131. case 0x4:
  11132. Pred = ICmpInst::ICMP_EQ;
  11133. break;
  11134. case 0x5:
  11135. Pred = ICmpInst::ICMP_NE;
  11136. break;
  11137. case 0x6:
  11138. return llvm::Constant::getNullValue(Ty); // FALSE
  11139. case 0x7:
  11140. return llvm::Constant::getAllOnesValue(Ty); // TRUE
  11141. default:
  11142. llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  11143. }
  11144. Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
  11145. Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
  11146. return Res;
  11147. }
  11148. static Value *EmitX86Select(CodeGenFunction &CGF,
  11149. Value *Mask, Value *Op0, Value *Op1) {
11150. // If the mask is all ones, just return the first argument.
  11151. if (const auto *C = dyn_cast<Constant>(Mask))
  11152. if (C->isAllOnesValue())
  11153. return Op0;
  11154. Mask = getMaskVecValue(
  11155. CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
  11156. return CGF.Builder.CreateSelect(Mask, Op0, Op1);
  11157. }
  11158. static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
  11159. Value *Mask, Value *Op0, Value *Op1) {
11160. // If the mask is all ones, just return the first argument.
  11161. if (const auto *C = dyn_cast<Constant>(Mask))
  11162. if (C->isAllOnesValue())
  11163. return Op0;
  11164. auto *MaskTy = llvm::FixedVectorType::get(
  11165. CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
  11166. Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
  11167. Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
  11168. return CGF.Builder.CreateSelect(Mask, Op0, Op1);
  11169. }
  11170. static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
  11171. unsigned NumElts, Value *MaskIn) {
  11172. if (MaskIn) {
  11173. const auto *C = dyn_cast<Constant>(MaskIn);
  11174. if (!C || !C->isAllOnesValue())
  11175. Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
  11176. }
  11177. if (NumElts < 8) {
  11178. int Indices[8];
  11179. for (unsigned i = 0; i != NumElts; ++i)
  11180. Indices[i] = i;
  11181. for (unsigned i = NumElts; i != 8; ++i)
  11182. Indices[i] = i % NumElts + NumElts;
  11183. Cmp = CGF.Builder.CreateShuffleVector(
  11184. Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  11185. }
  11186. return CGF.Builder.CreateBitCast(Cmp,
  11187. IntegerType::get(CGF.getLLVMContext(),
  11188. std::max(NumElts, 8U)));
  11189. }
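// NOTE (explanatory): when the compare produces fewer than 8 lanes, the
// shuffle above pads the <N x i1> result with zero lanes taken from the null
// vector up to 8 elements, so the final bitcast always yields at least an i8,
// lining up with __mmask8, the narrowest mask type the intrinsics use.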
  11190. static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
  11191. bool Signed, ArrayRef<Value *> Ops) {
  11192. assert((Ops.size() == 2 || Ops.size() == 4) &&
  11193. "Unexpected number of arguments");
  11194. unsigned NumElts =
  11195. cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  11196. Value *Cmp;
  11197. if (CC == 3) {
  11198. Cmp = Constant::getNullValue(
  11199. llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  11200. } else if (CC == 7) {
  11201. Cmp = Constant::getAllOnesValue(
  11202. llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  11203. } else {
  11204. ICmpInst::Predicate Pred;
  11205. switch (CC) {
  11206. default: llvm_unreachable("Unknown condition code");
  11207. case 0: Pred = ICmpInst::ICMP_EQ; break;
  11208. case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
  11209. case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
  11210. case 4: Pred = ICmpInst::ICMP_NE; break;
  11211. case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
  11212. case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
  11213. }
  11214. Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  11215. }
  11216. Value *MaskIn = nullptr;
  11217. if (Ops.size() == 4)
  11218. MaskIn = Ops[3];
  11219. return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
  11220. }
  11221. static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
  11222. Value *Zero = Constant::getNullValue(In->getType());
  11223. return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
  11224. }
  11225. static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
  11226. ArrayRef<Value *> Ops, bool IsSigned) {
  11227. unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
  11228. llvm::Type *Ty = Ops[1]->getType();
  11229. Value *Res;
  11230. if (Rnd != 4) {
  11231. Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
  11232. : Intrinsic::x86_avx512_uitofp_round;
  11233. Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
  11234. Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
  11235. } else {
  11236. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  11237. Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
  11238. : CGF.Builder.CreateUIToFP(Ops[0], Ty);
  11239. }
  11240. return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
  11241. }
  11242. // Lowers X86 FMA intrinsics to IR.
  11243. static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
  11244. ArrayRef<Value *> Ops, unsigned BuiltinID,
  11245. bool IsAddSub) {
  11246. bool Subtract = false;
  11247. Intrinsic::ID IID = Intrinsic::not_intrinsic;
  11248. switch (BuiltinID) {
  11249. default: break;
  11250. case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
  11251. Subtract = true;
  11252. LLVM_FALLTHROUGH;
  11253. case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  11254. case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  11255. case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
  11256. IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
  11257. break;
  11258. case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  11259. Subtract = true;
  11260. LLVM_FALLTHROUGH;
  11261. case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  11262. case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  11263. case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  11264. IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
  11265. break;
  11266. case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
  11267. Subtract = true;
  11268. LLVM_FALLTHROUGH;
  11269. case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  11270. case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  11271. case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
  11272. IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
  11273. case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
  11274. Subtract = true;
  11275. LLVM_FALLTHROUGH;
  11276. case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  11277. case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  11278. case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
  11279. IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
  11280. case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  11281. Subtract = true;
  11282. LLVM_FALLTHROUGH;
  11283. case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  11284. case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  11285. case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  11286. IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
  11287. break;
  11288. case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  11289. Subtract = true;
  11290. LLVM_FALLTHROUGH;
  11291. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  11292. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  11293. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  11294. IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
  11295. break;
  11296. }
  11297. Value *A = Ops[0];
  11298. Value *B = Ops[1];
  11299. Value *C = Ops[2];
  11300. if (Subtract)
  11301. C = CGF.Builder.CreateFNeg(C);
  11302. Value *Res;
11303. // Only lower to a plain llvm.fma for _MM_FROUND_CUR_DIRECTION/4 (no rounding) and non-addsub forms; otherwise use the target-specific intrinsic.
  11304. if (IID != Intrinsic::not_intrinsic &&
  11305. (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
  11306. IsAddSub)) {
  11307. Function *Intr = CGF.CGM.getIntrinsic(IID);
  11308. Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
  11309. } else {
  11310. llvm::Type *Ty = A->getType();
  11311. Function *FMA;
  11312. if (CGF.Builder.getIsFPConstrained()) {
  11313. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  11314. FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
  11315. Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
  11316. } else {
  11317. FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
  11318. Res = CGF.Builder.CreateCall(FMA, {A, B, C});
  11319. }
  11320. }
  11321. // Handle any required masking.
  11322. Value *MaskFalseVal = nullptr;
  11323. switch (BuiltinID) {
  11324. case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
  11325. case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  11326. case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  11327. case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
  11328. case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  11329. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  11330. MaskFalseVal = Ops[0];
  11331. break;
  11332. case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
  11333. case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  11334. case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  11335. case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  11336. case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  11337. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  11338. MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
  11339. break;
  11340. case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
  11341. case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
  11342. case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
  11343. case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
  11344. case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
  11345. case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
  11346. case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  11347. case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  11348. case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  11349. case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  11350. case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  11351. case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  11352. MaskFalseVal = Ops[2];
  11353. break;
  11354. }
  11355. if (MaskFalseVal)
  11356. return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
  11357. return Res;
  11358. }
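// NOTE (summary of the switch above): the _mask forms blend masked-off lanes
// from the first source operand (Ops[0]), the _maskz forms blend in zero, and
// the _mask3 forms blend from the accumulator operand (Ops[2]). When
// MaskFalseVal stays null the builtin is unmasked and the raw FMA result is
// returned as is.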
  11359. static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
  11360. MutableArrayRef<Value *> Ops, Value *Upper,
  11361. bool ZeroMask = false, unsigned PTIdx = 0,
  11362. bool NegAcc = false) {
  11363. unsigned Rnd = 4;
  11364. if (Ops.size() > 4)
  11365. Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
  11366. if (NegAcc)
  11367. Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
  11368. Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
  11369. Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  11370. Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  11371. Value *Res;
  11372. if (Rnd != 4) {
  11373. Intrinsic::ID IID;
  11374. switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
  11375. case 16:
  11376. IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
  11377. break;
  11378. case 32:
  11379. IID = Intrinsic::x86_avx512_vfmadd_f32;
  11380. break;
  11381. case 64:
  11382. IID = Intrinsic::x86_avx512_vfmadd_f64;
  11383. break;
  11384. default:
  11385. llvm_unreachable("Unexpected size");
  11386. }
  11387. Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
  11388. {Ops[0], Ops[1], Ops[2], Ops[4]});
  11389. } else if (CGF.Builder.getIsFPConstrained()) {
  11390. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  11391. Function *FMA = CGF.CGM.getIntrinsic(
  11392. Intrinsic::experimental_constrained_fma, Ops[0]->getType());
  11393. Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
  11394. } else {
  11395. Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
  11396. Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
  11397. }
  11398. // If we have more than 3 arguments, we need to do masking.
  11399. if (Ops.size() > 3) {
  11400. Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
  11401. : Ops[PTIdx];
11402. // If we negated the accumulator and it's the PassThru value, we need to
11403. // bypass the negate. Conveniently, Upper should be the same thing in this
11404. // case.
  11405. if (NegAcc && PTIdx == 2)
  11406. PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
  11407. Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
  11408. }
  11409. return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
  11410. }
  11411. static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
  11412. ArrayRef<Value *> Ops) {
  11413. llvm::Type *Ty = Ops[0]->getType();
  11414. // Arguments have a vXi32 type so cast to vXi64.
  11415. Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
  11416. Ty->getPrimitiveSizeInBits() / 64);
  11417. Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
  11418. Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
  11419. if (IsSigned) {
  11420. // Shift left then arithmetic shift right.
  11421. Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  11422. LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
  11423. LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
  11424. RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
  11425. RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
  11426. } else {
  11427. // Clear the upper bits.
  11428. Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  11429. LHS = CGF.Builder.CreateAnd(LHS, Mask);
  11430. RHS = CGF.Builder.CreateAnd(RHS, Mask);
  11431. }
  11432. return CGF.Builder.CreateMul(LHS, RHS);
  11433. }
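// NOTE (worked example, illustrative): for pmuldq/pmuludq the vXi32 inputs
// are reinterpreted as vXi64, so each 64-bit lane holds an even/odd element
// pair with the even element in the low 32 bits. The shl/ashr-by-32 pair
// sign-extends that low half in place (the unsigned path zero-extends via the
// 0xffffffff mask), and the single 64-bit multiply then produces the full
// 32x32->64-bit product of the even elements, matching the instruction.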
  11434. // Emit a masked pternlog intrinsic. This only exists because the header has to
  11435. // use a macro and we aren't able to pass the input argument to a pternlog
  11436. // builtin and a select builtin without evaluating it twice.
  11437. static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
  11438. ArrayRef<Value *> Ops) {
  11439. llvm::Type *Ty = Ops[0]->getType();
  11440. unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  11441. unsigned EltWidth = Ty->getScalarSizeInBits();
  11442. Intrinsic::ID IID;
  11443. if (VecWidth == 128 && EltWidth == 32)
  11444. IID = Intrinsic::x86_avx512_pternlog_d_128;
  11445. else if (VecWidth == 256 && EltWidth == 32)
  11446. IID = Intrinsic::x86_avx512_pternlog_d_256;
  11447. else if (VecWidth == 512 && EltWidth == 32)
  11448. IID = Intrinsic::x86_avx512_pternlog_d_512;
  11449. else if (VecWidth == 128 && EltWidth == 64)
  11450. IID = Intrinsic::x86_avx512_pternlog_q_128;
  11451. else if (VecWidth == 256 && EltWidth == 64)
  11452. IID = Intrinsic::x86_avx512_pternlog_q_256;
  11453. else if (VecWidth == 512 && EltWidth == 64)
  11454. IID = Intrinsic::x86_avx512_pternlog_q_512;
  11455. else
  11456. llvm_unreachable("Unexpected intrinsic");
  11457. Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
  11458. Ops.drop_back());
  11459. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
  11460. return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
  11461. }
  11462. static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
  11463. llvm::Type *DstTy) {
  11464. unsigned NumberOfElements =
  11465. cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  11466. Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
  11467. return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
  11468. }
11469. // Emit a binary intrinsic with the same type used in the result/args.
  11470. static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF,
  11471. ArrayRef<Value *> Ops, Intrinsic::ID IID) {
  11472. llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
  11473. return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
  11474. }
  11475. Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
  11476. const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
  11477. StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  11478. return EmitX86CpuIs(CPUStr);
  11479. }
11480. // Convert F16 halves to floats.
  11481. static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
  11482. ArrayRef<Value *> Ops,
  11483. llvm::Type *DstTy) {
  11484. assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
  11485. "Unknown cvtph2ps intrinsic");
  11486. // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
  11487. if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
  11488. Function *F =
  11489. CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
  11490. return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
  11491. }
  11492. unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  11493. Value *Src = Ops[0];
  11494. // Extract the subvector.
  11495. if (NumDstElts !=
  11496. cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
  11497. assert(NumDstElts == 4 && "Unexpected vector size");
  11498. Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
  11499. }
  11500. // Bitcast from vXi16 to vXf16.
  11501. auto *HalfTy = llvm::FixedVectorType::get(
  11502. llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
  11503. Src = CGF.Builder.CreateBitCast(Src, HalfTy);
  11504. // Perform the fp-extension.
  11505. Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
  11506. if (Ops.size() >= 3)
  11507. Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
  11508. return Res;
  11509. }
  11510. // Convert a BF16 to a float.
  11511. static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
  11512. const CallExpr *E,
  11513. ArrayRef<Value *> Ops) {
  11514. llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
  11515. Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
  11516. Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
  11517. llvm::Type *ResultType = CGF.ConvertType(E->getType());
  11518. Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
  11519. return BitCast;
  11520. }
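// NOTE (background, illustrative): bfloat16 is the upper 16 bits of an
// IEEE-754 binary32 value, so zero-extending the i16 payload and shifting it
// left by 16 reconstructs the float's bit pattern; e.g. the bf16 encoding
// 0x3F80 becomes 0x3F800000, which is 1.0f.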
  11521. Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
  11522. llvm::Type *Int32Ty = Builder.getInt32Ty();
  11523. // Matching the struct layout from the compiler-rt/libgcc structure that is
  11524. // filled in:
  11525. // unsigned int __cpu_vendor;
  11526. // unsigned int __cpu_type;
  11527. // unsigned int __cpu_subtype;
  11528. // unsigned int __cpu_features[1];
  11529. llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
  11530. llvm::ArrayType::get(Int32Ty, 1));
  11531. // Grab the global __cpu_model.
  11532. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  11533. cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
  11534. // Calculate the index needed to access the correct field based on the
  11535. // range. Also adjust the expected value.
  11536. unsigned Index;
  11537. unsigned Value;
  11538. std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
  11539. #define X86_VENDOR(ENUM, STRING) \
  11540. .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
  11541. #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
  11542. .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
  11543. #define X86_CPU_TYPE(ENUM, STR) \
  11544. .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
  11545. #define X86_CPU_SUBTYPE(ENUM, STR) \
  11546. .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
  11547. #include "llvm/Support/X86TargetParser.def"
  11548. .Default({0, 0});
  11549. assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
  11550. // Grab the appropriate field from __cpu_model.
  11551. llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
  11552. ConstantInt::get(Int32Ty, Index)};
  11553. llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  11554. CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
  11555. CharUnits::fromQuantity(4));
  11556. // Check the value of the field against the requested value.
  11557. return Builder.CreateICmpEQ(CpuValue,
  11558. llvm::ConstantInt::get(Int32Ty, Value));
  11559. }
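// NOTE (illustrative sketch of the IR produced above): the check is roughly
// equivalent to
//   extern struct {
//     unsigned __cpu_vendor, __cpu_type, __cpu_subtype, __cpu_features[1];
//   } __cpu_model;
//   ... selected field of __cpu_model == Value ...
// with Index 0/1/2 selecting vendor/type/subtype and Value taken from the
// X86_VENDOR/X86_CPU_TYPE/X86_CPU_SUBTYPE tables in X86TargetParser.def.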
  11560. Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  11561. const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  11562. StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  11563. return EmitX86CpuSupports(FeatureStr);
  11564. }
  11565. Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  11566. return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
  11567. }
  11568. llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
  11569. uint32_t Features1 = Lo_32(FeaturesMask);
  11570. uint32_t Features2 = Hi_32(FeaturesMask);
  11571. Value *Result = Builder.getTrue();
  11572. if (Features1 != 0) {
  11573. // Matching the struct layout from the compiler-rt/libgcc structure that is
  11574. // filled in:
  11575. // unsigned int __cpu_vendor;
  11576. // unsigned int __cpu_type;
  11577. // unsigned int __cpu_subtype;
  11578. // unsigned int __cpu_features[1];
  11579. llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
  11580. llvm::ArrayType::get(Int32Ty, 1));
  11581. // Grab the global __cpu_model.
  11582. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  11583. cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
11584. // Grab the first (0th) element of the __cpu_features field of the
11585. // __cpu_model global (whose type is STy).
  11586. Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
  11587. Builder.getInt32(0)};
  11588. Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
  11589. Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
  11590. CharUnits::fromQuantity(4));
  11591. // Check the value of the bit corresponding to the feature requested.
  11592. Value *Mask = Builder.getInt32(Features1);
  11593. Value *Bitset = Builder.CreateAnd(Features, Mask);
  11594. Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
  11595. Result = Builder.CreateAnd(Result, Cmp);
  11596. }
  11597. if (Features2 != 0) {
  11598. llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
  11599. "__cpu_features2");
  11600. cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
  11601. Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
  11602. CharUnits::fromQuantity(4));
  11603. // Check the value of the bit corresponding to the feature requested.
  11604. Value *Mask = Builder.getInt32(Features2);
  11605. Value *Bitset = Builder.CreateAnd(Features, Mask);
  11606. Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
  11607. Result = Builder.CreateAnd(Result, Cmp);
  11608. }
  11609. return Result;
  11610. }
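// NOTE (explanatory): the low 32 requested feature bits are tested against
// __cpu_model.__cpu_features[0] and the high 32 bits against the separate
// __cpu_features2 global; each test is (features & mask) == mask, so the
// result is true only if every requested feature bit is present.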
  11611. Value *CodeGenFunction::EmitX86CpuInit() {
  11612. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
  11613. /*Variadic*/ false);
  11614. llvm::FunctionCallee Func =
  11615. CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
  11616. cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  11617. cast<llvm::GlobalValue>(Func.getCallee())
  11618. ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  11619. return Builder.CreateCall(Func);
  11620. }
  11621. Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
  11622. const CallExpr *E) {
  11623. if (BuiltinID == X86::BI__builtin_cpu_is)
  11624. return EmitX86CpuIs(E);
  11625. if (BuiltinID == X86::BI__builtin_cpu_supports)
  11626. return EmitX86CpuSupports(E);
  11627. if (BuiltinID == X86::BI__builtin_cpu_init)
  11628. return EmitX86CpuInit();
  11629. // Handle MSVC intrinsics before argument evaluation to prevent double
  11630. // evaluation.
  11631. if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
  11632. return EmitMSVCBuiltinExpr(*MsvcIntId, E);
  11633. SmallVector<Value*, 4> Ops;
  11634. bool IsMaskFCmp = false;
  11635. bool IsConjFMA = false;
  11636. // Find out if any arguments are required to be integer constant expressions.
  11637. unsigned ICEArguments = 0;
  11638. ASTContext::GetBuiltinTypeError Error;
  11639. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  11640. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  11641. for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
  11642. // If this is a normal argument, just emit it as a scalar.
  11643. if ((ICEArguments & (1 << i)) == 0) {
  11644. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  11645. continue;
  11646. }
  11647. // If this is required to be a constant, constant fold it so that we know
  11648. // that the generated intrinsic gets a ConstantInt.
  11649. Ops.push_back(llvm::ConstantInt::get(
  11650. getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
  11651. }
  11652. // These exist so that the builtin that takes an immediate can be bounds
  11653. // checked by clang to avoid passing bad immediates to the backend. Since
  11654. // AVX has a larger immediate than SSE we would need separate builtins to
11655. // do the different bounds checking. Rather than create a clang-specific,
11656. // SSE-only builtin, this implements eight separate builtins to match the
11657. // gcc implementation.
  11658. auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
  11659. Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
  11660. llvm::Function *F = CGM.getIntrinsic(ID);
  11661. return Builder.CreateCall(F, Ops);
  11662. };
  11663. // For the vector forms of FP comparisons, translate the builtins directly to
  11664. // IR.
  11665. // TODO: The builtins could be removed if the SSE header files used vector
  11666. // extension comparisons directly (vector ordered/unordered may need
  11667. // additional support via __builtin_isnan()).
  11668. auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
  11669. bool IsSignaling) {
  11670. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
  11671. Value *Cmp;
  11672. if (IsSignaling)
  11673. Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
  11674. else
  11675. Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
  11676. llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
  11677. llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
  11678. Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
  11679. return Builder.CreateBitCast(Sext, FPVecTy);
  11680. };
  11681. switch (BuiltinID) {
  11682. default: return nullptr;
  11683. case X86::BI_mm_prefetch: {
  11684. Value *Address = Ops[0];
  11685. ConstantInt *C = cast<ConstantInt>(Ops[1]);
  11686. Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
  11687. Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
  11688. Value *Data = ConstantInt::get(Int32Ty, 1);
  11689. Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
  11690. return Builder.CreateCall(F, {Address, RW, Locality, Data});
  11691. }
  11692. case X86::BI_mm_clflush: {
  11693. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
  11694. Ops[0]);
  11695. }
  11696. case X86::BI_mm_lfence: {
  11697. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
  11698. }
  11699. case X86::BI_mm_mfence: {
  11700. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
  11701. }
  11702. case X86::BI_mm_sfence: {
  11703. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
  11704. }
  11705. case X86::BI_mm_pause: {
  11706. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
  11707. }
  11708. case X86::BI__rdtsc: {
  11709. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
  11710. }
  11711. case X86::BI__builtin_ia32_rdtscp: {
  11712. Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
  11713. Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
  11714. Ops[0]);
  11715. return Builder.CreateExtractValue(Call, 0);
  11716. }
  11717. case X86::BI__builtin_ia32_lzcnt_u16:
  11718. case X86::BI__builtin_ia32_lzcnt_u32:
  11719. case X86::BI__builtin_ia32_lzcnt_u64: {
  11720. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
  11721. return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  11722. }
  11723. case X86::BI__builtin_ia32_tzcnt_u16:
  11724. case X86::BI__builtin_ia32_tzcnt_u32:
  11725. case X86::BI__builtin_ia32_tzcnt_u64: {
  11726. Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
  11727. return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  11728. }
  11729. case X86::BI__builtin_ia32_undef128:
  11730. case X86::BI__builtin_ia32_undef256:
  11731. case X86::BI__builtin_ia32_undef512:
  11732. // The x86 definition of "undef" is not the same as the LLVM definition
  11733. // (PR32176). We leave optimizing away an unnecessary zero constant to the
  11734. // IR optimizer and backend.
  11735. // TODO: If we had a "freeze" IR instruction to generate a fixed undef
  11736. // value, we should use that here instead of a zero.
  11737. return llvm::Constant::getNullValue(ConvertType(E->getType()));
  11738. case X86::BI__builtin_ia32_vec_init_v8qi:
  11739. case X86::BI__builtin_ia32_vec_init_v4hi:
  11740. case X86::BI__builtin_ia32_vec_init_v2si:
  11741. return Builder.CreateBitCast(BuildVector(Ops),
  11742. llvm::Type::getX86_MMXTy(getLLVMContext()));
  11743. case X86::BI__builtin_ia32_vec_ext_v2si:
  11744. case X86::BI__builtin_ia32_vec_ext_v16qi:
  11745. case X86::BI__builtin_ia32_vec_ext_v8hi:
  11746. case X86::BI__builtin_ia32_vec_ext_v4si:
  11747. case X86::BI__builtin_ia32_vec_ext_v4sf:
  11748. case X86::BI__builtin_ia32_vec_ext_v2di:
  11749. case X86::BI__builtin_ia32_vec_ext_v32qi:
  11750. case X86::BI__builtin_ia32_vec_ext_v16hi:
  11751. case X86::BI__builtin_ia32_vec_ext_v8si:
  11752. case X86::BI__builtin_ia32_vec_ext_v4di: {
  11753. unsigned NumElts =
  11754. cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  11755. uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
  11756. Index &= NumElts - 1;
  11757. // These builtins exist so we can ensure the index is an ICE and in range.
  11758. // Otherwise we could just do this in the header file.
  11759. return Builder.CreateExtractElement(Ops[0], Index);
  11760. }
  11761. case X86::BI__builtin_ia32_vec_set_v16qi:
  11762. case X86::BI__builtin_ia32_vec_set_v8hi:
  11763. case X86::BI__builtin_ia32_vec_set_v4si:
  11764. case X86::BI__builtin_ia32_vec_set_v2di:
  11765. case X86::BI__builtin_ia32_vec_set_v32qi:
  11766. case X86::BI__builtin_ia32_vec_set_v16hi:
  11767. case X86::BI__builtin_ia32_vec_set_v8si:
  11768. case X86::BI__builtin_ia32_vec_set_v4di: {
  11769. unsigned NumElts =
  11770. cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  11771. unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
  11772. Index &= NumElts - 1;
  11773. // These builtins exist so we can ensure the index is an ICE and in range.
  11774. // Otherwise we could just do this in the header file.
  11775. return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
  11776. }
  11777. case X86::BI_mm_setcsr:
  11778. case X86::BI__builtin_ia32_ldmxcsr: {
  11779. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  11780. Builder.CreateStore(Ops[0], Tmp);
  11781. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
  11782. Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  11783. }
  11784. case X86::BI_mm_getcsr:
  11785. case X86::BI__builtin_ia32_stmxcsr: {
  11786. Address Tmp = CreateMemTemp(E->getType());
  11787. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
  11788. Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  11789. return Builder.CreateLoad(Tmp, "stmxcsr");
  11790. }
  11791. case X86::BI__builtin_ia32_xsave:
  11792. case X86::BI__builtin_ia32_xsave64:
  11793. case X86::BI__builtin_ia32_xrstor:
  11794. case X86::BI__builtin_ia32_xrstor64:
  11795. case X86::BI__builtin_ia32_xsaveopt:
  11796. case X86::BI__builtin_ia32_xsaveopt64:
  11797. case X86::BI__builtin_ia32_xrstors:
  11798. case X86::BI__builtin_ia32_xrstors64:
  11799. case X86::BI__builtin_ia32_xsavec:
  11800. case X86::BI__builtin_ia32_xsavec64:
  11801. case X86::BI__builtin_ia32_xsaves:
  11802. case X86::BI__builtin_ia32_xsaves64:
  11803. case X86::BI__builtin_ia32_xsetbv:
  11804. case X86::BI_xsetbv: {
  11805. Intrinsic::ID ID;
  11806. #define INTRINSIC_X86_XSAVE_ID(NAME) \
  11807. case X86::BI__builtin_ia32_##NAME: \
  11808. ID = Intrinsic::x86_##NAME; \
  11809. break
  11810. switch (BuiltinID) {
  11811. default: llvm_unreachable("Unsupported intrinsic!");
  11812. INTRINSIC_X86_XSAVE_ID(xsave);
  11813. INTRINSIC_X86_XSAVE_ID(xsave64);
  11814. INTRINSIC_X86_XSAVE_ID(xrstor);
  11815. INTRINSIC_X86_XSAVE_ID(xrstor64);
  11816. INTRINSIC_X86_XSAVE_ID(xsaveopt);
  11817. INTRINSIC_X86_XSAVE_ID(xsaveopt64);
  11818. INTRINSIC_X86_XSAVE_ID(xrstors);
  11819. INTRINSIC_X86_XSAVE_ID(xrstors64);
  11820. INTRINSIC_X86_XSAVE_ID(xsavec);
  11821. INTRINSIC_X86_XSAVE_ID(xsavec64);
  11822. INTRINSIC_X86_XSAVE_ID(xsaves);
  11823. INTRINSIC_X86_XSAVE_ID(xsaves64);
  11824. INTRINSIC_X86_XSAVE_ID(xsetbv);
  11825. case X86::BI_xsetbv:
  11826. ID = Intrinsic::x86_xsetbv;
  11827. break;
  11828. }
  11829. #undef INTRINSIC_X86_XSAVE_ID
  11830. Value *Mhi = Builder.CreateTrunc(
  11831. Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
  11832. Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
  11833. Ops[1] = Mhi;
  11834. Ops.push_back(Mlo);
  11835. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  11836. }
  11837. case X86::BI__builtin_ia32_xgetbv:
  11838. case X86::BI_xgetbv:
  11839. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
  11840. case X86::BI__builtin_ia32_storedqudi128_mask:
  11841. case X86::BI__builtin_ia32_storedqusi128_mask:
  11842. case X86::BI__builtin_ia32_storedquhi128_mask:
  11843. case X86::BI__builtin_ia32_storedquqi128_mask:
  11844. case X86::BI__builtin_ia32_storeupd128_mask:
  11845. case X86::BI__builtin_ia32_storeups128_mask:
  11846. case X86::BI__builtin_ia32_storedqudi256_mask:
  11847. case X86::BI__builtin_ia32_storedqusi256_mask:
  11848. case X86::BI__builtin_ia32_storedquhi256_mask:
  11849. case X86::BI__builtin_ia32_storedquqi256_mask:
  11850. case X86::BI__builtin_ia32_storeupd256_mask:
  11851. case X86::BI__builtin_ia32_storeups256_mask:
  11852. case X86::BI__builtin_ia32_storedqudi512_mask:
  11853. case X86::BI__builtin_ia32_storedqusi512_mask:
  11854. case X86::BI__builtin_ia32_storedquhi512_mask:
  11855. case X86::BI__builtin_ia32_storedquqi512_mask:
  11856. case X86::BI__builtin_ia32_storeupd512_mask:
  11857. case X86::BI__builtin_ia32_storeups512_mask:
  11858. return EmitX86MaskedStore(*this, Ops, Align(1));
  11859. case X86::BI__builtin_ia32_storesh128_mask:
  11860. case X86::BI__builtin_ia32_storess128_mask:
  11861. case X86::BI__builtin_ia32_storesd128_mask:
  11862. return EmitX86MaskedStore(*this, Ops, Align(1));
  11863. case X86::BI__builtin_ia32_vpopcntb_128:
  11864. case X86::BI__builtin_ia32_vpopcntd_128:
  11865. case X86::BI__builtin_ia32_vpopcntq_128:
  11866. case X86::BI__builtin_ia32_vpopcntw_128:
  11867. case X86::BI__builtin_ia32_vpopcntb_256:
  11868. case X86::BI__builtin_ia32_vpopcntd_256:
  11869. case X86::BI__builtin_ia32_vpopcntq_256:
  11870. case X86::BI__builtin_ia32_vpopcntw_256:
  11871. case X86::BI__builtin_ia32_vpopcntb_512:
  11872. case X86::BI__builtin_ia32_vpopcntd_512:
  11873. case X86::BI__builtin_ia32_vpopcntq_512:
  11874. case X86::BI__builtin_ia32_vpopcntw_512: {
  11875. llvm::Type *ResultType = ConvertType(E->getType());
  11876. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  11877. return Builder.CreateCall(F, Ops);
  11878. }
  11879. case X86::BI__builtin_ia32_cvtmask2b128:
  11880. case X86::BI__builtin_ia32_cvtmask2b256:
  11881. case X86::BI__builtin_ia32_cvtmask2b512:
  11882. case X86::BI__builtin_ia32_cvtmask2w128:
  11883. case X86::BI__builtin_ia32_cvtmask2w256:
  11884. case X86::BI__builtin_ia32_cvtmask2w512:
  11885. case X86::BI__builtin_ia32_cvtmask2d128:
  11886. case X86::BI__builtin_ia32_cvtmask2d256:
  11887. case X86::BI__builtin_ia32_cvtmask2d512:
  11888. case X86::BI__builtin_ia32_cvtmask2q128:
  11889. case X86::BI__builtin_ia32_cvtmask2q256:
  11890. case X86::BI__builtin_ia32_cvtmask2q512:
  11891. return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
  11892. case X86::BI__builtin_ia32_cvtb2mask128:
  11893. case X86::BI__builtin_ia32_cvtb2mask256:
  11894. case X86::BI__builtin_ia32_cvtb2mask512:
  11895. case X86::BI__builtin_ia32_cvtw2mask128:
  11896. case X86::BI__builtin_ia32_cvtw2mask256:
  11897. case X86::BI__builtin_ia32_cvtw2mask512:
  11898. case X86::BI__builtin_ia32_cvtd2mask128:
  11899. case X86::BI__builtin_ia32_cvtd2mask256:
  11900. case X86::BI__builtin_ia32_cvtd2mask512:
  11901. case X86::BI__builtin_ia32_cvtq2mask128:
  11902. case X86::BI__builtin_ia32_cvtq2mask256:
  11903. case X86::BI__builtin_ia32_cvtq2mask512:
  11904. return EmitX86ConvertToMask(*this, Ops[0]);
  11905. case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  11906. case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  11907. case X86::BI__builtin_ia32_cvtqq2pd512_mask:
  11908. case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  11909. case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  11910. case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
  11911. return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
  11912. case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  11913. case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  11914. case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  11915. case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  11916. case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  11917. case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
  11918. return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
  11919. case X86::BI__builtin_ia32_vfmaddss3:
  11920. case X86::BI__builtin_ia32_vfmaddsd3:
  11921. case X86::BI__builtin_ia32_vfmaddsh3_mask:
  11922. case X86::BI__builtin_ia32_vfmaddss3_mask:
  11923. case X86::BI__builtin_ia32_vfmaddsd3_mask:
  11924. return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
  11925. case X86::BI__builtin_ia32_vfmaddss:
  11926. case X86::BI__builtin_ia32_vfmaddsd:
  11927. return EmitScalarFMAExpr(*this, E, Ops,
  11928. Constant::getNullValue(Ops[0]->getType()));
  11929. case X86::BI__builtin_ia32_vfmaddsh3_maskz:
  11930. case X86::BI__builtin_ia32_vfmaddss3_maskz:
  11931. case X86::BI__builtin_ia32_vfmaddsd3_maskz:
  11932. return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
  11933. case X86::BI__builtin_ia32_vfmaddsh3_mask3:
  11934. case X86::BI__builtin_ia32_vfmaddss3_mask3:
  11935. case X86::BI__builtin_ia32_vfmaddsd3_mask3:
  11936. return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
  11937. case X86::BI__builtin_ia32_vfmsubsh3_mask3:
  11938. case X86::BI__builtin_ia32_vfmsubss3_mask3:
  11939. case X86::BI__builtin_ia32_vfmsubsd3_mask3:
  11940. return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
  11941. /*NegAcc*/ true);
  11942. case X86::BI__builtin_ia32_vfmaddph:
  11943. case X86::BI__builtin_ia32_vfmaddps:
  11944. case X86::BI__builtin_ia32_vfmaddpd:
  11945. case X86::BI__builtin_ia32_vfmaddph256:
  11946. case X86::BI__builtin_ia32_vfmaddps256:
  11947. case X86::BI__builtin_ia32_vfmaddpd256:
  11948. case X86::BI__builtin_ia32_vfmaddph512_mask:
  11949. case X86::BI__builtin_ia32_vfmaddph512_maskz:
  11950. case X86::BI__builtin_ia32_vfmaddph512_mask3:
  11951. case X86::BI__builtin_ia32_vfmaddps512_mask:
  11952. case X86::BI__builtin_ia32_vfmaddps512_maskz:
  11953. case X86::BI__builtin_ia32_vfmaddps512_mask3:
  11954. case X86::BI__builtin_ia32_vfmsubps512_mask3:
  11955. case X86::BI__builtin_ia32_vfmaddpd512_mask:
  11956. case X86::BI__builtin_ia32_vfmaddpd512_maskz:
  11957. case X86::BI__builtin_ia32_vfmaddpd512_mask3:
  11958. case X86::BI__builtin_ia32_vfmsubpd512_mask3:
  11959. case X86::BI__builtin_ia32_vfmsubph512_mask3:
  11960. return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
  11961. case X86::BI__builtin_ia32_vfmaddsubph512_mask:
  11962. case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  11963. case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  11964. case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  11965. case X86::BI__builtin_ia32_vfmaddsubps512_mask:
  11966. case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  11967. case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  11968. case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  11969. case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  11970. case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  11971. case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  11972. case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  11973. return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
  11974. case X86::BI__builtin_ia32_movdqa32store128_mask:
  11975. case X86::BI__builtin_ia32_movdqa64store128_mask:
  11976. case X86::BI__builtin_ia32_storeaps128_mask:
  11977. case X86::BI__builtin_ia32_storeapd128_mask:
  11978. case X86::BI__builtin_ia32_movdqa32store256_mask:
  11979. case X86::BI__builtin_ia32_movdqa64store256_mask:
  11980. case X86::BI__builtin_ia32_storeaps256_mask:
  11981. case X86::BI__builtin_ia32_storeapd256_mask:
  11982. case X86::BI__builtin_ia32_movdqa32store512_mask:
  11983. case X86::BI__builtin_ia32_movdqa64store512_mask:
  11984. case X86::BI__builtin_ia32_storeaps512_mask:
  11985. case X86::BI__builtin_ia32_storeapd512_mask:
  11986. return EmitX86MaskedStore(
  11987. *this, Ops,
  11988. getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  11989. case X86::BI__builtin_ia32_loadups128_mask:
  11990. case X86::BI__builtin_ia32_loadups256_mask:
  11991. case X86::BI__builtin_ia32_loadups512_mask:
  11992. case X86::BI__builtin_ia32_loadupd128_mask:
  11993. case X86::BI__builtin_ia32_loadupd256_mask:
  11994. case X86::BI__builtin_ia32_loadupd512_mask:
  11995. case X86::BI__builtin_ia32_loaddquqi128_mask:
  11996. case X86::BI__builtin_ia32_loaddquqi256_mask:
  11997. case X86::BI__builtin_ia32_loaddquqi512_mask:
  11998. case X86::BI__builtin_ia32_loaddquhi128_mask:
  11999. case X86::BI__builtin_ia32_loaddquhi256_mask:
  12000. case X86::BI__builtin_ia32_loaddquhi512_mask:
  12001. case X86::BI__builtin_ia32_loaddqusi128_mask:
  12002. case X86::BI__builtin_ia32_loaddqusi256_mask:
  12003. case X86::BI__builtin_ia32_loaddqusi512_mask:
  12004. case X86::BI__builtin_ia32_loaddqudi128_mask:
  12005. case X86::BI__builtin_ia32_loaddqudi256_mask:
  12006. case X86::BI__builtin_ia32_loaddqudi512_mask:
  12007. return EmitX86MaskedLoad(*this, Ops, Align(1));
  12008. case X86::BI__builtin_ia32_loadsh128_mask:
  12009. case X86::BI__builtin_ia32_loadss128_mask:
  12010. case X86::BI__builtin_ia32_loadsd128_mask:
  12011. return EmitX86MaskedLoad(*this, Ops, Align(1));
  12012. case X86::BI__builtin_ia32_loadaps128_mask:
  12013. case X86::BI__builtin_ia32_loadaps256_mask:
  12014. case X86::BI__builtin_ia32_loadaps512_mask:
  12015. case X86::BI__builtin_ia32_loadapd128_mask:
  12016. case X86::BI__builtin_ia32_loadapd256_mask:
  12017. case X86::BI__builtin_ia32_loadapd512_mask:
  12018. case X86::BI__builtin_ia32_movdqa32load128_mask:
  12019. case X86::BI__builtin_ia32_movdqa32load256_mask:
  12020. case X86::BI__builtin_ia32_movdqa32load512_mask:
  12021. case X86::BI__builtin_ia32_movdqa64load128_mask:
  12022. case X86::BI__builtin_ia32_movdqa64load256_mask:
  12023. case X86::BI__builtin_ia32_movdqa64load512_mask:
  12024. return EmitX86MaskedLoad(
  12025. *this, Ops,
  12026. getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
  12027. case X86::BI__builtin_ia32_expandloaddf128_mask:
  12028. case X86::BI__builtin_ia32_expandloaddf256_mask:
  12029. case X86::BI__builtin_ia32_expandloaddf512_mask:
  12030. case X86::BI__builtin_ia32_expandloadsf128_mask:
  12031. case X86::BI__builtin_ia32_expandloadsf256_mask:
  12032. case X86::BI__builtin_ia32_expandloadsf512_mask:
  12033. case X86::BI__builtin_ia32_expandloaddi128_mask:
  12034. case X86::BI__builtin_ia32_expandloaddi256_mask:
  12035. case X86::BI__builtin_ia32_expandloaddi512_mask:
  12036. case X86::BI__builtin_ia32_expandloadsi128_mask:
  12037. case X86::BI__builtin_ia32_expandloadsi256_mask:
  12038. case X86::BI__builtin_ia32_expandloadsi512_mask:
  12039. case X86::BI__builtin_ia32_expandloadhi128_mask:
  12040. case X86::BI__builtin_ia32_expandloadhi256_mask:
  12041. case X86::BI__builtin_ia32_expandloadhi512_mask:
  12042. case X86::BI__builtin_ia32_expandloadqi128_mask:
  12043. case X86::BI__builtin_ia32_expandloadqi256_mask:
  12044. case X86::BI__builtin_ia32_expandloadqi512_mask:
  12045. return EmitX86ExpandLoad(*this, Ops);
  12046. case X86::BI__builtin_ia32_compressstoredf128_mask:
  12047. case X86::BI__builtin_ia32_compressstoredf256_mask:
  12048. case X86::BI__builtin_ia32_compressstoredf512_mask:
  12049. case X86::BI__builtin_ia32_compressstoresf128_mask:
  12050. case X86::BI__builtin_ia32_compressstoresf256_mask:
  12051. case X86::BI__builtin_ia32_compressstoresf512_mask:
  12052. case X86::BI__builtin_ia32_compressstoredi128_mask:
  12053. case X86::BI__builtin_ia32_compressstoredi256_mask:
  12054. case X86::BI__builtin_ia32_compressstoredi512_mask:
  case X86::BI__builtin_ia32_compressstoresi128_mask:
  case X86::BI__builtin_ia32_compressstoresi256_mask:
  case X86::BI__builtin_ia32_compressstoresi512_mask:
  case X86::BI__builtin_ia32_compressstorehi128_mask:
  case X86::BI__builtin_ia32_compressstorehi256_mask:
  case X86::BI__builtin_ia32_compressstorehi512_mask:
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
    return EmitX86CompressStore(*this, Ops);
  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
  case X86::BI__builtin_ia32_gather3div2df:
  case X86::BI__builtin_ia32_gather3div2di:
  case X86::BI__builtin_ia32_gather3div4df:
  case X86::BI__builtin_ia32_gather3div4di:
  case X86::BI__builtin_ia32_gather3div4sf:
  case X86::BI__builtin_ia32_gather3div4si:
  case X86::BI__builtin_ia32_gather3div8sf:
  case X86::BI__builtin_ia32_gather3div8si:
  case X86::BI__builtin_ia32_gather3siv2df:
  case X86::BI__builtin_ia32_gather3siv2di:
  case X86::BI__builtin_ia32_gather3siv4df:
  case X86::BI__builtin_ia32_gather3siv4di:
  case X86::BI__builtin_ia32_gather3siv4sf:
  case X86::BI__builtin_ia32_gather3siv4si:
  case X86::BI__builtin_ia32_gather3siv8sf:
  case X86::BI__builtin_ia32_gather3siv8si:
  case X86::BI__builtin_ia32_gathersiv8df:
  case X86::BI__builtin_ia32_gathersiv16sf:
  case X86::BI__builtin_ia32_gatherdiv8df:
  case X86::BI__builtin_ia32_gatherdiv16sf:
  case X86::BI__builtin_ia32_gathersiv8di:
  case X86::BI__builtin_ia32_gathersiv16si:
  case X86::BI__builtin_ia32_gatherdiv8di:
  case X86::BI__builtin_ia32_gatherdiv16si: {
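    // Map the builtin to the corresponding AVX-512 masked gather intrinsic,
    // size the mask to the narrower of the passthru and index vectors, and
    // emit the call.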
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_gather3div2df:
      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
      break;
    case X86::BI__builtin_ia32_gather3div2di:
      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
      break;
    case X86::BI__builtin_ia32_gather3div4df:
      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
      break;
    case X86::BI__builtin_ia32_gather3div4di:
      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
      break;
    case X86::BI__builtin_ia32_gather3div4sf:
      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
      break;
    case X86::BI__builtin_ia32_gather3div4si:
      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
      break;
    case X86::BI__builtin_ia32_gather3div8sf:
      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
      break;
    case X86::BI__builtin_ia32_gather3div8si:
      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
      break;
    case X86::BI__builtin_ia32_gather3siv2df:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
      break;
    case X86::BI__builtin_ia32_gather3siv2di:
      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4df:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
      break;
    case X86::BI__builtin_ia32_gather3siv4di:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
      break;
    case X86::BI__builtin_ia32_gather3siv4sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv4si:
      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
      break;
    case X86::BI__builtin_ia32_gather3siv8sf:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
      break;
    case X86::BI__builtin_ia32_gather3siv8si:
      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
      break;
    case X86::BI__builtin_ia32_gathersiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8df:
      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16sf:
      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
      break;
    case X86::BI__builtin_ia32_gathersiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
      break;
    case X86::BI__builtin_ia32_gathersiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv8di:
      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
      break;
    case X86::BI__builtin_ia32_gatherdiv16si:
      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
      break;
    }
    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_scattersiv8df:
  case X86::BI__builtin_ia32_scattersiv16sf:
  case X86::BI__builtin_ia32_scatterdiv8df:
  case X86::BI__builtin_ia32_scatterdiv16sf:
  case X86::BI__builtin_ia32_scattersiv8di:
  case X86::BI__builtin_ia32_scattersiv16si:
  case X86::BI__builtin_ia32_scatterdiv8di:
  case X86::BI__builtin_ia32_scatterdiv16si:
  case X86::BI__builtin_ia32_scatterdiv2df:
  case X86::BI__builtin_ia32_scatterdiv2di:
  case X86::BI__builtin_ia32_scatterdiv4df:
  case X86::BI__builtin_ia32_scatterdiv4di:
  case X86::BI__builtin_ia32_scatterdiv4sf:
  case X86::BI__builtin_ia32_scatterdiv4si:
  case X86::BI__builtin_ia32_scatterdiv8sf:
  case X86::BI__builtin_ia32_scatterdiv8si:
  case X86::BI__builtin_ia32_scattersiv2df:
  case X86::BI__builtin_ia32_scattersiv2di:
  case X86::BI__builtin_ia32_scattersiv4df:
  case X86::BI__builtin_ia32_scattersiv4di:
  case X86::BI__builtin_ia32_scattersiv4sf:
  case X86::BI__builtin_ia32_scattersiv4si:
  case X86::BI__builtin_ia32_scattersiv8sf:
  case X86::BI__builtin_ia32_scattersiv8si: {
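    // Same pattern as the gathers above: pick the masked scatter intrinsic,
    // size the mask to the narrower of the data and index vectors, and emit
    // the call.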
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_scattersiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8df:
      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16sf:
      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
      break;
    case X86::BI__builtin_ia32_scattersiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
      break;
    case X86::BI__builtin_ia32_scattersiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv8di:
      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv16si:
      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
      break;
    case X86::BI__builtin_ia32_scatterdiv2df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv2di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4df:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
      break;
    case X86::BI__builtin_ia32_scatterdiv4di:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
      break;
    case X86::BI__builtin_ia32_scatterdiv4sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv4si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
      break;
    case X86::BI__builtin_ia32_scatterdiv8sf:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
      break;
    case X86::BI__builtin_ia32_scatterdiv8si:
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
      break;
    case X86::BI__builtin_ia32_scattersiv2df:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
      break;
    case X86::BI__builtin_ia32_scattersiv2di:
      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4df:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
      break;
    case X86::BI__builtin_ia32_scattersiv4di:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
      break;
    case X86::BI__builtin_ia32_scattersiv4sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv4si:
      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
      break;
    case X86::BI__builtin_ia32_scattersiv8sf:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
      break;
    case X86::BI__builtin_ia32_scattersiv8si:
      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
      break;
    }
    unsigned MinElts = std::min(
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
        cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_vextractf128_pd256:
  case X86::BI__builtin_ia32_vextractf128_ps256:
  case X86::BI__builtin_ia32_vextractf128_si256:
  case X86::BI__builtin_ia32_extract128i256:
  case X86::BI__builtin_ia32_extractf64x4_mask:
  case X86::BI__builtin_ia32_extractf32x4_mask:
  case X86::BI__builtin_ia32_extracti64x4_mask:
  case X86::BI__builtin_ia32_extracti32x4_mask:
  case X86::BI__builtin_ia32_extractf32x8_mask:
  case X86::BI__builtin_ia32_extracti32x8_mask:
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
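    // Lower subvector extraction to a shufflevector that selects the
    // requested NumElts-wide slice; the masked forms then blend the result
    // with the passthru operand via EmitX86Select.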
    auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
    unsigned NumElts = DstTy->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SubVectors = SrcNumElts / NumElts;
    unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= NumElts;
    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + Index;
    Value *Res = Builder.CreateShuffleVector(
        Ops[0], makeArrayRef(Indices, NumElts), "extract");
    if (Ops.size() == 4)
      Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
    return Res;
  }
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512: {
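    // Lower subvector insertion as two shuffles: widen the subvector to the
    // destination width, then blend it into the destination at the selected
    // index.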
    unsigned DstNumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
    unsigned SubVectors = DstNumElts / SrcNumElts;
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
    Index &= SubVectors - 1; // Remove any extra bits.
    Index *= SrcNumElts;
    int Indices[16];
    for (unsigned i = 0; i != DstNumElts; ++i)
      Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
    Value *Op1 = Builder.CreateShuffleVector(
        Ops[1], makeArrayRef(Indices, DstNumElts), "widen");
    for (unsigned i = 0; i != DstNumElts; ++i) {
      if (i >= Index && i < (Index + SrcNumElts))
        Indices[i] = (i - Index) + DstNumElts;
      else
        Indices[i] = i;
    }
    return Builder.CreateShuffleVector(
        Ops[0], Op1, makeArrayRef(Indices, DstNumElts), "insert");
  }
  case X86::BI__builtin_ia32_pmovqd512_mask:
  case X86::BI__builtin_ia32_pmovwb512_mask: {
    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__builtin_ia32_pmovdb512_mask:
  case X86::BI__builtin_ia32_pmovdw512_mask:
  case X86::BI__builtin_ia32_pmovqw512_mask: {
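    // With an all-ones mask the down-converting move is just a truncation;
    // otherwise fall back to the masked pmov intrinsic, which also performs
    // the merge with the passthru operand.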
    if (const auto *C = dyn_cast<Constant>(Ops[2]))
      if (C->isAllOnesValue())
        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pmovdb512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
      break;
    case X86::BI__builtin_ia32_pmovdw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
      break;
    case X86::BI__builtin_ia32_pmovqw512_mask:
      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
      break;
    }
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, Ops);
  }
  case X86::BI__builtin_ia32_pblendw128:
  case X86::BI__builtin_ia32_blendpd:
  case X86::BI__builtin_ia32_blendps:
  case X86::BI__builtin_ia32_blendpd256:
  case X86::BI__builtin_ia32_blendps256:
  case X86::BI__builtin_ia32_pblendw256:
  case X86::BI__builtin_ia32_pblendd128:
  case X86::BI__builtin_ia32_pblendd256: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    int Indices[16];
    // If there are more than 8 elements, the immediate is used twice so make
    // sure we handle that.
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
    return Builder.CreateShuffleVector(
        Ops[0], Ops[1], makeArrayRef(Indices, NumElts), "blend");
  }
  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;
    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i) {
        Indices[l + i] = l + (Imm & 3);
        Imm >>= 2;
      }
      for (unsigned i = 4; i != 8; ++i)
        Indices[l + i] = l + i;
    }
    return Builder.CreateShuffleVector(
        Ops[0], makeArrayRef(Indices, NumElts), "pshuflw");
  }
  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;
    int Indices[32];
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + i;
      for (unsigned i = 4; i != 8; ++i) {
        Indices[l + i] = l + 4 + (Imm & 3);
        Imm >>= 2;
      }
    }
    return Builder.CreateShuffleVector(
        Ops[0], makeArrayRef(Indices, NumElts), "pshufhw");
  }
  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilpd512:
  case X86::BI__builtin_ia32_vpermilps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;
    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[i + l] = (Imm % NumLaneElts) + l;
        Imm /= NumLaneElts;
      }
    }
    return Builder.CreateShuffleVector(
        Ops[0], makeArrayRef(Indices, NumElts), "permil");
  }
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512: {
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
    unsigned NumLaneElts = NumElts / NumLanes;
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
    Imm = (Imm & 0xff) * 0x01010101;
    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Index = Imm % NumLaneElts;
        Imm /= NumLaneElts;
        if (i >= (NumLaneElts / 2))
          Index += NumElts;
        Indices[l + i] = l + Index;
      }
    }
    return Builder.CreateShuffleVector(
        Ops[0], Ops[1], makeArrayRef(Indices, NumElts), "shufp");
  }
  case X86::BI__builtin_ia32_permdi256:
  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi512:
  case X86::BI__builtin_ia32_permdf512: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
    int Indices[8];
    for (unsigned l = 0; l != NumElts; l += 4)
      for (unsigned i = 0; i != 4; ++i)
        Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
    return Builder.CreateShuffleVector(
        Ops[0], makeArrayRef(Indices, NumElts), "perm");
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    assert(NumElts % 16 == 0);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }
    int Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
    return Builder.CreateShuffleVector(
        Ops[1], Ops[0], makeArrayRef(Indices, NumElts), "palignr");
  }
  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
    // Mask the shift amount to width of a vector.
    ShiftVal &= NumElts - 1;
    int Indices[16];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;
    return Builder.CreateShuffleVector(
        Ops[1], Ops[0], makeArrayRef(Indices, NumElts), "valign");
  }
  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_i64x2: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
    unsigned NumElts = Ty->getNumElements();
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
    unsigned NumLaneElts = NumElts / NumLanes;
    int Indices[16];
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      unsigned Index = (Imm % NumLanes) * NumLaneElts;
      Imm /= NumLanes; // Discard the bits we just used.
      if (l >= (NumElts / 2))
        Index += NumElts; // Switch to other source.
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        Indices[l + i] = Index + i;
      }
    }
    return Builder.CreateShuffleVector(
        Ops[0], Ops[1], makeArrayRef(Indices, NumElts), "shuf");
  }
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    // This takes a very simple approach since there are two lanes and a
    // shuffle can have 2 inputs. So we reserve the first input for the first
    // lane and the second input for the second lane. This may result in
    // duplicate sources, but this can be dealt with in the backend.
    Value *OutOps[2];
    int Indices[8];
    for (unsigned l = 0; l != 2; ++l) {
      // Determine the source for this lane.
      if (Imm & (1 << ((l * 4) + 3)))
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
      else if (Imm & (1 << ((l * 4) + 1)))
        OutOps[l] = Ops[1];
      else
        OutOps[l] = Ops[0];
      for (unsigned i = 0; i != NumElts/2; ++i) {
        // Start with ith element of the source for this lane.
        unsigned Idx = (l * NumElts) + i;
        // If bit 0 of the immediate half is set, switch to the high half of
        // the source.
        if (Imm & (1 << (l * 4)))
          Idx += NumElts/2;
        Indices[(l * (NumElts/2)) + i] = Idx;
      }
    }
    return Builder.CreateShuffleVector(
        OutOps[0], OutOps[1], makeArrayRef(Indices, NumElts), "vperm");
  }
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
    // Builtin type is vXi64 so multiply by 8 to get bytes.
    unsigned NumElts = ResultType->getNumElements() * 8;
    // If pslldq is shifting the vector more than 15 bytes, emit zero.
    if (ShiftVal >= 16)
      return llvm::Constant::getNullValue(ResultType);
    int Indices[64];
    // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - ShiftVal;
        if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);
    Value *SV = Builder.CreateShuffleVector(
        Zero, Cast, makeArrayRef(Indices, NumElts), "pslldq");
    return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
  }
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
    // Builtin type is vXi64 so multiply by 8 to get bytes.
    unsigned NumElts = ResultType->getNumElements() * 8;
    // If psrldq is shifting the vector more than 15 bytes, emit zero.
    if (ShiftVal >= 16)
      return llvm::Constant::getNullValue(ResultType);
    int Indices[64];
    // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + ShiftVal;
        if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);
    Value *SV = Builder.CreateShuffleVector(
        Cast, Zero, makeArrayRef(Indices, NumElts), "psrldq");
    return Builder.CreateBitCast(SV, ResultType, "cast");
  }
  case X86::BI__builtin_ia32_kshiftliqi:
  case X86::BI__builtin_ia32_kshiftlihi:
  case X86::BI__builtin_ia32_kshiftlisi:
  case X86::BI__builtin_ia32_kshiftlidi: {
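    // Shift the mask left by shuffling zeroes in at the low end of its vXi1
    // form, then bitcast back to the integer mask type.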
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = NumElts + i - ShiftVal;
    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        Zero, In, makeArrayRef(Indices, NumElts), "kshiftl");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kshiftriqi:
  case X86::BI__builtin_ia32_kshiftrihi:
  case X86::BI__builtin_ia32_kshiftrisi:
  case X86::BI__builtin_ia32_kshiftridi: {
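    // Shift the mask right by shuffling zeroes in at the high end of its vXi1
    // form, then bitcast back to the integer mask type.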
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    if (ShiftVal >= NumElts)
      return llvm::Constant::getNullValue(Ops[0]->getType());
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i + ShiftVal;
    Value *Zero = llvm::Constant::getNullValue(In->getType());
    Value *SV = Builder.CreateShuffleVector(
        In, Zero, makeArrayRef(Indices, NumElts), "kshiftr");
    return Builder.CreateBitCast(SV, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64:
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
    Value *Ptr = Ops[0];
    Value *Src = Ops[1];
    // Extract the 0'th element of the source vector.
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
        BuiltinID == X86::BI__builtin_ia32_movntss)
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(llvm::Align(1));
    return SI;
  }
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
  case X86::BI__builtin_ia32_vprotb:
  case X86::BI__builtin_ia32_vprotw:
  case X86::BI__builtin_ia32_vprotd:
  case X86::BI__builtin_ia32_vprotq:
  case X86::BI__builtin_ia32_vprotbi:
  case X86::BI__builtin_ia32_vprotwi:
  case X86::BI__builtin_ia32_vprotdi:
  case X86::BI__builtin_ia32_vprotqi:
  case X86::BI__builtin_ia32_prold128:
  case X86::BI__builtin_ia32_prold256:
  case X86::BI__builtin_ia32_prold512:
  case X86::BI__builtin_ia32_prolq128:
  case X86::BI__builtin_ia32_prolq256:
  case X86::BI__builtin_ia32_prolq512:
  case X86::BI__builtin_ia32_prolvd128:
  case X86::BI__builtin_ia32_prolvd256:
  case X86::BI__builtin_ia32_prolvd512:
  case X86::BI__builtin_ia32_prolvq128:
  case X86::BI__builtin_ia32_prolvq256:
  case X86::BI__builtin_ia32_prolvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
  case X86::BI__builtin_ia32_prord128:
  case X86::BI__builtin_ia32_prord256:
  case X86::BI__builtin_ia32_prord512:
  case X86::BI__builtin_ia32_prorq128:
  case X86::BI__builtin_ia32_prorq256:
  case X86::BI__builtin_ia32_prorq512:
  case X86::BI__builtin_ia32_prorvd128:
  case X86::BI__builtin_ia32_prorvd256:
  case X86::BI__builtin_ia32_prorvd512:
  case X86::BI__builtin_ia32_prorvq128:
  case X86::BI__builtin_ia32_prorvq256:
  case X86::BI__builtin_ia32_prorvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  case X86::BI__builtin_ia32_selectsh_128:
  case X86::BI__builtin_ia32_selectss_128:
  case X86::BI__builtin_ia32_selectsd_128: {
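    // Scalar masked select: only element 0 participates; select between the
    // low elements of the two sources and reinsert the result into Ops[1].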
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }
  case X86::BI__builtin_ia32_vpcomb:
  case X86::BI__builtin_ia32_vpcomw:
  case X86::BI__builtin_ia32_vpcomd:
  case X86::BI__builtin_ia32_vpcomq:
    return EmitX86vpcom(*this, Ops, true);
  case X86::BI__builtin_ia32_vpcomub:
  case X86::BI__builtin_ia32_vpcomuw:
  case X86::BI__builtin_ia32_vpcomud:
  case X86::BI__builtin_ia32_vpcomuq:
    return EmitX86vpcom(*this, Ops, false);
  case X86::BI__builtin_ia32_kortestcqi:
  case X86::BI__builtin_ia32_kortestchi:
  case X86::BI__builtin_ia32_kortestcsi:
  case X86::BI__builtin_ia32_kortestcdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_kortestzqi:
  case X86::BI__builtin_ia32_kortestzhi:
  case X86::BI__builtin_ia32_kortestzsi:
  case X86::BI__builtin_ia32_kortestzdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_ktestcqi:
  case X86::BI__builtin_ia32_ktestzqi:
  case X86::BI__builtin_ia32_ktestchi:
  case X86::BI__builtin_ia32_ktestzhi:
  case X86::BI__builtin_ia32_ktestcsi:
  case X86::BI__builtin_ia32_ktestzsi:
  case X86::BI__builtin_ia32_ktestcdi:
  case X86::BI__builtin_ia32_ktestzdi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_ktestcqi:
      IID = Intrinsic::x86_avx512_ktestc_b;
      break;
    case X86::BI__builtin_ia32_ktestzqi:
      IID = Intrinsic::x86_avx512_ktestz_b;
      break;
    case X86::BI__builtin_ia32_ktestchi:
      IID = Intrinsic::x86_avx512_ktestc_w;
      break;
    case X86::BI__builtin_ia32_ktestzhi:
      IID = Intrinsic::x86_avx512_ktestz_w;
      break;
    case X86::BI__builtin_ia32_ktestcsi:
      IID = Intrinsic::x86_avx512_ktestc_d;
      break;
    case X86::BI__builtin_ia32_ktestzsi:
      IID = Intrinsic::x86_avx512_ktestz_d;
      break;
    case X86::BI__builtin_ia32_ktestcdi:
      IID = Intrinsic::x86_avx512_ktestc_q;
      break;
    case X86::BI__builtin_ia32_ktestzdi:
      IID = Intrinsic::x86_avx512_ktestz_q;
      break;
    }
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, {LHS, RHS});
  }
  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_kaddqi:
      IID = Intrinsic::x86_avx512_kadd_b;
      break;
    case X86::BI__builtin_ia32_kaddhi:
      IID = Intrinsic::x86_avx512_kadd_w;
      break;
    case X86::BI__builtin_ia32_kaddsi:
      IID = Intrinsic::x86_avx512_kadd_d;
      break;
    case X86::BI__builtin_ia32_kadddi:
      IID = Intrinsic::x86_avx512_kadd_q;
      break;
    }
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Builder.CreateNot(Res), Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq: {
    // Bitcast to vXi1 type and then back to integer. This gets the mask
    // register type into the IR, but might be optimized out depending on
    // what's around it.
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
  case X86::BI__builtin_ia32_kunpckhi: {
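    // kunpck concatenates the low halves of the two mask operands into one
    // wider mask.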
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(
        LHS, LHS, makeArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(
        RHS, RHS, makeArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands are swapped to match the intrinsic definition.
    Value *Res = Builder.CreateShuffleVector(
        RHS, LHS, makeArrayRef(Indices, NumElts));
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_vplzcntd_128:
  case X86::BI__builtin_ia32_vplzcntd_256:
  case X86::BI__builtin_ia32_vplzcntd_512:
  case X86::BI__builtin_ia32_vplzcntq_128:
  case X86::BI__builtin_ia32_vplzcntq_256:
  case X86::BI__builtin_ia32_vplzcntq_512: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
  }
  case X86::BI__builtin_ia32_sqrtss:
  case X86::BI__builtin_ia32_sqrtsd: {
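    // Scalar sqrt: operate on element 0 only, using the constrained sqrt
    // intrinsic when FP state is constrained, then reinsert the result.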
    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, A->getType());
      A = Builder.CreateConstrainedFPCall(F, {A});
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, {A});
    }
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtsh_round_mask:
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
    // otherwise keep the intrinsic.
    if (CC != 4) {
      Intrinsic::ID IID;
      switch (BuiltinID) {
      default:
        llvm_unreachable("Unsupported intrinsic!");
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
        break;
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
        break;
      case X86::BI__builtin_ia32_sqrtss_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
        break;
      }
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    }
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, A->getType());
      A = Builder.CreateConstrainedFPCall(F, A);
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, A);
    }
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtpd256:
  case X86::BI__builtin_ia32_sqrtpd:
  case X86::BI__builtin_ia32_sqrtps256:
  case X86::BI__builtin_ia32_sqrtps:
  case X86::BI__builtin_ia32_sqrtph256:
  case X86::BI__builtin_ia32_sqrtph:
  case X86::BI__builtin_ia32_sqrtph512:
  case X86::BI__builtin_ia32_sqrtps512:
  case X86::BI__builtin_ia32_sqrtpd512: {
    if (Ops.size() == 2) {
      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
      // otherwise keep the intrinsic.
      if (CC != 4) {
        Intrinsic::ID IID;
        switch (BuiltinID) {
        default:
          llvm_unreachable("Unsupported intrinsic!");
        case X86::BI__builtin_ia32_sqrtph512:
          IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
          break;
        case X86::BI__builtin_ia32_sqrtps512:
          IID = Intrinsic::x86_avx512_sqrt_ps_512;
          break;
        case X86::BI__builtin_ia32_sqrtpd512:
          IID = Intrinsic::x86_avx512_sqrt_pd_512;
          break;
        }
        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
      }
    }
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, Ops[0]->getType());
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
      return Builder.CreateCall(F, Ops[0]);
    }
  }
  case X86::BI__builtin_ia32_pmuludq128:
  case X86::BI__builtin_ia32_pmuludq256:
  case X86::BI__builtin_ia32_pmuludq512:
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
  case X86::BI__builtin_ia32_pmuldq128:
  case X86::BI__builtin_ia32_pmuldq256:
  case X86::BI__builtin_ia32_pmuldq512:
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
  case X86::BI__builtin_ia32_vpshrdd128:
  case X86::BI__builtin_ia32_vpshrdd256:
  case X86::BI__builtin_ia32_vpshrdd512:
  case X86::BI__builtin_ia32_vpshrdq128:
  case X86::BI__builtin_ia32_vpshrdq256:
  case X86::BI__builtin_ia32_vpshrdq512:
  case X86::BI__builtin_ia32_vpshrdw128:
  case X86::BI__builtin_ia32_vpshrdw256:
  case X86::BI__builtin_ia32_vpshrdw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
  case X86::BI__builtin_ia32_vpshldvd128:
  case X86::BI__builtin_ia32_vpshldvd256:
  case X86::BI__builtin_ia32_vpshldvd512:
  case X86::BI__builtin_ia32_vpshldvq128:
  case X86::BI__builtin_ia32_vpshldvq256:
  case X86::BI__builtin_ia32_vpshldvq512:
  case X86::BI__builtin_ia32_vpshldvw128:
  case X86::BI__builtin_ia32_vpshldvw256:
  case X86::BI__builtin_ia32_vpshldvw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
  case X86::BI__builtin_ia32_vpshrdvd128:
  case X86::BI__builtin_ia32_vpshrdvd256:
  case X86::BI__builtin_ia32_vpshrdvd512:
  case X86::BI__builtin_ia32_vpshrdvq128:
  case X86::BI__builtin_ia32_vpshrdvq256:
  case X86::BI__builtin_ia32_vpshrdvq512:
  case X86::BI__builtin_ia32_vpshrdvw128:
  case X86::BI__builtin_ia32_vpshrdvw256:
  case X86::BI__builtin_ia32_vpshrdvw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
  // Reductions
  case X86::BI__builtin_ia32_reduce_add_d512:
  case X86::BI__builtin_ia32_reduce_add_q512: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_fadd_pd512:
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmul_pd512:
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_mul_d512:
  case X86::BI__builtin_ia32_reduce_mul_q512: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());
    return Builder.CreateCall(F, {Ops[0]});
  }
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
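    // rdrand/rdseed return {random value, success flag}; store the value
    // through the pointer operand and return the flag.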
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(
        Builder.CreateExtractValue(Call, 0), Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  case X86::BI__builtin_ia32_addcarryx_u32:
  case X86::BI__builtin_ia32_addcarryx_u64:
  case X86::BI__builtin_ia32_subborrow_u32:
  case X86::BI__builtin_ia32_subborrow_u64: {
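    // addcarryx/subborrow return {carry-out, result}; store the arithmetic
    // result through Ops[3] and return the carry/borrow flag.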
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_addcarryx_u32:
      IID = Intrinsic::x86_addcarry_32;
      break;
    case X86::BI__builtin_ia32_addcarryx_u64:
      IID = Intrinsic::x86_addcarry_64;
      break;
    case X86::BI__builtin_ia32_subborrow_u32:
      IID = Intrinsic::x86_subborrow_32;
      break;
    case X86::BI__builtin_ia32_subborrow_u64:
      IID = Intrinsic::x86_subborrow_64;
      break;
    }
    Value *Call = Builder.CreateCall(
        CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
    Builder.CreateDefaultAlignedStore(
        Builder.CreateExtractValue(Call, 1), Ops[3]);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_fpclassps128_mask:
  case X86::BI__builtin_ia32_fpclassps256_mask:
  case X86::BI__builtin_ia32_fpclassps512_mask:
  case X86::BI__builtin_ia32_fpclassph128_mask:
  case X86::BI__builtin_ia32_fpclassph256_mask:
  case X86::BI__builtin_ia32_fpclassph512_mask:
  case X86::BI__builtin_ia32_fpclasspd128_mask:
  case X86::BI__builtin_ia32_fpclasspd256_mask:
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
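    // Peel off the input mask, call the fpclass intrinsic on the remaining
    // operands, and combine the i1 results with the mask via
    // EmitX86MaskedCompareResult.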
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_fpclassph128_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
      break;
    case X86::BI__builtin_ia32_fpclassph256_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
      break;
    case X86::BI__builtin_ia32_fpclassph512_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
      break;
    case X86::BI__builtin_ia32_fpclassps128_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
      break;
    case X86::BI__builtin_ia32_fpclassps256_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
      break;
    case X86::BI__builtin_ia32_fpclassps512_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
      break;
    case X86::BI__builtin_ia32_fpclasspd128_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
      break;
    case X86::BI__builtin_ia32_fpclasspd256_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
      break;
    case X86::BI__builtin_ia32_fpclasspd512_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
      break;
    }
    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
  }
  case X86::BI__builtin_ia32_vp2intersect_q_512:
  case X86::BI__builtin_ia32_vp2intersect_q_256:
  case X86::BI__builtin_ia32_vp2intersect_q_128:
  case X86::BI__builtin_ia32_vp2intersect_d_512:
  case X86::BI__builtin_ia32_vp2intersect_d_256:
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
  13369. unsigned NumElts =
  13370. cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  13371. Intrinsic::ID ID;
  13372. switch (BuiltinID) {
  13373. default: llvm_unreachable("Unsupported intrinsic!");
  13374. case X86::BI__builtin_ia32_vp2intersect_q_512:
  13375. ID = Intrinsic::x86_avx512_vp2intersect_q_512;
  13376. break;
  13377. case X86::BI__builtin_ia32_vp2intersect_q_256:
  13378. ID = Intrinsic::x86_avx512_vp2intersect_q_256;
  13379. break;
  13380. case X86::BI__builtin_ia32_vp2intersect_q_128:
  13381. ID = Intrinsic::x86_avx512_vp2intersect_q_128;
  13382. break;
  13383. case X86::BI__builtin_ia32_vp2intersect_d_512:
  13384. ID = Intrinsic::x86_avx512_vp2intersect_d_512;
  13385. break;
  13386. case X86::BI__builtin_ia32_vp2intersect_d_256:
  13387. ID = Intrinsic::x86_avx512_vp2intersect_d_256;
  13388. break;
  13389. case X86::BI__builtin_ia32_vp2intersect_d_128:
  13390. ID = Intrinsic::x86_avx512_vp2intersect_d_128;
  13391. break;
  13392. }
  13393. Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
  13394. Value *Result = Builder.CreateExtractValue(Call, 0);
  13395. Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
  13396. Builder.CreateDefaultAlignedStore(Result, Ops[2]);
  13397. Result = Builder.CreateExtractValue(Call, 1);
  13398. Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
  13399. return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
  13400. }
  13401. case X86::BI__builtin_ia32_vpmultishiftqb128:
  13402. case X86::BI__builtin_ia32_vpmultishiftqb256:
  13403. case X86::BI__builtin_ia32_vpmultishiftqb512: {
  13404. Intrinsic::ID ID;
  13405. switch (BuiltinID) {
  13406. default: llvm_unreachable("Unsupported intrinsic!");
  13407. case X86::BI__builtin_ia32_vpmultishiftqb128:
  13408. ID = Intrinsic::x86_avx512_pmultishift_qb_128;
  13409. break;
  13410. case X86::BI__builtin_ia32_vpmultishiftqb256:
  13411. ID = Intrinsic::x86_avx512_pmultishift_qb_256;
  13412. break;
  13413. case X86::BI__builtin_ia32_vpmultishiftqb512:
  13414. ID = Intrinsic::x86_avx512_pmultishift_qb_512;
  13415. break;
  13416. }
  13417. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  13418. }
  13419. case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  13420. case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  13421. case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
  13422. unsigned NumElts =
  13423. cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
  13424. Value *MaskIn = Ops[2];
  13425. Ops.erase(&Ops[2]);
  13426. Intrinsic::ID ID;
  13427. switch (BuiltinID) {
  13428. default: llvm_unreachable("Unsupported intrinsic!");
  13429. case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  13430. ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
  13431. break;
  13432. case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  13433. ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
  13434. break;
  13435. case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
  13436. ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
  13437. break;
  13438. }
  13439. Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  13440. return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
  13441. }
  13442. // packed comparison intrinsics
  13443. case X86::BI__builtin_ia32_cmpeqps:
  13444. case X86::BI__builtin_ia32_cmpeqpd:
  13445. return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
  13446. case X86::BI__builtin_ia32_cmpltps:
  13447. case X86::BI__builtin_ia32_cmpltpd:
  13448. return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
  13449. case X86::BI__builtin_ia32_cmpleps:
  13450. case X86::BI__builtin_ia32_cmplepd:
  13451. return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
  13452. case X86::BI__builtin_ia32_cmpunordps:
  13453. case X86::BI__builtin_ia32_cmpunordpd:
  13454. return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
  13455. case X86::BI__builtin_ia32_cmpneqps:
  13456. case X86::BI__builtin_ia32_cmpneqpd:
  13457. return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
  13458. case X86::BI__builtin_ia32_cmpnltps:
  13459. case X86::BI__builtin_ia32_cmpnltpd:
  13460. return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
  13461. case X86::BI__builtin_ia32_cmpnleps:
  13462. case X86::BI__builtin_ia32_cmpnlepd:
  13463. return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
  13464. case X86::BI__builtin_ia32_cmpordps:
  13465. case X86::BI__builtin_ia32_cmpordpd:
  13466. return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
  13467. case X86::BI__builtin_ia32_cmpph128_mask:
  13468. case X86::BI__builtin_ia32_cmpph256_mask:
  13469. case X86::BI__builtin_ia32_cmpph512_mask:
  13470. case X86::BI__builtin_ia32_cmpps128_mask:
  13471. case X86::BI__builtin_ia32_cmpps256_mask:
  13472. case X86::BI__builtin_ia32_cmpps512_mask:
  13473. case X86::BI__builtin_ia32_cmppd128_mask:
  13474. case X86::BI__builtin_ia32_cmppd256_mask:
  13475. case X86::BI__builtin_ia32_cmppd512_mask:
  13476. IsMaskFCmp = true;
  13477. LLVM_FALLTHROUGH;
  13478. case X86::BI__builtin_ia32_cmpps:
  13479. case X86::BI__builtin_ia32_cmpps256:
  13480. case X86::BI__builtin_ia32_cmppd:
  13481. case X86::BI__builtin_ia32_cmppd256: {
    // Lower the vector comparison to an fcmp instruction, ignoring the
    // requested signalling behaviour and rounding mode. This is only possible
    // if the fp-model is not strict and FENV_ACCESS is off.
    // The third argument is the comparison condition, an integer in the
    // range [0, 31].
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
    // Lowering to IR fcmp instruction.
    // Ignoring requested signaling behaviour,
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
    FCmpInst::Predicate Pred;
    bool IsSignaling;
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
    // behavior is inverted. We'll handle that after the switch.
    switch (CC & 0xf) {
    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
    default: llvm_unreachable("Unhandled CC");
    }
    // Invert the signalling behavior for 16-31.
    if (CC & 0x10)
      IsSignaling = !IsSignaling;
    // If the predicate is true or false and we're using constrained intrinsics,
    // we don't have a compare intrinsic we can use. Just use the legacy X86
    // specific intrinsic.
    // If the intrinsic is mask enabled and we're using constrained intrinsics,
    // use the legacy X86 specific intrinsic.
    if (Builder.getIsFPConstrained() &&
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
         IsMaskFCmp)) {
      Intrinsic::ID IID;
      switch (BuiltinID) {
      default: llvm_unreachable("Unexpected builtin");
      case X86::BI__builtin_ia32_cmpps:
        IID = Intrinsic::x86_sse_cmp_ps;
        break;
      case X86::BI__builtin_ia32_cmpps256:
        IID = Intrinsic::x86_avx_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd:
        IID = Intrinsic::x86_sse2_cmp_pd;
        break;
      case X86::BI__builtin_ia32_cmppd256:
        IID = Intrinsic::x86_avx_cmp_pd_256;
        break;
      case X86::BI__builtin_ia32_cmpps512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
        break;
      case X86::BI__builtin_ia32_cmppd512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
        break;
      case X86::BI__builtin_ia32_cmpps128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
        break;
      case X86::BI__builtin_ia32_cmpps256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
        break;
      case X86::BI__builtin_ia32_cmppd256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
        break;
      }
      Function *Intr = CGM.getIntrinsic(IID);
      if (IsMaskFCmp) {
        unsigned NumElts =
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
        Value *Cmp = Builder.CreateCall(Intr, Ops);
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
      }
      return Builder.CreateCall(Intr, Ops);
    }
    // Builtins without the _mask suffix return a vector of integers
    // of the same width as the input vectors
    if (IsMaskFCmp) {
      // We ignore SAE if strict FP is disabled. We only keep precise
      // exception behavior under strict FP.
      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
      // object will be required.
      unsigned NumElts =
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
      Value *Cmp;
      if (IsSignaling)
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
      else
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
    }
    return getVectorFCmpIR(Pred, IsSignaling);
  }
  // SSE scalar comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
  // f16c half2float intrinsics
  case X86::BI__builtin_ia32_vcvtph2ps:
  case X86::BI__builtin_ia32_vcvtph2ps256:
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
  }
  // AVX512 bf16 intrinsics
  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
    Ops[2] = getMaskVecValue(
        *this, Ops[2],
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  }
  case X86::BI__builtin_ia32_cvtsbf162ss_32:
    return EmitX86CvtBF16ToFloatExpr(*this, E, Ops);
  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
      break;
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
      break;
    }
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }
  case X86::BI__emul:
  case X86::BI__emulu: {
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
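    // Widen both operands to 128 bits, multiply, and shift to get the high
    // 64 bits. __mulh/__umulh return just the high half; _mul128/_umul128
    // additionally store the high half and return the low half.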
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }
  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI__shiftleft128:
  case X86::BI__shiftright128: {
    llvm::Function *F = CGM.getIntrinsic(
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
        Int64Ty);
    // Flip low/high ops and zero-extend amount to matching type.
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
    std::swap(Ops[0], Ops[1]);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }
  case X86::BI_AddressOfReturnAddress: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate a "rep
    // stosb" instruction, but it will create a memset that won't be optimized
    // away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
  }
  case X86::BI__ud2:
    // llvm.trap makes a ud2a instruction on x86.
    return EmitTrapCall(Intrinsic::trap);
  case X86::BI__int2c: {
    // This syscall signals a driver assertion failure in x86 NT kernels.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    llvm::CallInst *CI = Builder.CreateCall(IA);
    CI->setAttributes(NoReturnAttr);
    return CI;
  }
  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr =
        Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__builtin_ia32_paddsb512:
  case X86::BI__builtin_ia32_paddsw512:
  case X86::BI__builtin_ia32_paddsb256:
  case X86::BI__builtin_ia32_paddsw256:
  case X86::BI__builtin_ia32_paddsb128:
  case X86::BI__builtin_ia32_paddsw128:
    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat);
  case X86::BI__builtin_ia32_paddusb512:
  case X86::BI__builtin_ia32_paddusw512:
  case X86::BI__builtin_ia32_paddusb256:
  case X86::BI__builtin_ia32_paddusw256:
  case X86::BI__builtin_ia32_paddusb128:
  case X86::BI__builtin_ia32_paddusw128:
    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
  case X86::BI__builtin_ia32_psubsb512:
  case X86::BI__builtin_ia32_psubsw512:
  case X86::BI__builtin_ia32_psubsb256:
  case X86::BI__builtin_ia32_psubsw256:
  case X86::BI__builtin_ia32_psubsb128:
  case X86::BI__builtin_ia32_psubsw128:
    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);
  case X86::BI__builtin_ia32_psubusb512:
  case X86::BI__builtin_ia32_psubusw512:
  case X86::BI__builtin_ia32_psubusb256:
  case X86::BI__builtin_ia32_psubusw256:
  case X86::BI__builtin_ia32_psubusb128:
  case X86::BI__builtin_ia32_psubusw128:
    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat);
  case X86::BI__builtin_ia32_encodekey128_u32: {
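    // The intrinsic returns an i32 plus three 128-bit pieces of the key
    // handle; write each piece to the output buffer at a 16-byte offset and
    // return the i32.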
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
    for (int i = 0; i < 3; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_encodekey256_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;
    Value *Call =
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
    for (int i = 0; i < 4; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
      Ptr = Builder.CreateBitCast(
          Ptr, llvm::PointerType::getUnqual(Extract->getType()));
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_aesenc128kl_u8:
  case X86::BI__builtin_ia32_aesdec128kl_u8:
  case X86::BI__builtin_ia32_aesenc256kl_u8:
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_aesenc128kl_u8:
      IID = Intrinsic::x86_aesenc128kl;
      BlockName = "aesenc128kl";
      break;
    case X86::BI__builtin_ia32_aesdec128kl_u8:
      IID = Intrinsic::x86_aesdec128kl;
      BlockName = "aesdec128kl";
      break;
    case X86::BI__builtin_ia32_aesenc256kl_u8:
      IID = Intrinsic::x86_aesenc256kl;
      BlockName = "aesenc256kl";
      break;
    case X86::BI__builtin_ia32_aesdec256kl_u8:
      IID = Intrinsic::x86_aesdec256kl;
      BlockName = "aesdec256kl";
      break;
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
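    // Element 0 of the result is the status flag and element 1 the processed
    // block. Branch on the flag: on success store the block to the
    // destination, otherwise store zero, and return the flag either way.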
    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Value *Out = Builder.CreateExtractValue(Call, 1);
    Builder.CreateCondBr(Succ, NoError, Error);
    Builder.SetInsertPoint(NoError);
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
    Builder.CreateBr(End);
    Builder.SetInsertPoint(Error);
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
    Builder.CreateBr(End);
    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_aesencwide128kl_u8:
  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
  case X86::BI__builtin_ia32_aesencwide256kl_u8:
  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    case X86::BI__builtin_ia32_aesencwide128kl_u8:
      IID = Intrinsic::x86_aesencwide128kl;
      BlockName = "aesencwide128kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide128kl_u8:
      IID = Intrinsic::x86_aesdecwide128kl;
      BlockName = "aesdecwide128kl";
      break;
    case X86::BI__builtin_ia32_aesencwide256kl_u8:
      IID = Intrinsic::x86_aesencwide256kl;
      BlockName = "aesencwide256kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide256kl_u8:
      IID = Intrinsic::x86_aesdecwide256kl;
      BlockName = "aesdecwide256kl";
      break;
    }
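    // Build the intrinsic operand list: the key handle followed by the eight
    // 128-bit data blocks loaded from the source buffer.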
    llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
    Value *InOps[9];
    InOps[0] = Ops[2];
    for (int i = 0; i != 8; ++i) {
      Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
      InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
    }
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Builder.CreateCondBr(Succ, NoError, Error);
    Builder.SetInsertPoint(NoError);
    for (int i = 0; i != 8; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Extract, Ptr, Align(16));
    }
    Builder.CreateBr(End);
    Builder.SetInsertPoint(Error);
    for (int i = 0; i != 8; ++i) {
      Value *Out = Builder.CreateExtractValue(Call, i + 1);
      Constant *Zero = llvm::Constant::getNullValue(Out->getType());
      Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Zero, Ptr, Align(16));
    }
    Builder.CreateBr(End);
    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
    IsConjFMA = true;
    LLVM_FALLTHROUGH;
  case X86::BI__builtin_ia32_vfmaddcph512_mask: {
    Intrinsic::ID IID = IsConjFMA
                            ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
                            : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    return EmitX86Select(*this, Ops[3], Call, Ops[0]);
  }
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
    IsConjFMA = true;
    LLVM_FALLTHROUGH;
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
    return EmitX86Select(*this, And, Call, Ops[0]);
  }
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
    IsConjFMA = true;
    LLVM_FALLTHROUGH;
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
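    // Recombine for the *_mask3 form: element 0 comes from the intrinsic
    // result, the remaining elements from Ops[2].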
    static constexpr int Mask[] = {0, 5, 6, 7};
    return Builder.CreateShuffleVector(Call, Ops[2], Mask);
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    if (E->getArg(i)->getType()->isArrayType())
      Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
    else
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  default: return nullptr;
  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  // call __builtin_readcyclecounter.
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  case PPC::BI__builtin_vsx_lxvd2x_be:
  case PPC::BI__builtin_vsx_lxvw4x_be:
  case PPC::BI__builtin_vsx_lxvl:
  case PPC::BI__builtin_vsx_lxvll:
  {
    if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
        BuiltinID == PPC::BI__builtin_vsx_lxvll) {
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    } else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
      Ops.pop_back();
    }
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    case PPC::BI__builtin_vsx_lxvd2x_be:
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_lxvw4x_be:
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_lxvl:
      ID = Intrinsic::ppc_vsx_lxvl;
      break;
    case PPC::BI__builtin_vsx_lxvll:
      ID = Intrinsic::ppc_vsx_lxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  // vec_st, vec_xst_be
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  case PPC::BI__builtin_vsx_stxvd2x_be:
  case PPC::BI__builtin_vsx_stxvw4x_be:
  case PPC::BI__builtin_vsx_stxvl:
  case PPC::BI__builtin_vsx_stxvll:
  {
    if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
        BuiltinID == PPC::BI__builtin_vsx_stxvll) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
    } else {
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
      Ops.pop_back();
    }
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    case PPC::BI__builtin_vsx_stxvd2x_be:
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_stxvw4x_be:
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_stxvl:
      ID = Intrinsic::ppc_vsx_stxvl;
      break;
    case PPC::BI__builtin_vsx_stxvll:
      ID = Intrinsic::ppc_vsx_stxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  case PPC::BI__builtin_vsx_ldrmb: {
    // Essentially boils down to performing an unaligned VMX load sequence so
    // as to avoid crossing a page boundary and then shuffling the elements
    // into the right side of the vector register.
    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
    llvm::Type *ResTy = ConvertType(E->getType());
    bool IsLE = getTarget().isLittleEndian();
    // If the user wants the entire vector, just load the entire vector.
    if (NumBytes == 16) {
      Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo());
      Value *LD =
          Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
      if (!IsLE)
        return LD;
      // Reverse the bytes on LE.
      SmallVector<int, 16> RevMask;
      for (int Idx = 0; Idx < 16; Idx++)
        RevMask.push_back(15 - Idx);
      return Builder.CreateShuffleVector(LD, LD, RevMask);
    }
    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
                                                : Intrinsic::ppc_altivec_lvsl);
    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
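    // Load the aligned quadwords containing the first and last requested
    // bytes, and build the permute control vector from the base address with
    // lvsl/lvsr.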
    Value *HiMem = Builder.CreateGEP(
        Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1));
    Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo");
    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
    Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1");
    Ops.clear();
    Ops.push_back(IsLE ? HiLd : LoLd);
    Ops.push_back(IsLE ? LoLd : HiLd);
    Ops.push_back(Mask1);
    Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1");
    Constant *Zero =
        llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
    if (IsLE) {
      SmallVector<int, 16> Consts;
      for (int Idx = 0; Idx < 16; Idx++) {
        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
                                            : 16 - (NumBytes - Idx);
        Consts.push_back(Val);
      }
      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
                                         Zero, Consts);
    }
    SmallVector<Constant *, 16> Consts;
    for (int Idx = 0; Idx < 16; Idx++)
      Consts.push_back(Builder.getInt8(NumBytes + Idx));
    Value *Mask2 = ConstantVector::get(Consts);
    return Builder.CreateBitCast(
        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
  }
  case PPC::BI__builtin_vsx_strmb: {
    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
    bool IsLE = getTarget().isLittleEndian();
    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
      // When storing the whole vector, simply store it on BE; on LE, reverse
      // the bytes first and then store.
      if (Width == 16) {
        Value *BC =
            Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo());
        Value *StVec = Ops[2];
        if (IsLE) {
          SmallVector<int, 16> RevMask;
          for (int Idx = 0; Idx < 16; Idx++)
            RevMask.push_back(15 - Idx);
          StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask);
        }
        return Builder.CreateStore(
            StVec, Address(BC, Ops[2]->getType(), CharUnits::fromQuantity(1)));
      }
      auto *ConvTy = Int64Ty;
      unsigned NumElts = 0;
      switch (Width) {
      default:
        llvm_unreachable("width for stores must be a power of 2");
      case 8:
        ConvTy = Int64Ty;
        NumElts = 2;
        break;
      case 4:
        ConvTy = Int32Ty;
        NumElts = 4;
        break;
      case 2:
        ConvTy = Int16Ty;
        NumElts = 8;
        break;
      case 1:
        ConvTy = Int8Ty;
        NumElts = 16;
        break;
      }
      Value *Vec = Builder.CreateBitCast(
          Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts));
      Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0],
                                     ConstantInt::get(Int64Ty, Offset));
      Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
      if (IsLE && Width > 1) {
        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
        Elt = Builder.CreateCall(F, Elt);
      }
      return Builder.CreateStore(
          Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1)));
    };
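    // Decompose the NumBytes store into the widest power-of-two pieces that
    // still fit: 8, then 4, then 2, then 1 bytes.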
    unsigned Stored = 0;
    unsigned RemainingBytes = NumBytes;
    Value *Result;
    if (NumBytes == 16)
      return StoreSubVec(16, 0, 0);
    if (NumBytes >= 8) {
      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
      RemainingBytes -= 8;
      Stored += 8;
    }
    if (RemainingBytes >= 4) {
      Result = StoreSubVec(4, NumBytes - Stored - 4,
                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
      RemainingBytes -= 4;
      Stored += 4;
    }
    if (RemainingBytes >= 2) {
      Result = StoreSubVec(2, NumBytes - Stored - 2,
                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
      RemainingBytes -= 2;
      Stored += 2;
    }
    if (RemainingBytes)
      Result =
          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
    return Result;
  }
  // Square root
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (Builder.getIsFPConstrained()) {
      llvm::Function *F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, ResultType);
      return Builder.CreateConstrainedFPCall(F, X);
    } else {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
      return Builder.CreateCall(F, X);
    }
  }
  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vctzb:
  case PPC::BI__builtin_altivec_vctzh:
  case PPC::BI__builtin_altivec_vctzw:
  case PPC::BI__builtin_altivec_vctzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vec_replace_elt:
  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
    // The third argument of vec_replace_elt and vec_replace_unaligned must
    // be a compile time constant and will be emitted either to the vinsw
    // or vinsd instruction.
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
    assert(ArgCI &&
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Function *F = nullptr;
    Value *Call = nullptr;
    int64_t ConstArg = ArgCI->getSExtValue();
    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
    bool Is32Bit = false;
    assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
    // The input to vec_replace_elt is an element index, not a byte index.
    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
      ConstArg *= ArgWidth / 8;
    if (ArgWidth == 32) {
      Is32Bit = true;
      // When the second argument is 32 bits, it can either be an integer or
      // a float. The vinsw intrinsic is used in this case.
      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
      // Fix the constant according to endianness.
      if (getTarget().isLittleEndian())
        ConstArg = 12 - ConstArg;
    } else {
      // When the second argument is 64 bits, it can either be a long long or
      // a double. The vinsd intrinsic is used in this case.
      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
      // Fix the constant for little endian.
      if (getTarget().isLittleEndian())
        ConstArg = 8 - ConstArg;
    }
    Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
    // Depending on ArgWidth, the input vector could be a float or a double.
    // If the input vector is a float type, bitcast the inputs to integers. Or,
    // if the input vector is a double, bitcast the inputs to 64-bit integers.
    if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
      Ops[0] = Builder.CreateBitCast(
          Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
                          : llvm::FixedVectorType::get(Int64Ty, 2));
      Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
    }
    // Emit the call to vinsw or vinsd.
    Call = Builder.CreateCall(F, Ops);
    // Depending on the builtin, bitcast to the appropriate result type.
    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
        !Ops[1]->getType()->isIntegerTy())
      return Builder.CreateBitCast(Call, ResultType);
    else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
             Ops[1]->getType()->isIntegerTy())
      return Call;
    else
      return Builder.CreateBitCast(Call,
                                   llvm::FixedVectorType::get(Int8Ty, 16));
  }
  case PPC::BI__builtin_altivec_vpopcntb:
  case PPC::BI__builtin_altivec_vpopcnth:
  case PPC::BI__builtin_altivec_vpopcntw:
  case PPC::BI__builtin_altivec_vpopcntd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }
  case PPC::BI__builtin_altivec_vadduqm:
  case PPC::BI__builtin_altivec_vsubuqm: {
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int128Ty, 1));
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int128Ty, 1));
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
      return Builder.CreateAdd(Ops[0], Ops[1], "vadduqm");
    else
      return Builder.CreateSub(Ops[0], Ops[1], "vsubuqm");
  }
  // Rotate and insert under mask operation.
  // __rldimi(rs, is, shift, mask)
  // (rotl64(rs, shift) & mask) | (is & ~mask)
  // __rlwimi(rs, is, shift, mask)
  // (rotl(rs, shift) & mask) | (is & ~mask)
  case PPC::BI__builtin_ppc_rldimi:
  case PPC::BI__builtin_ppc_rlwimi: {
    llvm::Type *Ty = Ops[0]->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
      Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[2]});
    Value *X = Builder.CreateAnd(Shift, Ops[3]);
    Value *Y = Builder.CreateAnd(Ops[1], Builder.CreateNot(Ops[3]));
    return Builder.CreateOr(X, Y);
  }
  // Rotate and insert under mask operation.
  // __rlwnm(rs, shift, mask)
  // rotl(rs, shift) & mask
  case PPC::BI__builtin_ppc_rlwnm: {
    llvm::Type *Ty = Ops[0]->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]});
    return Builder.CreateAnd(Shift, Ops[2]);
  }
  case PPC::BI__builtin_ppc_poppar4:
  case PPC::BI__builtin_ppc_poppar8: {
    llvm::Type *ArgType = Ops[0]->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
    Value *Tmp = Builder.CreateCall(F, Ops[0]);
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return Result;
  }
  case PPC::BI__builtin_ppc_cmpb: {
    if (getTarget().getTriple().isPPC64()) {
      Function *F =
          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
      return Builder.CreateCall(F, Ops, "cmpb");
    }
    // For 32 bit, emit the code as below:
    // %conv = trunc i64 %a to i32
    // %conv1 = trunc i64 %b to i32
    // %shr = lshr i64 %a, 32
    // %conv2 = trunc i64 %shr to i32
    // %shr3 = lshr i64 %b, 32
    // %conv4 = trunc i64 %shr3 to i32
    // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
    // %conv5 = zext i32 %0 to i64
    // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
    // %conv614 = zext i32 %1 to i64
    // %shl = shl nuw i64 %conv614, 32
    // %or = or i64 %shl, %conv5
    // ret i64 %or
    Function *F =
        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
    Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty);
    Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
    Value *ArgOneHi =
        Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty);
    Value *ArgTwoHi =
        Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty);
    Value *ResLo = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
    Value *ResHiShift = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
    return Builder.CreateOr(ResLo, ResHi);
  }
  // Copy sign
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }
  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_floor
               : Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_round
               : Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_rint
               : Intrinsic::rint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_ceil
               : Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_trunc
               : Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
                                        : Builder.CreateCall(F, X);
  }
  // Absolute value
  case PPC::BI__builtin_vsx_xvabsdp:
  case PPC::BI__builtin_vsx_xvabssp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Fastmath by default
  case PPC::BI__builtin_ppc_recipdivf:
  case PPC::BI__builtin_ppc_recipdivd:
  case PPC::BI__builtin_ppc_rsqrtf:
  case PPC::BI__builtin_ppc_rsqrtd: {
    FastMathFlags FMF = Builder.getFastMathFlags();
    Builder.getFastMathFlags().setFast();
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
      Value *Y = EmitScalarExpr(E->getArg(1));
      Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
      Builder.getFastMathFlags() &= (FMF);
      return FDiv;
    }
    auto *One = ConstantFP::get(ResultType, 1.0);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
    Builder.getFastMathFlags() &= (FMF);
    return FDiv;
  }
  case PPC::BI__builtin_ppc_alignx: {
    ConstantInt *AlignmentCI = cast<ConstantInt>(Ops[0]);
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
                                     llvm::Value::MaximumAlignment);
    emitAlignmentAssumption(Ops[1], E->getArg(1),
                            /*The expr loc is sufficient.*/ SourceLocation(),
                            AlignmentCI, nullptr);
    return Ops[1];
  }
  case PPC::BI__builtin_ppc_rdlam: {
    llvm::Type *Ty = Ops[0]->getType();
    Value *ShiftAmt = Builder.CreateIntCast(Ops[1], Ty, false);
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt});
    return Builder.CreateAnd(Rotate, Ops[2]);
  }
  case PPC::BI__builtin_ppc_load2r: {
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
    Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    Value *LoadIntrinsic = Builder.CreateCall(F, Ops);
    return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
  }
  // FMA variations
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    llvm::Function *F;
    if (Builder.getIsFPConstrained())
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
    else
      F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    switch (BuiltinID) {
    case PPC::BI__builtin_vsx_xvmaddadp:
    case PPC::BI__builtin_vsx_xvmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
      else
        return Builder.CreateCall(F, {X, Y, Z});
    case PPC::BI__builtin_vsx_xvnmaddadp:
    case PPC::BI__builtin_vsx_xvnmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
      else
        return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
    case PPC::BI__builtin_vsx_xvmsubadp:
    case PPC::BI__builtin_vsx_xvmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(
            F, {X, Y, Builder.CreateFNeg(Z, "neg")});
      else
        return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
    case PPC::BI__builtin_vsx_xvnmsubadp:
    case PPC::BI__builtin_vsx_xvnmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(
                F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
            "neg");
      else
        return Builder.CreateFNeg(
            Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
            "neg");
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }
  case PPC::BI__builtin_vsx_insertword: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
    // Third argument is a compile time constant int. It must be clamped to
    // the range [0, 12].
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
    assert(ArgCI &&
           "Third arg to xxinsertw intrinsic must be constant integer");
    const int64_t MaxIndex = 12;
    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
  14578. // word from the first argument, and inserts it in the second argument. The
  14579. // instruction extracts the word from its second input register and inserts
  14580. // it into its first input register, so swap the first and second arguments.
  14581. std::swap(Ops[0], Ops[1]);
  14582. // Need to cast the second argument from a vector of unsigned int to a
  14583. // vector of long long.
  14584. Ops[1] =
  14585. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
  14586. if (getTarget().isLittleEndian()) {
  14587. // Reverse the double words in the vector we will extract from.
  14588. Ops[0] =
  14589. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
  14590. Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
  14591. // Reverse the index.
  14592. Index = MaxIndex - Index;
  14593. }
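// For example, a requested Index of 4 on a little-endian target becomes an
// effective insert offset of 12 - 4 == 8 after the doubleword swap above.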
  14594. // Intrinsic expects the first arg to be a vector of int.
  14595. Ops[0] =
  14596. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
  14597. Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
  14598. return Builder.CreateCall(F, Ops);
  14599. }
  14600. case PPC::BI__builtin_vsx_extractuword: {
  14601. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
  14602. // Intrinsic expects the first argument to be a vector of doublewords.
  14603. Ops[0] =
  14604. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
  14605. // The second argument is a compile time constant int that needs to
  14606. // be clamped to the range [0, 12].
  14607. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
  14608. assert(ArgCI &&
  14609. "Second Arg to xxextractuw intrinsic must be a constant integer!");
  14610. const int64_t MaxIndex = 12;
  14611. int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
  14612. if (getTarget().isLittleEndian()) {
  14613. // Reverse the index.
  14614. Index = MaxIndex - Index;
  14615. Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
  14616. // Emit the call, then reverse the double words of the results vector.
  14617. Value *Call = Builder.CreateCall(F, Ops);
  14618. Value *ShuffleCall =
  14619. Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
  14620. return ShuffleCall;
  14621. } else {
  14622. Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
  14623. return Builder.CreateCall(F, Ops);
  14624. }
  14625. }
  14626. case PPC::BI__builtin_vsx_xxpermdi: {
  14627. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  14628. assert(ArgCI && "Third arg must be constant integer!");
  14629. unsigned Index = ArgCI->getZExtValue();
  14630. Ops[0] =
  14631. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
  14632. Ops[1] =
  14633. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
  14634. // Account for endianness by treating this as just a shuffle. So we use the
  14635. // same indices for both LE and BE in order to produce expected results in
  14636. // both cases.
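// For example, Index == 0 selects doublewords {0, 2}, Index == 1 -> {0, 3},
// Index == 2 -> {1, 2}, and Index == 3 -> {1, 3} from the concatenation of
// Ops[0] and Ops[1].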
  14637. int ElemIdx0 = (Index & 2) >> 1;
  14638. int ElemIdx1 = 2 + (Index & 1);
  14639. int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
  14640. Value *ShuffleCall =
  14641. Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
  14642. QualType BIRetType = E->getType();
  14643. auto RetTy = ConvertType(BIRetType);
  14644. return Builder.CreateBitCast(ShuffleCall, RetTy);
  14645. }
  14646. case PPC::BI__builtin_vsx_xxsldwi: {
  14647. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  14648. assert(ArgCI && "Third argument must be a compile time constant");
  14649. unsigned Index = ArgCI->getZExtValue() & 0x3;
  14650. Ops[0] =
  14651. Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
  14652. Ops[1] =
  14653. Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4));
  14654. // Create a shuffle mask
  14655. int ElemIdx0;
  14656. int ElemIdx1;
  14657. int ElemIdx2;
  14658. int ElemIdx3;
  14659. if (getTarget().isLittleEndian()) {
  14660. // Little endian element N comes from element 8+N-Index of the
  14661. // concatenated wide vector (of course, using modulo arithmetic on
  14662. // the total number of elements).
  14663. ElemIdx0 = (8 - Index) % 8;
  14664. ElemIdx1 = (9 - Index) % 8;
  14665. ElemIdx2 = (10 - Index) % 8;
  14666. ElemIdx3 = (11 - Index) % 8;
  14667. } else {
  14668. // Big endian ElemIdx<N> = Index + N
  14669. ElemIdx0 = Index;
  14670. ElemIdx1 = Index + 1;
  14671. ElemIdx2 = Index + 2;
  14672. ElemIdx3 = Index + 3;
  14673. }
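// For example, Index == 1 produces the shuffle mask {1, 2, 3, 4} on
// big-endian targets and {7, 0, 1, 2} on little-endian targets.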
  14674. int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
  14675. Value *ShuffleCall =
  14676. Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
  14677. QualType BIRetType = E->getType();
  14678. auto RetTy = ConvertType(BIRetType);
  14679. return Builder.CreateBitCast(ShuffleCall, RetTy);
  14680. }
  14681. case PPC::BI__builtin_pack_vector_int128: {
  14682. bool isLittleEndian = getTarget().isLittleEndian();
  14683. Value *UndefValue =
  14684. llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2));
  14685. Value *Res = Builder.CreateInsertElement(
  14686. UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
  14687. Res = Builder.CreateInsertElement(Res, Ops[1],
  14688. (uint64_t)(isLittleEndian ? 0 : 1));
  14689. return Builder.CreateBitCast(Res, ConvertType(E->getType()));
  14690. }
  14691. case PPC::BI__builtin_unpack_vector_int128: {
  14692. ConstantInt *Index = cast<ConstantInt>(Ops[1]);
  14693. Value *Unpacked = Builder.CreateBitCast(
  14694. Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
  14695. if (getTarget().isLittleEndian())
  14696. Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
  14697. return Builder.CreateExtractElement(Unpacked, Index);
  14698. }
  14699. case PPC::BI__builtin_ppc_sthcx: {
  14700. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
  14701. Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
  14702. Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty);
  14703. return Builder.CreateCall(F, Ops);
  14704. }
14705. // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
14706. // Some of the MMA instructions accumulate their result into an existing
14707. // accumulator whereas the others generate a new accumulator. So we need
14708. // custom code generation to expand a builtin call with such a pointer into
14709. // a load (if the corresponding instruction accumulates its result),
14710. // followed by the call to the intrinsic and a store of the result.
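// For an accumulating builtin the emitted sequence is roughly (sketch):
//   Acc  = load from the accumulator pointer
//   Call = call the corresponding PPC intrinsic with Acc prepended to the
//          remaining operands
//   store Call back through the accumulator pointer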
  14711. #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
  14712. case PPC::BI__builtin_##Name:
  14713. #include "clang/Basic/BuiltinsPPC.def"
  14714. {
14715. // The first argument of these builtins is a pointer used to store their
14716. // result. However, the LLVM intrinsics return their result in multiple
14717. // return values. So, here we emit code extracting these values from the
14718. // intrinsic results and storing them using that pointer.
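// For example, __builtin_mma_disassemble_acc stores four 16-byte vectors
// through its destination pointer, one per extracted intrinsic return value.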
  14719. if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
  14720. BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
  14721. BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
  14722. unsigned NumVecs = 2;
  14723. auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
  14724. if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
  14725. NumVecs = 4;
  14726. Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
  14727. }
  14728. llvm::Function *F = CGM.getIntrinsic(Intrinsic);
  14729. Address Addr = EmitPointerWithAlignment(E->getArg(1));
  14730. Value *Vec = Builder.CreateLoad(Addr);
  14731. Value *Call = Builder.CreateCall(F, {Vec});
  14732. llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
  14733. Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
  14734. for (unsigned i=0; i<NumVecs; i++) {
  14735. Value *Vec = Builder.CreateExtractValue(Call, i);
  14736. llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
  14737. Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
  14738. Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
  14739. }
  14740. return Call;
  14741. }
  14742. if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
  14743. BuiltinID == PPC::BI__builtin_mma_build_acc) {
  14744. // Reverse the order of the operands for LE, so the
  14745. // same builtin call can be used on both LE and BE
  14746. // without the need for the programmer to swap operands.
14747. // The operands are reversed starting from the second argument;
  14748. // the first operand is the pointer to the pair/accumulator
  14749. // that is being built.
  14750. if (getTarget().isLittleEndian())
  14751. std::reverse(Ops.begin() + 1, Ops.end());
  14752. }
  14753. bool Accumulate;
  14754. switch (BuiltinID) {
  14755. #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
  14756. case PPC::BI__builtin_##Name: \
  14757. ID = Intrinsic::ppc_##Intr; \
  14758. Accumulate = Acc; \
  14759. break;
  14760. #include "clang/Basic/BuiltinsPPC.def"
  14761. }
  14762. if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
  14763. BuiltinID == PPC::BI__builtin_vsx_stxvp ||
  14764. BuiltinID == PPC::BI__builtin_mma_lxvp ||
  14765. BuiltinID == PPC::BI__builtin_mma_stxvp) {
  14766. if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
  14767. BuiltinID == PPC::BI__builtin_mma_lxvp) {
  14768. Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
  14769. Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
  14770. } else {
  14771. Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
  14772. Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
  14773. }
  14774. Ops.pop_back();
  14775. llvm::Function *F = CGM.getIntrinsic(ID);
  14776. return Builder.CreateCall(F, Ops, "");
  14777. }
  14778. SmallVector<Value*, 4> CallOps;
  14779. if (Accumulate) {
  14780. Address Addr = EmitPointerWithAlignment(E->getArg(0));
  14781. Value *Acc = Builder.CreateLoad(Addr);
  14782. CallOps.push_back(Acc);
  14783. }
  14784. for (unsigned i=1; i<Ops.size(); i++)
  14785. CallOps.push_back(Ops[i]);
  14786. llvm::Function *F = CGM.getIntrinsic(ID);
  14787. Value *Call = Builder.CreateCall(F, CallOps);
  14788. return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
  14789. }
  14790. case PPC::BI__builtin_ppc_compare_and_swap:
  14791. case PPC::BI__builtin_ppc_compare_and_swaplp: {
  14792. Address Addr = EmitPointerWithAlignment(E->getArg(0));
  14793. Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
  14794. Value *OldVal = Builder.CreateLoad(OldValAddr);
  14795. QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
  14796. LValue LV = MakeAddrLValue(Addr, AtomicTy);
  14797. auto Pair = EmitAtomicCompareExchange(
  14798. LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(),
  14799. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
14800. // Unlike C11's atomic_compare_exchange, according to
14801. // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
14802. // > In either case, the contents of the memory location specified by addr
14803. // > are copied into the memory location specified by old_val_addr.
14804. // But it does not specify whether the store to OldValAddr is atomic or
14805. // which ordering to use. Following XL's codegen, treat it as a normal
14806. // store.
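// Hypothetical usage sketch (names are illustrative only):
//   int ok = __builtin_ppc_compare_and_swap(&word, &expected, desired);
// 'ok' is nonzero when the exchange succeeded; 'expected' always receives
// the value observed at 'word', per the documentation cited above.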
  14807. Value *LoadedVal = Pair.first.getScalarVal();
  14808. Builder.CreateStore(LoadedVal, OldValAddr);
  14809. return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
  14810. }
  14811. case PPC::BI__builtin_ppc_fetch_and_add:
  14812. case PPC::BI__builtin_ppc_fetch_and_addlp: {
  14813. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
  14814. llvm::AtomicOrdering::Monotonic);
  14815. }
  14816. case PPC::BI__builtin_ppc_fetch_and_and:
  14817. case PPC::BI__builtin_ppc_fetch_and_andlp: {
  14818. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
  14819. llvm::AtomicOrdering::Monotonic);
  14820. }
  14821. case PPC::BI__builtin_ppc_fetch_and_or:
  14822. case PPC::BI__builtin_ppc_fetch_and_orlp: {
  14823. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
  14824. llvm::AtomicOrdering::Monotonic);
  14825. }
  14826. case PPC::BI__builtin_ppc_fetch_and_swap:
  14827. case PPC::BI__builtin_ppc_fetch_and_swaplp: {
  14828. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
  14829. llvm::AtomicOrdering::Monotonic);
  14830. }
  14831. case PPC::BI__builtin_ppc_ldarx:
  14832. case PPC::BI__builtin_ppc_lwarx:
  14833. case PPC::BI__builtin_ppc_lharx:
  14834. case PPC::BI__builtin_ppc_lbarx:
  14835. return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
  14836. case PPC::BI__builtin_ppc_mfspr: {
  14837. llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
  14838. ? Int32Ty
  14839. : Int64Ty;
  14840. Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
  14841. return Builder.CreateCall(F, Ops);
  14842. }
  14843. case PPC::BI__builtin_ppc_mtspr: {
  14844. llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
  14845. ? Int32Ty
  14846. : Int64Ty;
  14847. Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
  14848. return Builder.CreateCall(F, Ops);
  14849. }
  14850. case PPC::BI__builtin_ppc_popcntb: {
  14851. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  14852. llvm::Type *ArgType = ArgValue->getType();
  14853. Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
  14854. return Builder.CreateCall(F, Ops, "popcntb");
  14855. }
  14856. case PPC::BI__builtin_ppc_mtfsf: {
  14857. // The builtin takes a uint32 that needs to be cast to an
  14858. // f64 to be passed to the intrinsic.
  14859. Value *Cast = Builder.CreateUIToFP(Ops[1], DoubleTy);
  14860. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
  14861. return Builder.CreateCall(F, {Ops[0], Cast}, "");
  14862. }
  14863. case PPC::BI__builtin_ppc_swdiv_nochk:
  14864. case PPC::BI__builtin_ppc_swdivs_nochk: {
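// Emit a plain fdiv with fast-math flags temporarily forced on, then
// restore the builder's previous flags.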
  14865. FastMathFlags FMF = Builder.getFastMathFlags();
  14866. Builder.getFastMathFlags().setFast();
  14867. Value *FDiv = Builder.CreateFDiv(Ops[0], Ops[1], "swdiv_nochk");
  14868. Builder.getFastMathFlags() &= (FMF);
  14869. return FDiv;
  14870. }
  14871. case PPC::BI__builtin_ppc_fric:
  14872. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14873. *this, E, Intrinsic::rint,
  14874. Intrinsic::experimental_constrained_rint))
  14875. .getScalarVal();
  14876. case PPC::BI__builtin_ppc_frim:
  14877. case PPC::BI__builtin_ppc_frims:
  14878. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14879. *this, E, Intrinsic::floor,
  14880. Intrinsic::experimental_constrained_floor))
  14881. .getScalarVal();
  14882. case PPC::BI__builtin_ppc_frin:
  14883. case PPC::BI__builtin_ppc_frins:
  14884. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14885. *this, E, Intrinsic::round,
  14886. Intrinsic::experimental_constrained_round))
  14887. .getScalarVal();
  14888. case PPC::BI__builtin_ppc_frip:
  14889. case PPC::BI__builtin_ppc_frips:
  14890. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14891. *this, E, Intrinsic::ceil,
  14892. Intrinsic::experimental_constrained_ceil))
  14893. .getScalarVal();
  14894. case PPC::BI__builtin_ppc_friz:
  14895. case PPC::BI__builtin_ppc_frizs:
  14896. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14897. *this, E, Intrinsic::trunc,
  14898. Intrinsic::experimental_constrained_trunc))
  14899. .getScalarVal();
  14900. case PPC::BI__builtin_ppc_fsqrt:
  14901. case PPC::BI__builtin_ppc_fsqrts:
  14902. return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
  14903. *this, E, Intrinsic::sqrt,
  14904. Intrinsic::experimental_constrained_sqrt))
  14905. .getScalarVal();
  14906. case PPC::BI__builtin_ppc_test_data_class: {
  14907. llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType();
  14908. unsigned IntrinsicID;
  14909. if (ArgType->isDoubleTy())
  14910. IntrinsicID = Intrinsic::ppc_test_data_class_d;
  14911. else if (ArgType->isFloatTy())
  14912. IntrinsicID = Intrinsic::ppc_test_data_class_f;
  14913. else
  14914. llvm_unreachable("Invalid Argument Type");
  14915. return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops,
  14916. "test_data_class");
  14917. }
  14918. case PPC::BI__builtin_ppc_swdiv:
  14919. case PPC::BI__builtin_ppc_swdivs:
  14920. return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv");
  14921. }
  14922. }
  14923. namespace {
14924. // If \p E is not a null pointer, insert an address space cast to match the
14925. // return type of \p E if necessary.
  14926. Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
  14927. const CallExpr *E = nullptr) {
  14928. auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  14929. auto *Call = CGF.Builder.CreateCall(F);
  14930. Call->addRetAttr(
  14931. Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
  14932. Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
  14933. if (!E)
  14934. return Call;
  14935. QualType BuiltinRetType = E->getType();
  14936. auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  14937. if (RetTy == Call->getType())
  14938. return Call;
  14939. return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
  14940. }
14941. // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
  14942. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  14943. const unsigned XOffset = 4;
  14944. auto *DP = EmitAMDGPUDispatchPtr(CGF);
  14945. // Indexing the HSA kernel_dispatch_packet struct.
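// The offsets below assume the hsa_kernel_dispatch_packet_t layout, where
// workgroup_size_{x,y,z} are consecutive 16-bit fields starting at byte
// offset 4.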
  14946. auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2);
  14947. auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  14948. auto *DstTy =
  14949. CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  14950. auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  14951. auto *LD = CGF.Builder.CreateLoad(
  14952. Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  14953. llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  14954. llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
  14955. APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  14956. LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
  14957. LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
  14958. llvm::MDNode::get(CGF.getLLVMContext(), None));
  14959. return LD;
  14960. }
14961. // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
  14962. Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  14963. const unsigned XOffset = 12;
  14964. auto *DP = EmitAMDGPUDispatchPtr(CGF);
  14965. // Indexing the HSA kernel_dispatch_packet struct.
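// Likewise, grid_size_{x,y,z} are assumed to be consecutive 32-bit fields
// starting at byte offset 12 of the packet.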
  14966. auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  14967. auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  14968. auto *DstTy =
  14969. CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
  14970. auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
  14971. auto *LD = CGF.Builder.CreateLoad(
  14972. Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
  14973. LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
  14974. llvm::MDNode::get(CGF.getLLVMContext(), None));
  14975. return LD;
  14976. }
  14977. } // namespace
  14978. // For processing memory ordering and memory scope arguments of various
  14979. // amdgcn builtins.
14980. // \p Order takes a C++11-compatible memory-ordering specifier and converts
14981. // it into LLVM's memory ordering specifier using the atomic C ABI, and
14982. // writes it to \p AO. \p Scope takes a const char * and converts it into an
14983. // AMDGCN-specific SyncScopeID and writes it to \p SSID.
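// For example, a constant __ATOMIC_SEQ_CST order combined with the scope
// string "workgroup" yields SequentiallyConsistent ordering and the
// "workgroup" sync scope ID.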
  14984. bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
  14985. llvm::AtomicOrdering &AO,
  14986. llvm::SyncScope::ID &SSID) {
  14987. if (isa<llvm::ConstantInt>(Order)) {
  14988. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  14989. // Map C11/C++11 memory ordering to LLVM memory ordering
  14990. assert(llvm::isValidAtomicOrderingCABI(ord));
  14991. switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
  14992. case llvm::AtomicOrderingCABI::acquire:
  14993. case llvm::AtomicOrderingCABI::consume:
  14994. AO = llvm::AtomicOrdering::Acquire;
  14995. break;
  14996. case llvm::AtomicOrderingCABI::release:
  14997. AO = llvm::AtomicOrdering::Release;
  14998. break;
  14999. case llvm::AtomicOrderingCABI::acq_rel:
  15000. AO = llvm::AtomicOrdering::AcquireRelease;
  15001. break;
  15002. case llvm::AtomicOrderingCABI::seq_cst:
  15003. AO = llvm::AtomicOrdering::SequentiallyConsistent;
  15004. break;
  15005. case llvm::AtomicOrderingCABI::relaxed:
  15006. AO = llvm::AtomicOrdering::Monotonic;
  15007. break;
  15008. }
  15009. StringRef scp;
  15010. llvm::getConstantStringInfo(Scope, scp);
  15011. SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
  15012. return true;
  15013. }
  15014. return false;
  15015. }
  15016. Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
  15017. const CallExpr *E) {
  15018. llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
  15019. llvm::SyncScope::ID SSID;
  15020. switch (BuiltinID) {
  15021. case AMDGPU::BI__builtin_amdgcn_div_scale:
  15022. case AMDGPU::BI__builtin_amdgcn_div_scalef: {
15023. // Translate from the intrinsic's struct return to the builtin's out
15024. // argument.
  15025. Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
  15026. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  15027. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  15028. llvm::Value *Z = EmitScalarExpr(E->getArg(2));
  15029. llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
  15030. X->getType());
  15031. llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
  15032. llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
  15033. llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
  15034. llvm::Type *RealFlagType = FlagOutPtr.getElementType();
  15035. llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
  15036. Builder.CreateStore(FlagExt, FlagOutPtr);
  15037. return Result;
  15038. }
  15039. case AMDGPU::BI__builtin_amdgcn_div_fmas:
  15040. case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
  15041. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15042. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15043. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15044. llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
  15045. llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
  15046. Src0->getType());
  15047. llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
  15048. return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  15049. }
  15050. case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
  15051. return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  15052. case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
  15053. return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
  15054. case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  15055. case AMDGPU::BI__builtin_amdgcn_update_dpp: {
  15056. llvm::SmallVector<llvm::Value *, 6> Args;
  15057. for (unsigned I = 0; I != E->getNumArgs(); ++I)
  15058. Args.push_back(EmitScalarExpr(E->getArg(I)));
  15059. assert(Args.size() == 5 || Args.size() == 6);
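// __builtin_amdgcn_mov_dpp has no 'old' operand, so synthesize an undef one
// here; both builtins then lower to the amdgcn_update_dpp intrinsic.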
  15060. if (Args.size() == 5)
  15061. Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
  15062. Function *F =
  15063. CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
  15064. return Builder.CreateCall(F, Args);
  15065. }
  15066. case AMDGPU::BI__builtin_amdgcn_div_fixup:
  15067. case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  15068. case AMDGPU::BI__builtin_amdgcn_div_fixuph:
  15069. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  15070. case AMDGPU::BI__builtin_amdgcn_trig_preop:
  15071. case AMDGPU::BI__builtin_amdgcn_trig_preopf:
  15072. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  15073. case AMDGPU::BI__builtin_amdgcn_rcp:
  15074. case AMDGPU::BI__builtin_amdgcn_rcpf:
  15075. case AMDGPU::BI__builtin_amdgcn_rcph:
  15076. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  15077. case AMDGPU::BI__builtin_amdgcn_sqrt:
  15078. case AMDGPU::BI__builtin_amdgcn_sqrtf:
  15079. case AMDGPU::BI__builtin_amdgcn_sqrth:
  15080. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
  15081. case AMDGPU::BI__builtin_amdgcn_rsq:
  15082. case AMDGPU::BI__builtin_amdgcn_rsqf:
  15083. case AMDGPU::BI__builtin_amdgcn_rsqh:
  15084. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  15085. case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  15086. case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
  15087. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  15088. case AMDGPU::BI__builtin_amdgcn_sinf:
  15089. case AMDGPU::BI__builtin_amdgcn_sinh:
  15090. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  15091. case AMDGPU::BI__builtin_amdgcn_cosf:
  15092. case AMDGPU::BI__builtin_amdgcn_cosh:
  15093. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  15094. case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
  15095. return EmitAMDGPUDispatchPtr(*this, E);
  15096. case AMDGPU::BI__builtin_amdgcn_log_clampf:
  15097. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  15098. case AMDGPU::BI__builtin_amdgcn_ldexp:
  15099. case AMDGPU::BI__builtin_amdgcn_ldexpf:
  15100. case AMDGPU::BI__builtin_amdgcn_ldexph:
  15101. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  15102. case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  15103. case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  15104. case AMDGPU::BI__builtin_amdgcn_frexp_manth:
  15105. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  15106. case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  15107. case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
  15108. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15109. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  15110. { Builder.getInt32Ty(), Src0->getType() });
  15111. return Builder.CreateCall(F, Src0);
  15112. }
  15113. case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
  15114. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15115. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  15116. { Builder.getInt16Ty(), Src0->getType() });
  15117. return Builder.CreateCall(F, Src0);
  15118. }
  15119. case AMDGPU::BI__builtin_amdgcn_fract:
  15120. case AMDGPU::BI__builtin_amdgcn_fractf:
  15121. case AMDGPU::BI__builtin_amdgcn_fracth:
  15122. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  15123. case AMDGPU::BI__builtin_amdgcn_lerp:
  15124. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  15125. case AMDGPU::BI__builtin_amdgcn_ubfe:
  15126. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
  15127. case AMDGPU::BI__builtin_amdgcn_sbfe:
  15128. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
  15129. case AMDGPU::BI__builtin_amdgcn_uicmp:
  15130. case AMDGPU::BI__builtin_amdgcn_uicmpl:
  15131. case AMDGPU::BI__builtin_amdgcn_sicmp:
  15132. case AMDGPU::BI__builtin_amdgcn_sicmpl: {
  15133. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15134. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15135. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15136. // FIXME-GFX10: How should 32 bit mask be handled?
  15137. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
  15138. { Builder.getInt64Ty(), Src0->getType() });
  15139. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15140. }
  15141. case AMDGPU::BI__builtin_amdgcn_fcmp:
  15142. case AMDGPU::BI__builtin_amdgcn_fcmpf: {
  15143. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15144. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15145. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15146. // FIXME-GFX10: How should 32 bit mask be handled?
  15147. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
  15148. { Builder.getInt64Ty(), Src0->getType() });
  15149. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15150. }
  15151. case AMDGPU::BI__builtin_amdgcn_class:
  15152. case AMDGPU::BI__builtin_amdgcn_classf:
  15153. case AMDGPU::BI__builtin_amdgcn_classh:
  15154. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  15155. case AMDGPU::BI__builtin_amdgcn_fmed3f:
  15156. case AMDGPU::BI__builtin_amdgcn_fmed3h:
  15157. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
  15158. case AMDGPU::BI__builtin_amdgcn_ds_append:
  15159. case AMDGPU::BI__builtin_amdgcn_ds_consume: {
  15160. Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
  15161. Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
  15162. Value *Src0 = EmitScalarExpr(E->getArg(0));
  15163. Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
  15164. return Builder.CreateCall(F, { Src0, Builder.getFalse() });
  15165. }
  15166. case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  15167. case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  15168. case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
  15169. Intrinsic::ID Intrin;
  15170. switch (BuiltinID) {
  15171. case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  15172. Intrin = Intrinsic::amdgcn_ds_fadd;
  15173. break;
  15174. case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  15175. Intrin = Intrinsic::amdgcn_ds_fmin;
  15176. break;
  15177. case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
  15178. Intrin = Intrinsic::amdgcn_ds_fmax;
  15179. break;
  15180. }
  15181. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15182. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15183. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15184. llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
  15185. llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
  15186. llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
  15187. llvm::FunctionType *FTy = F->getFunctionType();
  15188. llvm::Type *PTy = FTy->getParamType(0);
  15189. Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
  15190. return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
  15191. }
  15192. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  15193. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  15194. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  15195. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  15196. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  15197. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  15198. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  15199. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
  15200. Intrinsic::ID IID;
  15201. llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
  15202. switch (BuiltinID) {
  15203. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  15204. ArgTy = llvm::Type::getFloatTy(getLLVMContext());
  15205. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15206. break;
  15207. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  15208. ArgTy = llvm::FixedVectorType::get(
  15209. llvm::Type::getHalfTy(getLLVMContext()), 2);
  15210. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15211. break;
  15212. case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  15213. IID = Intrinsic::amdgcn_global_atomic_fadd;
  15214. break;
  15215. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  15216. IID = Intrinsic::amdgcn_global_atomic_fmin;
  15217. break;
  15218. case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  15219. IID = Intrinsic::amdgcn_global_atomic_fmax;
  15220. break;
  15221. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  15222. IID = Intrinsic::amdgcn_flat_atomic_fadd;
  15223. break;
  15224. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  15225. IID = Intrinsic::amdgcn_flat_atomic_fmin;
  15226. break;
  15227. case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
  15228. IID = Intrinsic::amdgcn_flat_atomic_fmax;
  15229. break;
  15230. }
  15231. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  15232. llvm::Value *Val = EmitScalarExpr(E->getArg(1));
  15233. llvm::Function *F =
  15234. CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
  15235. return Builder.CreateCall(F, {Addr, Val});
  15236. }
  15237. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  15238. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
  15239. Intrinsic::ID IID;
  15240. llvm::Type *ArgTy;
  15241. switch (BuiltinID) {
  15242. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
  15243. ArgTy = llvm::Type::getFloatTy(getLLVMContext());
  15244. IID = Intrinsic::amdgcn_ds_fadd;
  15245. break;
  15246. case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  15247. ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
  15248. IID = Intrinsic::amdgcn_ds_fadd;
  15249. break;
  15250. }
  15251. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  15252. llvm::Value *Val = EmitScalarExpr(E->getArg(1));
  15253. llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
  15254. llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
  15255. llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
  15256. llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
  15257. llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
  15258. return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
  15259. }
  15260. case AMDGPU::BI__builtin_amdgcn_read_exec: {
  15261. CallInst *CI = cast<CallInst>(
  15262. EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
  15263. CI->setConvergent();
  15264. return CI;
  15265. }
  15266. case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
  15267. case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
  15268. StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
  15269. "exec_lo" : "exec_hi";
  15270. CallInst *CI = cast<CallInst>(
  15271. EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
  15272. CI->setConvergent();
  15273. return CI;
  15274. }
  15275. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  15276. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  15277. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  15278. case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
  15279. llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
  15280. llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
  15281. llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
  15282. llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
  15283. llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
  15284. llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
  15285. // The builtins take these arguments as vec4 where the last element is
  15286. // ignored. The intrinsic takes them as vec3.
  15287. RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
  15288. ArrayRef<int>{0, 1, 2});
  15289. RayDir =
  15290. Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
  15291. RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
  15292. ArrayRef<int>{0, 1, 2});
  15293. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
  15294. {NodePtr->getType(), RayDir->getType()});
  15295. return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
  15296. RayInverseDir, TextureDescr});
  15297. }
  15298. // amdgcn workitem
  15299. case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
  15300. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  15301. case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
  15302. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  15303. case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
  15304. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
  15305. // amdgcn workgroup size
  15306. case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
  15307. return EmitAMDGPUWorkGroupSize(*this, 0);
  15308. case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
  15309. return EmitAMDGPUWorkGroupSize(*this, 1);
  15310. case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
  15311. return EmitAMDGPUWorkGroupSize(*this, 2);
  15312. // amdgcn grid size
  15313. case AMDGPU::BI__builtin_amdgcn_grid_size_x:
  15314. return EmitAMDGPUGridSize(*this, 0);
  15315. case AMDGPU::BI__builtin_amdgcn_grid_size_y:
  15316. return EmitAMDGPUGridSize(*this, 1);
  15317. case AMDGPU::BI__builtin_amdgcn_grid_size_z:
  15318. return EmitAMDGPUGridSize(*this, 2);
  15319. // r600 intrinsics
  15320. case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  15321. case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
  15322. return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  15323. case AMDGPU::BI__builtin_r600_read_tidig_x:
  15324. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  15325. case AMDGPU::BI__builtin_r600_read_tidig_y:
  15326. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  15327. case AMDGPU::BI__builtin_r600_read_tidig_z:
  15328. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  15329. case AMDGPU::BI__builtin_amdgcn_alignbit: {
  15330. llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
  15331. llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
  15332. llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
  15333. Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
  15334. return Builder.CreateCall(F, { Src0, Src1, Src2 });
  15335. }
  15336. case AMDGPU::BI__builtin_amdgcn_fence: {
  15337. if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
  15338. EmitScalarExpr(E->getArg(1)), AO, SSID))
  15339. return Builder.CreateFence(AO, SSID);
  15340. LLVM_FALLTHROUGH;
  15341. }
  15342. case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  15343. case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  15344. case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  15345. case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
  15346. unsigned BuiltinAtomicOp;
  15347. llvm::Type *ResultType = ConvertType(E->getType());
  15348. switch (BuiltinID) {
  15349. case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  15350. case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  15351. BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
  15352. break;
  15353. case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  15354. case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
  15355. BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
  15356. break;
  15357. }
  15358. Value *Ptr = EmitScalarExpr(E->getArg(0));
  15359. Value *Val = EmitScalarExpr(E->getArg(1));
  15360. llvm::Function *F =
  15361. CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
  15362. if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
  15363. EmitScalarExpr(E->getArg(3)), AO, SSID)) {
15364. // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
15365. // scope as unsigned values.
  15366. Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
  15367. Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
  15368. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  15369. bool Volatile =
  15370. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  15371. Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
  15372. return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
  15373. }
  15374. LLVM_FALLTHROUGH;
  15375. }
  15376. default:
  15377. return nullptr;
  15378. }
  15379. }
  15380. /// Handle a SystemZ function in which the final argument is a pointer
  15381. /// to an int that receives the post-instruction CC value. At the LLVM level
  15382. /// this is represented as a function that returns a {result, cc} pair.
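/// For example (a usage sketch, not tied to any one builtin): calling
/// __builtin_s390_vceqbs(a, b, &cc) lowers to a call of the llvm.s390.vceqbs
/// intrinsic; element 1 of its aggregate result is stored to *cc and
/// element 0 is returned.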
  15383. static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
  15384. unsigned IntrinsicID,
  15385. const CallExpr *E) {
  15386. unsigned NumArgs = E->getNumArgs() - 1;
  15387. SmallVector<Value *, 8> Args(NumArgs);
  15388. for (unsigned I = 0; I < NumArgs; ++I)
  15389. Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  15390. Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  15391. Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  15392. Value *Call = CGF.Builder.CreateCall(F, Args);
  15393. Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  15394. CGF.Builder.CreateStore(CC, CCPtr);
  15395. return CGF.Builder.CreateExtractValue(Call, 0);
  15396. }
  15397. Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
  15398. const CallExpr *E) {
  15399. switch (BuiltinID) {
  15400. case SystemZ::BI__builtin_tbegin: {
  15401. Value *TDB = EmitScalarExpr(E->getArg(0));
  15402. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  15403. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
  15404. return Builder.CreateCall(F, {TDB, Control});
  15405. }
  15406. case SystemZ::BI__builtin_tbegin_nofloat: {
  15407. Value *TDB = EmitScalarExpr(E->getArg(0));
  15408. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  15409. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
  15410. return Builder.CreateCall(F, {TDB, Control});
  15411. }
  15412. case SystemZ::BI__builtin_tbeginc: {
  15413. Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
  15414. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
  15415. Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
  15416. return Builder.CreateCall(F, {TDB, Control});
  15417. }
  15418. case SystemZ::BI__builtin_tabort: {
  15419. Value *Data = EmitScalarExpr(E->getArg(0));
  15420. Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
  15421. return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  15422. }
  15423. case SystemZ::BI__builtin_non_tx_store: {
  15424. Value *Address = EmitScalarExpr(E->getArg(0));
  15425. Value *Data = EmitScalarExpr(E->getArg(1));
  15426. Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
  15427. return Builder.CreateCall(F, {Data, Address});
  15428. }
15429. // Vector builtins. Note that most vector builtins are mapped automatically
15430. // to target-specific LLVM intrinsics. The ones handled specially here can
15431. // be represented via standard LLVM IR, which is preferable because it
15432. // enables common LLVM optimizations.
  15433. case SystemZ::BI__builtin_s390_vpopctb:
  15434. case SystemZ::BI__builtin_s390_vpopcth:
  15435. case SystemZ::BI__builtin_s390_vpopctf:
  15436. case SystemZ::BI__builtin_s390_vpopctg: {
  15437. llvm::Type *ResultType = ConvertType(E->getType());
  15438. Value *X = EmitScalarExpr(E->getArg(0));
  15439. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  15440. return Builder.CreateCall(F, X);
  15441. }
  15442. case SystemZ::BI__builtin_s390_vclzb:
  15443. case SystemZ::BI__builtin_s390_vclzh:
  15444. case SystemZ::BI__builtin_s390_vclzf:
  15445. case SystemZ::BI__builtin_s390_vclzg: {
  15446. llvm::Type *ResultType = ConvertType(E->getType());
  15447. Value *X = EmitScalarExpr(E->getArg(0));
  15448. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  15449. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
  15450. return Builder.CreateCall(F, {X, Undef});
  15451. }
  15452. case SystemZ::BI__builtin_s390_vctzb:
  15453. case SystemZ::BI__builtin_s390_vctzh:
  15454. case SystemZ::BI__builtin_s390_vctzf:
  15455. case SystemZ::BI__builtin_s390_vctzg: {
  15456. llvm::Type *ResultType = ConvertType(E->getType());
  15457. Value *X = EmitScalarExpr(E->getArg(0));
  15458. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  15459. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
  15460. return Builder.CreateCall(F, {X, Undef});
  15461. }
  15462. case SystemZ::BI__builtin_s390_vfsqsb:
  15463. case SystemZ::BI__builtin_s390_vfsqdb: {
  15464. llvm::Type *ResultType = ConvertType(E->getType());
  15465. Value *X = EmitScalarExpr(E->getArg(0));
  15466. if (Builder.getIsFPConstrained()) {
  15467. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
  15468. return Builder.CreateConstrainedFPCall(F, { X });
  15469. } else {
  15470. Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  15471. return Builder.CreateCall(F, X);
  15472. }
  15473. }
  15474. case SystemZ::BI__builtin_s390_vfmasb:
  15475. case SystemZ::BI__builtin_s390_vfmadb: {
  15476. llvm::Type *ResultType = ConvertType(E->getType());
  15477. Value *X = EmitScalarExpr(E->getArg(0));
  15478. Value *Y = EmitScalarExpr(E->getArg(1));
  15479. Value *Z = EmitScalarExpr(E->getArg(2));
  15480. if (Builder.getIsFPConstrained()) {
  15481. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  15482. return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
  15483. } else {
  15484. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  15485. return Builder.CreateCall(F, {X, Y, Z});
  15486. }
  15487. }
  15488. case SystemZ::BI__builtin_s390_vfmssb:
  15489. case SystemZ::BI__builtin_s390_vfmsdb: {
  15490. llvm::Type *ResultType = ConvertType(E->getType());
  15491. Value *X = EmitScalarExpr(E->getArg(0));
  15492. Value *Y = EmitScalarExpr(E->getArg(1));
  15493. Value *Z = EmitScalarExpr(E->getArg(2));
  15494. if (Builder.getIsFPConstrained()) {
  15495. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  15496. return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  15497. } else {
  15498. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  15499. return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
  15500. }
  15501. }
  15502. case SystemZ::BI__builtin_s390_vfnmasb:
  15503. case SystemZ::BI__builtin_s390_vfnmadb: {
  15504. llvm::Type *ResultType = ConvertType(E->getType());
  15505. Value *X = EmitScalarExpr(E->getArg(0));
  15506. Value *Y = EmitScalarExpr(E->getArg(1));
  15507. Value *Z = EmitScalarExpr(E->getArg(2));
  15508. if (Builder.getIsFPConstrained()) {
  15509. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  15510. return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
  15511. } else {
  15512. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  15513. return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
  15514. }
  15515. }
  15516. case SystemZ::BI__builtin_s390_vfnmssb:
  15517. case SystemZ::BI__builtin_s390_vfnmsdb: {
  15518. llvm::Type *ResultType = ConvertType(E->getType());
  15519. Value *X = EmitScalarExpr(E->getArg(0));
  15520. Value *Y = EmitScalarExpr(E->getArg(1));
  15521. Value *Z = EmitScalarExpr(E->getArg(2));
  15522. if (Builder.getIsFPConstrained()) {
  15523. Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
  15524. Value *NegZ = Builder.CreateFNeg(Z, "sub");
  15525. return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
  15526. } else {
  15527. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  15528. Value *NegZ = Builder.CreateFNeg(Z, "neg");
  15529. return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
  15530. }
  15531. }
  15532. case SystemZ::BI__builtin_s390_vflpsb:
  15533. case SystemZ::BI__builtin_s390_vflpdb: {
  15534. llvm::Type *ResultType = ConvertType(E->getType());
  15535. Value *X = EmitScalarExpr(E->getArg(0));
  15536. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  15537. return Builder.CreateCall(F, X);
  15538. }
  15539. case SystemZ::BI__builtin_s390_vflnsb:
  15540. case SystemZ::BI__builtin_s390_vflndb: {
  15541. llvm::Type *ResultType = ConvertType(E->getType());
  15542. Value *X = EmitScalarExpr(E->getArg(0));
  15543. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  15544. return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
  15545. }
  15546. case SystemZ::BI__builtin_s390_vfisb:
  15547. case SystemZ::BI__builtin_s390_vfidb: {
  15548. llvm::Type *ResultType = ConvertType(E->getType());
  15549. Value *X = EmitScalarExpr(E->getArg(0));
  15550. // Constant-fold the M4 and M5 mask arguments.
  15551. llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
  15552. llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
15553. // Check whether this instance can be represented via an LLVM standard
  15554. // intrinsic. We only support some combinations of M4 and M5.
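// Supported (M4, M5) pairs and their mappings: (0, 0) -> rint,
// (4, 0) -> nearbyint, (4, 1) -> round, (4, 5) -> trunc, (4, 6) -> ceil,
// (4, 7) -> floor; anything else falls back to the target intrinsic.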
  15555. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  15556. Intrinsic::ID CI;
  15557. switch (M4.getZExtValue()) {
  15558. default: break;
  15559. case 0: // IEEE-inexact exception allowed
  15560. switch (M5.getZExtValue()) {
  15561. default: break;
  15562. case 0: ID = Intrinsic::rint;
  15563. CI = Intrinsic::experimental_constrained_rint; break;
  15564. }
  15565. break;
  15566. case 4: // IEEE-inexact exception suppressed
  15567. switch (M5.getZExtValue()) {
  15568. default: break;
  15569. case 0: ID = Intrinsic::nearbyint;
  15570. CI = Intrinsic::experimental_constrained_nearbyint; break;
  15571. case 1: ID = Intrinsic::round;
  15572. CI = Intrinsic::experimental_constrained_round; break;
  15573. case 5: ID = Intrinsic::trunc;
  15574. CI = Intrinsic::experimental_constrained_trunc; break;
  15575. case 6: ID = Intrinsic::ceil;
  15576. CI = Intrinsic::experimental_constrained_ceil; break;
  15577. case 7: ID = Intrinsic::floor;
  15578. CI = Intrinsic::experimental_constrained_floor; break;
  15579. }
  15580. break;
  15581. }
  15582. if (ID != Intrinsic::not_intrinsic) {
  15583. if (Builder.getIsFPConstrained()) {
  15584. Function *F = CGM.getIntrinsic(CI, ResultType);
  15585. return Builder.CreateConstrainedFPCall(F, X);
  15586. } else {
  15587. Function *F = CGM.getIntrinsic(ID, ResultType);
  15588. return Builder.CreateCall(F, X);
  15589. }
  15590. }
  15591. switch (BuiltinID) { // FIXME: constrained version?
  15592. case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
  15593. case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
  15594. default: llvm_unreachable("Unknown BuiltinID");
  15595. }
  15596. Function *F = CGM.getIntrinsic(ID);
  15597. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  15598. Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
  15599. return Builder.CreateCall(F, {X, M4Value, M5Value});
  15600. }
  15601. case SystemZ::BI__builtin_s390_vfmaxsb:
  15602. case SystemZ::BI__builtin_s390_vfmaxdb: {
  15603. llvm::Type *ResultType = ConvertType(E->getType());
  15604. Value *X = EmitScalarExpr(E->getArg(0));
  15605. Value *Y = EmitScalarExpr(E->getArg(1));
  15606. // Constant-fold the M4 mask argument.
  15607. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
15608. // Check whether this instance can be represented via an LLVM standard
  15609. // intrinsic. We only support some values of M4.
  15610. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  15611. Intrinsic::ID CI;
  15612. switch (M4.getZExtValue()) {
  15613. default: break;
  15614. case 4: ID = Intrinsic::maxnum;
  15615. CI = Intrinsic::experimental_constrained_maxnum; break;
  15616. }
  15617. if (ID != Intrinsic::not_intrinsic) {
  15618. if (Builder.getIsFPConstrained()) {
  15619. Function *F = CGM.getIntrinsic(CI, ResultType);
  15620. return Builder.CreateConstrainedFPCall(F, {X, Y});
  15621. } else {
  15622. Function *F = CGM.getIntrinsic(ID, ResultType);
  15623. return Builder.CreateCall(F, {X, Y});
  15624. }
  15625. }
  15626. switch (BuiltinID) {
  15627. case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
  15628. case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
  15629. default: llvm_unreachable("Unknown BuiltinID");
  15630. }
  15631. Function *F = CGM.getIntrinsic(ID);
  15632. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  15633. return Builder.CreateCall(F, {X, Y, M4Value});
  15634. }
  15635. case SystemZ::BI__builtin_s390_vfminsb:
  15636. case SystemZ::BI__builtin_s390_vfmindb: {
  15637. llvm::Type *ResultType = ConvertType(E->getType());
  15638. Value *X = EmitScalarExpr(E->getArg(0));
  15639. Value *Y = EmitScalarExpr(E->getArg(1));
  15640. // Constant-fold the M4 mask argument.
  15641. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
15642. // Check whether this instance can be represented via an LLVM standard
  15643. // intrinsic. We only support some values of M4.
  15644. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  15645. Intrinsic::ID CI;
  15646. switch (M4.getZExtValue()) {
  15647. default: break;
  15648. case 4: ID = Intrinsic::minnum;
  15649. CI = Intrinsic::experimental_constrained_minnum; break;
  15650. }
  15651. if (ID != Intrinsic::not_intrinsic) {
  15652. if (Builder.getIsFPConstrained()) {
  15653. Function *F = CGM.getIntrinsic(CI, ResultType);
  15654. return Builder.CreateConstrainedFPCall(F, {X, Y});
  15655. } else {
  15656. Function *F = CGM.getIntrinsic(ID, ResultType);
  15657. return Builder.CreateCall(F, {X, Y});
  15658. }
  15659. }
  15660. switch (BuiltinID) {
  15661. case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
  15662. case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
  15663. default: llvm_unreachable("Unknown BuiltinID");
  15664. }
  15665. Function *F = CGM.getIntrinsic(ID);
  15666. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  15667. return Builder.CreateCall(F, {X, Y, M4Value});
  15668. }
  15669. case SystemZ::BI__builtin_s390_vlbrh:
  15670. case SystemZ::BI__builtin_s390_vlbrf:
  15671. case SystemZ::BI__builtin_s390_vlbrg: {
  15672. llvm::Type *ResultType = ConvertType(E->getType());
  15673. Value *X = EmitScalarExpr(E->getArg(0));
  15674. Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
  15675. return Builder.CreateCall(F, X);
  15676. }
  15677. // Vector intrinsics that output the post-instruction CC value.
  15678. #define INTRINSIC_WITH_CC(NAME) \
  15679. case SystemZ::BI__builtin_##NAME: \
  15680. return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
  15681. INTRINSIC_WITH_CC(s390_vpkshs);
  15682. INTRINSIC_WITH_CC(s390_vpksfs);
  15683. INTRINSIC_WITH_CC(s390_vpksgs);
  15684. INTRINSIC_WITH_CC(s390_vpklshs);
  15685. INTRINSIC_WITH_CC(s390_vpklsfs);
  15686. INTRINSIC_WITH_CC(s390_vpklsgs);
  15687. INTRINSIC_WITH_CC(s390_vceqbs);
  15688. INTRINSIC_WITH_CC(s390_vceqhs);
  15689. INTRINSIC_WITH_CC(s390_vceqfs);
  15690. INTRINSIC_WITH_CC(s390_vceqgs);
  15691. INTRINSIC_WITH_CC(s390_vchbs);
  15692. INTRINSIC_WITH_CC(s390_vchhs);
  15693. INTRINSIC_WITH_CC(s390_vchfs);
  15694. INTRINSIC_WITH_CC(s390_vchgs);
  15695. INTRINSIC_WITH_CC(s390_vchlbs);
  15696. INTRINSIC_WITH_CC(s390_vchlhs);
  15697. INTRINSIC_WITH_CC(s390_vchlfs);
  15698. INTRINSIC_WITH_CC(s390_vchlgs);
  15699. INTRINSIC_WITH_CC(s390_vfaebs);
  15700. INTRINSIC_WITH_CC(s390_vfaehs);
  15701. INTRINSIC_WITH_CC(s390_vfaefs);
  15702. INTRINSIC_WITH_CC(s390_vfaezbs);
  15703. INTRINSIC_WITH_CC(s390_vfaezhs);
  15704. INTRINSIC_WITH_CC(s390_vfaezfs);
  15705. INTRINSIC_WITH_CC(s390_vfeebs);
  15706. INTRINSIC_WITH_CC(s390_vfeehs);
  15707. INTRINSIC_WITH_CC(s390_vfeefs);
  15708. INTRINSIC_WITH_CC(s390_vfeezbs);
  15709. INTRINSIC_WITH_CC(s390_vfeezhs);
  15710. INTRINSIC_WITH_CC(s390_vfeezfs);
  15711. INTRINSIC_WITH_CC(s390_vfenebs);
  15712. INTRINSIC_WITH_CC(s390_vfenehs);
  15713. INTRINSIC_WITH_CC(s390_vfenefs);
  15714. INTRINSIC_WITH_CC(s390_vfenezbs);
  15715. INTRINSIC_WITH_CC(s390_vfenezhs);
  15716. INTRINSIC_WITH_CC(s390_vfenezfs);
  15717. INTRINSIC_WITH_CC(s390_vistrbs);
  15718. INTRINSIC_WITH_CC(s390_vistrhs);
  15719. INTRINSIC_WITH_CC(s390_vistrfs);
  15720. INTRINSIC_WITH_CC(s390_vstrcbs);
  15721. INTRINSIC_WITH_CC(s390_vstrchs);
  15722. INTRINSIC_WITH_CC(s390_vstrcfs);
  15723. INTRINSIC_WITH_CC(s390_vstrczbs);
  15724. INTRINSIC_WITH_CC(s390_vstrczhs);
  15725. INTRINSIC_WITH_CC(s390_vstrczfs);
  15726. INTRINSIC_WITH_CC(s390_vfcesbs);
  15727. INTRINSIC_WITH_CC(s390_vfcedbs);
  15728. INTRINSIC_WITH_CC(s390_vfchsbs);
  15729. INTRINSIC_WITH_CC(s390_vfchdbs);
  15730. INTRINSIC_WITH_CC(s390_vfchesbs);
  15731. INTRINSIC_WITH_CC(s390_vfchedbs);
  15732. INTRINSIC_WITH_CC(s390_vftcisb);
  15733. INTRINSIC_WITH_CC(s390_vftcidb);
  15734. INTRINSIC_WITH_CC(s390_vstrsb);
  15735. INTRINSIC_WITH_CC(s390_vstrsh);
  15736. INTRINSIC_WITH_CC(s390_vstrsf);
  15737. INTRINSIC_WITH_CC(s390_vstrszb);
  15738. INTRINSIC_WITH_CC(s390_vstrszh);
  15739. INTRINSIC_WITH_CC(s390_vstrszf);
  15740. #undef INTRINSIC_WITH_CC
  15741. default:
  15742. return nullptr;
  15743. }
  15744. }
  15745. namespace {
15746. // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
  15747. struct NVPTXMmaLdstInfo {
  15748. unsigned NumResults; // Number of elements to load/store
15749. // Intrinsic IDs for the row/col variants; 0 if a particular layout is unsupported.
  15750. unsigned IID_col;
  15751. unsigned IID_row;
  15752. };
  15753. #define MMA_INTR(geom_op_type, layout) \
  15754. Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
  15755. #define MMA_LDST(n, geom_op_type) \
  15756. { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
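// Illustrative expansion (comment only): MMA_LDST(8, m16n16k16_load_a_f16)
// yields { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//             Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }.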
  15757. static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
  15758. switch (BuiltinID) {
  15759. // FP MMA loads
  15760. case NVPTX::BI__hmma_m16n16k16_ld_a:
  15761. return MMA_LDST(8, m16n16k16_load_a_f16);
  15762. case NVPTX::BI__hmma_m16n16k16_ld_b:
  15763. return MMA_LDST(8, m16n16k16_load_b_f16);
  15764. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  15765. return MMA_LDST(4, m16n16k16_load_c_f16);
  15766. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  15767. return MMA_LDST(8, m16n16k16_load_c_f32);
  15768. case NVPTX::BI__hmma_m32n8k16_ld_a:
  15769. return MMA_LDST(8, m32n8k16_load_a_f16);
  15770. case NVPTX::BI__hmma_m32n8k16_ld_b:
  15771. return MMA_LDST(8, m32n8k16_load_b_f16);
  15772. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  15773. return MMA_LDST(4, m32n8k16_load_c_f16);
  15774. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  15775. return MMA_LDST(8, m32n8k16_load_c_f32);
  15776. case NVPTX::BI__hmma_m8n32k16_ld_a:
  15777. return MMA_LDST(8, m8n32k16_load_a_f16);
  15778. case NVPTX::BI__hmma_m8n32k16_ld_b:
  15779. return MMA_LDST(8, m8n32k16_load_b_f16);
  15780. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  15781. return MMA_LDST(4, m8n32k16_load_c_f16);
  15782. case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  15783. return MMA_LDST(8, m8n32k16_load_c_f32);
  15784. // Integer MMA loads
  15785. case NVPTX::BI__imma_m16n16k16_ld_a_s8:
  15786. return MMA_LDST(2, m16n16k16_load_a_s8);
  15787. case NVPTX::BI__imma_m16n16k16_ld_a_u8:
  15788. return MMA_LDST(2, m16n16k16_load_a_u8);
  15789. case NVPTX::BI__imma_m16n16k16_ld_b_s8:
  15790. return MMA_LDST(2, m16n16k16_load_b_s8);
  15791. case NVPTX::BI__imma_m16n16k16_ld_b_u8:
  15792. return MMA_LDST(2, m16n16k16_load_b_u8);
  15793. case NVPTX::BI__imma_m16n16k16_ld_c:
  15794. return MMA_LDST(8, m16n16k16_load_c_s32);
  15795. case NVPTX::BI__imma_m32n8k16_ld_a_s8:
  15796. return MMA_LDST(4, m32n8k16_load_a_s8);
  15797. case NVPTX::BI__imma_m32n8k16_ld_a_u8:
  15798. return MMA_LDST(4, m32n8k16_load_a_u8);
  15799. case NVPTX::BI__imma_m32n8k16_ld_b_s8:
  15800. return MMA_LDST(1, m32n8k16_load_b_s8);
  15801. case NVPTX::BI__imma_m32n8k16_ld_b_u8:
  15802. return MMA_LDST(1, m32n8k16_load_b_u8);
  15803. case NVPTX::BI__imma_m32n8k16_ld_c:
  15804. return MMA_LDST(8, m32n8k16_load_c_s32);
  15805. case NVPTX::BI__imma_m8n32k16_ld_a_s8:
  15806. return MMA_LDST(1, m8n32k16_load_a_s8);
  15807. case NVPTX::BI__imma_m8n32k16_ld_a_u8:
  15808. return MMA_LDST(1, m8n32k16_load_a_u8);
  15809. case NVPTX::BI__imma_m8n32k16_ld_b_s8:
  15810. return MMA_LDST(4, m8n32k16_load_b_s8);
  15811. case NVPTX::BI__imma_m8n32k16_ld_b_u8:
  15812. return MMA_LDST(4, m8n32k16_load_b_u8);
  15813. case NVPTX::BI__imma_m8n32k16_ld_c:
  15814. return MMA_LDST(8, m8n32k16_load_c_s32);
  15815. // Sub-integer MMA loads.
  15816. // Only row/col layout is supported by A/B fragments.
  15817. case NVPTX::BI__imma_m8n8k32_ld_a_s4:
  15818. return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
  15819. case NVPTX::BI__imma_m8n8k32_ld_a_u4:
  15820. return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
  15821. case NVPTX::BI__imma_m8n8k32_ld_b_s4:
  15822. return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
  15823. case NVPTX::BI__imma_m8n8k32_ld_b_u4:
  15824. return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
  15825. case NVPTX::BI__imma_m8n8k32_ld_c:
  15826. return MMA_LDST(2, m8n8k32_load_c_s32);
  15827. case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
  15828. return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
  15829. case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
  15830. return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
  15831. case NVPTX::BI__bmma_m8n8k128_ld_c:
  15832. return MMA_LDST(2, m8n8k128_load_c_s32);
  15833. // Double MMA loads
  15834. case NVPTX::BI__dmma_m8n8k4_ld_a:
  15835. return MMA_LDST(1, m8n8k4_load_a_f64);
  15836. case NVPTX::BI__dmma_m8n8k4_ld_b:
  15837. return MMA_LDST(1, m8n8k4_load_b_f64);
  15838. case NVPTX::BI__dmma_m8n8k4_ld_c:
  15839. return MMA_LDST(2, m8n8k4_load_c_f64);
  15840. // Alternate float MMA loads
  15841. case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  15842. return MMA_LDST(4, m16n16k16_load_a_bf16);
  15843. case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  15844. return MMA_LDST(4, m16n16k16_load_b_bf16);
  15845. case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  15846. return MMA_LDST(2, m8n32k16_load_a_bf16);
  15847. case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  15848. return MMA_LDST(8, m8n32k16_load_b_bf16);
  15849. case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  15850. return MMA_LDST(8, m32n8k16_load_a_bf16);
  15851. case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  15852. return MMA_LDST(2, m32n8k16_load_b_bf16);
  15853. case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  15854. return MMA_LDST(4, m16n16k8_load_a_tf32);
  15855. case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  15856. return MMA_LDST(4, m16n16k8_load_b_tf32);
  15857. case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
  15858. return MMA_LDST(8, m16n16k8_load_c_f32);
15859. // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
  15860. // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
  15861. // use fragment C for both loads and stores.
  15862. // FP MMA stores.
  15863. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  15864. return MMA_LDST(4, m16n16k16_store_d_f16);
  15865. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  15866. return MMA_LDST(8, m16n16k16_store_d_f32);
  15867. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  15868. return MMA_LDST(4, m32n8k16_store_d_f16);
  15869. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  15870. return MMA_LDST(8, m32n8k16_store_d_f32);
  15871. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  15872. return MMA_LDST(4, m8n32k16_store_d_f16);
  15873. case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  15874. return MMA_LDST(8, m8n32k16_store_d_f32);
  15875. // Integer and sub-integer MMA stores.
  15876. // Another naming quirk. Unlike other MMA builtins that use PTX types in the
  15877. // name, integer loads/stores use LLVM's i32.
  15878. case NVPTX::BI__imma_m16n16k16_st_c_i32:
  15879. return MMA_LDST(8, m16n16k16_store_d_s32);
  15880. case NVPTX::BI__imma_m32n8k16_st_c_i32:
  15881. return MMA_LDST(8, m32n8k16_store_d_s32);
  15882. case NVPTX::BI__imma_m8n32k16_st_c_i32:
  15883. return MMA_LDST(8, m8n32k16_store_d_s32);
  15884. case NVPTX::BI__imma_m8n8k32_st_c_i32:
  15885. return MMA_LDST(2, m8n8k32_store_d_s32);
  15886. case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  15887. return MMA_LDST(2, m8n8k128_store_d_s32);
  15888. // Double MMA store
  15889. case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  15890. return MMA_LDST(2, m8n8k4_store_d_f64);
  15891. // Alternate float MMA store
  15892. case NVPTX::BI__mma_m16n16k8_st_c_f32:
  15893. return MMA_LDST(8, m16n16k8_store_d_f32);
  15894. default:
  15895. llvm_unreachable("Unknown MMA builtin");
  15896. }
  15897. }
  15898. #undef MMA_LDST
  15899. #undef MMA_INTR
  15900. struct NVPTXMmaInfo {
  15901. unsigned NumEltsA;
  15902. unsigned NumEltsB;
  15903. unsigned NumEltsC;
  15904. unsigned NumEltsD;
  15905. // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  15906. // over 'col' for layout. The index of non-satf variants is expected to match
  15907. // the undocumented layout constants used by CUDA's mma.hpp.
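// Indexing sketch (illustration): the eight entries are laid out as row_row,
// row_col, col_row, col_col, followed by the same four layouts with _satfinite,
// so e.g. Layout == 1 (row/col) with Satf == true selects Variants[1 + 4*1],
// the row_col ..._satfinite intrinsic.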
  15908. std::array<unsigned, 8> Variants;
  15909. unsigned getMMAIntrinsic(int Layout, bool Satf) {
  15910. unsigned Index = Layout + 4 * Satf;
  15911. if (Index >= Variants.size())
  15912. return 0;
  15913. return Variants[Index];
  15914. }
  15915. };
15916. // Returns the NVPTXMmaInfo for the builtin; its getMMAIntrinsic(Layout, Satf)
15917. // yields the matching intrinsic for valid Layout/Satf combinations, 0 otherwise.
  15918. static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
  15919. // clang-format off
  15920. #define MMA_VARIANTS(geom, type) \
  15921. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
  15922. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
  15923. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
  15924. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
  15925. #define MMA_SATF_VARIANTS(geom, type) \
  15926. MMA_VARIANTS(geom, type), \
  15927. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
  15928. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
  15929. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
  15930. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
  15931. // Sub-integer MMA only supports row.col layout.
  15932. #define MMA_VARIANTS_I4(geom, type) \
  15933. 0, \
  15934. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
  15935. 0, \
  15936. 0, \
  15937. 0, \
  15938. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
  15939. 0, \
  15940. 0
  15941. // b1 MMA does not support .satfinite.
  15942. #define MMA_VARIANTS_B1_XOR(geom, type) \
  15943. 0, \
  15944. Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
  15945. 0, \
  15946. 0, \
  15947. 0, \
  15948. 0, \
  15949. 0, \
  15950. 0
  15951. #define MMA_VARIANTS_B1_AND(geom, type) \
  15952. 0, \
  15953. Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
  15954. 0, \
  15955. 0, \
  15956. 0, \
  15957. 0, \
  15958. 0, \
  15959. 0
  15960. // clang-format on
  15961. switch (BuiltinID) {
  15962. // FP MMA
15963. // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
15964. // the NumEltsN fields of the return value are ordered as A,B,C,D.
  15965. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  15966. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
  15967. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  15968. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
  15969. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  15970. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
  15971. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  15972. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
  15973. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  15974. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
  15975. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  15976. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
  15977. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  15978. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
  15979. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  15980. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
  15981. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  15982. return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
  15983. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  15984. return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
  15985. case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  15986. return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
  15987. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  15988. return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
  15989. // Integer MMA
  15990. case NVPTX::BI__imma_m16n16k16_mma_s8:
  15991. return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
  15992. case NVPTX::BI__imma_m16n16k16_mma_u8:
  15993. return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
  15994. case NVPTX::BI__imma_m32n8k16_mma_s8:
  15995. return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
  15996. case NVPTX::BI__imma_m32n8k16_mma_u8:
  15997. return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
  15998. case NVPTX::BI__imma_m8n32k16_mma_s8:
  15999. return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
  16000. case NVPTX::BI__imma_m8n32k16_mma_u8:
  16001. return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
  16002. // Sub-integer MMA
  16003. case NVPTX::BI__imma_m8n8k32_mma_s4:
  16004. return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
  16005. case NVPTX::BI__imma_m8n8k32_mma_u4:
  16006. return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
  16007. case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  16008. return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
  16009. case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  16010. return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
  16011. // Double MMA
  16012. case NVPTX::BI__dmma_m8n8k4_mma_f64:
  16013. return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
  16014. // Alternate FP MMA
  16015. case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  16016. return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
  16017. case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  16018. return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
  16019. case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  16020. return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
  16021. case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
  16022. return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
  16023. default:
  16024. llvm_unreachable("Unexpected builtin ID.");
  16025. }
  16026. #undef MMA_VARIANTS
  16027. #undef MMA_SATF_VARIANTS
  16028. #undef MMA_VARIANTS_I4
  16029. #undef MMA_VARIANTS_B1_AND
  16030. #undef MMA_VARIANTS_B1_XOR
  16031. }
  16032. } // namespace
  16033. Value *
  16034. CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
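// MakeLdg lowers the __nvvm_ldg_* builtins to the NVVM ldg intrinsics, passing
// the natural alignment of the pointee as an explicit i32 operand.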
  16035. auto MakeLdg = [&](unsigned IntrinsicID) {
  16036. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16037. clang::CharUnits Align =
  16038. CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
  16039. return Builder.CreateCall(
  16040. CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
  16041. Ptr->getType()}),
  16042. {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  16043. };
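// MakeScopedAtomic lowers the cta-/sys-scoped atomic builtins below: it forwards
// the pointer and value operands to the given scoped NVVM intrinsic, which is
// overloaded on the pointee and pointer types.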
  16044. auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
  16045. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16046. return Builder.CreateCall(
  16047. CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
  16048. Ptr->getType()}),
  16049. {Ptr, EmitScalarExpr(E->getArg(1))});
  16050. };
  16051. switch (BuiltinID) {
  16052. case NVPTX::BI__nvvm_atom_add_gen_i:
  16053. case NVPTX::BI__nvvm_atom_add_gen_l:
  16054. case NVPTX::BI__nvvm_atom_add_gen_ll:
  16055. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
  16056. case NVPTX::BI__nvvm_atom_sub_gen_i:
  16057. case NVPTX::BI__nvvm_atom_sub_gen_l:
  16058. case NVPTX::BI__nvvm_atom_sub_gen_ll:
  16059. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
  16060. case NVPTX::BI__nvvm_atom_and_gen_i:
  16061. case NVPTX::BI__nvvm_atom_and_gen_l:
  16062. case NVPTX::BI__nvvm_atom_and_gen_ll:
  16063. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
  16064. case NVPTX::BI__nvvm_atom_or_gen_i:
  16065. case NVPTX::BI__nvvm_atom_or_gen_l:
  16066. case NVPTX::BI__nvvm_atom_or_gen_ll:
  16067. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
  16068. case NVPTX::BI__nvvm_atom_xor_gen_i:
  16069. case NVPTX::BI__nvvm_atom_xor_gen_l:
  16070. case NVPTX::BI__nvvm_atom_xor_gen_ll:
  16071. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
  16072. case NVPTX::BI__nvvm_atom_xchg_gen_i:
  16073. case NVPTX::BI__nvvm_atom_xchg_gen_l:
  16074. case NVPTX::BI__nvvm_atom_xchg_gen_ll:
  16075. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
  16076. case NVPTX::BI__nvvm_atom_max_gen_i:
  16077. case NVPTX::BI__nvvm_atom_max_gen_l:
  16078. case NVPTX::BI__nvvm_atom_max_gen_ll:
  16079. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
  16080. case NVPTX::BI__nvvm_atom_max_gen_ui:
  16081. case NVPTX::BI__nvvm_atom_max_gen_ul:
  16082. case NVPTX::BI__nvvm_atom_max_gen_ull:
  16083. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
  16084. case NVPTX::BI__nvvm_atom_min_gen_i:
  16085. case NVPTX::BI__nvvm_atom_min_gen_l:
  16086. case NVPTX::BI__nvvm_atom_min_gen_ll:
  16087. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
  16088. case NVPTX::BI__nvvm_atom_min_gen_ui:
  16089. case NVPTX::BI__nvvm_atom_min_gen_ul:
  16090. case NVPTX::BI__nvvm_atom_min_gen_ull:
  16091. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
  16092. case NVPTX::BI__nvvm_atom_cas_gen_i:
  16093. case NVPTX::BI__nvvm_atom_cas_gen_l:
  16094. case NVPTX::BI__nvvm_atom_cas_gen_ll:
  16095. // __nvvm_atom_cas_gen_* should return the old value rather than the
  16096. // success flag.
  16097. return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
  16098. case NVPTX::BI__nvvm_atom_add_gen_f:
  16099. case NVPTX::BI__nvvm_atom_add_gen_d: {
  16100. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16101. Value *Val = EmitScalarExpr(E->getArg(1));
  16102. return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
  16103. AtomicOrdering::SequentiallyConsistent);
  16104. }
  16105. case NVPTX::BI__nvvm_atom_inc_gen_ui: {
  16106. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16107. Value *Val = EmitScalarExpr(E->getArg(1));
  16108. Function *FnALI32 =
  16109. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
  16110. return Builder.CreateCall(FnALI32, {Ptr, Val});
  16111. }
  16112. case NVPTX::BI__nvvm_atom_dec_gen_ui: {
  16113. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16114. Value *Val = EmitScalarExpr(E->getArg(1));
  16115. Function *FnALD32 =
  16116. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
  16117. return Builder.CreateCall(FnALD32, {Ptr, Val});
  16118. }
  16119. case NVPTX::BI__nvvm_ldg_c:
  16120. case NVPTX::BI__nvvm_ldg_c2:
  16121. case NVPTX::BI__nvvm_ldg_c4:
  16122. case NVPTX::BI__nvvm_ldg_s:
  16123. case NVPTX::BI__nvvm_ldg_s2:
  16124. case NVPTX::BI__nvvm_ldg_s4:
  16125. case NVPTX::BI__nvvm_ldg_i:
  16126. case NVPTX::BI__nvvm_ldg_i2:
  16127. case NVPTX::BI__nvvm_ldg_i4:
  16128. case NVPTX::BI__nvvm_ldg_l:
  16129. case NVPTX::BI__nvvm_ldg_ll:
  16130. case NVPTX::BI__nvvm_ldg_ll2:
  16131. case NVPTX::BI__nvvm_ldg_uc:
  16132. case NVPTX::BI__nvvm_ldg_uc2:
  16133. case NVPTX::BI__nvvm_ldg_uc4:
  16134. case NVPTX::BI__nvvm_ldg_us:
  16135. case NVPTX::BI__nvvm_ldg_us2:
  16136. case NVPTX::BI__nvvm_ldg_us4:
  16137. case NVPTX::BI__nvvm_ldg_ui:
  16138. case NVPTX::BI__nvvm_ldg_ui2:
  16139. case NVPTX::BI__nvvm_ldg_ui4:
  16140. case NVPTX::BI__nvvm_ldg_ul:
  16141. case NVPTX::BI__nvvm_ldg_ull:
  16142. case NVPTX::BI__nvvm_ldg_ull2:
  16143. // PTX Interoperability section 2.2: "For a vector with an even number of
  16144. // elements, its alignment is set to number of elements times the alignment
  16145. // of its member: n*alignof(t)."
  16146. return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  16147. case NVPTX::BI__nvvm_ldg_f:
  16148. case NVPTX::BI__nvvm_ldg_f2:
  16149. case NVPTX::BI__nvvm_ldg_f4:
  16150. case NVPTX::BI__nvvm_ldg_d:
  16151. case NVPTX::BI__nvvm_ldg_d2:
  16152. return MakeLdg(Intrinsic::nvvm_ldg_global_f);
  16153. case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  16154. case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  16155. case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
  16156. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
  16157. case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  16158. case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  16159. case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
  16160. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
  16161. case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  16162. case NVPTX::BI__nvvm_atom_cta_add_gen_d:
  16163. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
  16164. case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  16165. case NVPTX::BI__nvvm_atom_sys_add_gen_d:
  16166. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
  16167. case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  16168. case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  16169. case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
  16170. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
  16171. case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  16172. case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  16173. case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
  16174. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
  16175. case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  16176. case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  16177. case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  16178. case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  16179. case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  16180. case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
  16181. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
  16182. case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  16183. case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  16184. case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  16185. case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  16186. case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  16187. case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
  16188. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
  16189. case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  16190. case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  16191. case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  16192. case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  16193. case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  16194. case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
  16195. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
  16196. case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  16197. case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  16198. case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  16199. case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  16200. case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  16201. case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
  16202. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
  16203. case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
  16204. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
  16205. case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
  16206. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
  16207. case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
  16208. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
  16209. case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
  16210. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
  16211. case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  16212. case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  16213. case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
  16214. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
  16215. case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  16216. case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  16217. case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
  16218. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
  16219. case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  16220. case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  16221. case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
  16222. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
  16223. case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  16224. case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  16225. case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
  16226. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
  16227. case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  16228. case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  16229. case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
  16230. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
  16231. case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  16232. case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  16233. case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
  16234. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  16235. case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  16236. case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  16237. case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
  16238. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16239. return Builder.CreateCall(
  16240. CGM.getIntrinsic(
  16241. Intrinsic::nvvm_atomic_cas_gen_i_cta,
  16242. {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
  16243. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  16244. }
  16245. case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  16246. case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  16247. case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
  16248. Value *Ptr = EmitScalarExpr(E->getArg(0));
  16249. return Builder.CreateCall(
  16250. CGM.getIntrinsic(
  16251. Intrinsic::nvvm_atomic_cas_gen_i_sys,
  16252. {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
  16253. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  16254. }
  16255. case NVPTX::BI__nvvm_match_all_sync_i32p:
  16256. case NVPTX::BI__nvvm_match_all_sync_i64p: {
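// The match.all intrinsic returns a {value, predicate} pair; the predicate is
// stored through the third (pointer) argument and the value is returned.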
  16257. Value *Mask = EmitScalarExpr(E->getArg(0));
  16258. Value *Val = EmitScalarExpr(E->getArg(1));
  16259. Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
  16260. Value *ResultPair = Builder.CreateCall(
  16261. CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
  16262. ? Intrinsic::nvvm_match_all_sync_i32p
  16263. : Intrinsic::nvvm_match_all_sync_i64p),
  16264. {Mask, Val});
  16265. Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
  16266. PredOutPtr.getElementType());
  16267. Builder.CreateStore(Pred, PredOutPtr);
  16268. return Builder.CreateExtractValue(ResultPair, 0);
  16269. }
  16270. // FP MMA loads
  16271. case NVPTX::BI__hmma_m16n16k16_ld_a:
  16272. case NVPTX::BI__hmma_m16n16k16_ld_b:
  16273. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  16274. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  16275. case NVPTX::BI__hmma_m32n8k16_ld_a:
  16276. case NVPTX::BI__hmma_m32n8k16_ld_b:
  16277. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  16278. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  16279. case NVPTX::BI__hmma_m8n32k16_ld_a:
  16280. case NVPTX::BI__hmma_m8n32k16_ld_b:
  16281. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  16282. case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  16283. // Integer MMA loads.
  16284. case NVPTX::BI__imma_m16n16k16_ld_a_s8:
  16285. case NVPTX::BI__imma_m16n16k16_ld_a_u8:
  16286. case NVPTX::BI__imma_m16n16k16_ld_b_s8:
  16287. case NVPTX::BI__imma_m16n16k16_ld_b_u8:
  16288. case NVPTX::BI__imma_m16n16k16_ld_c:
  16289. case NVPTX::BI__imma_m32n8k16_ld_a_s8:
  16290. case NVPTX::BI__imma_m32n8k16_ld_a_u8:
  16291. case NVPTX::BI__imma_m32n8k16_ld_b_s8:
  16292. case NVPTX::BI__imma_m32n8k16_ld_b_u8:
  16293. case NVPTX::BI__imma_m32n8k16_ld_c:
  16294. case NVPTX::BI__imma_m8n32k16_ld_a_s8:
  16295. case NVPTX::BI__imma_m8n32k16_ld_a_u8:
  16296. case NVPTX::BI__imma_m8n32k16_ld_b_s8:
  16297. case NVPTX::BI__imma_m8n32k16_ld_b_u8:
  16298. case NVPTX::BI__imma_m8n32k16_ld_c:
  16299. // Sub-integer MMA loads.
  16300. case NVPTX::BI__imma_m8n8k32_ld_a_s4:
  16301. case NVPTX::BI__imma_m8n8k32_ld_a_u4:
  16302. case NVPTX::BI__imma_m8n8k32_ld_b_s4:
  16303. case NVPTX::BI__imma_m8n8k32_ld_b_u4:
  16304. case NVPTX::BI__imma_m8n8k32_ld_c:
  16305. case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
  16306. case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
  16307. case NVPTX::BI__bmma_m8n8k128_ld_c:
  16308. // Double MMA loads.
  16309. case NVPTX::BI__dmma_m8n8k4_ld_a:
  16310. case NVPTX::BI__dmma_m8n8k4_ld_b:
  16311. case NVPTX::BI__dmma_m8n8k4_ld_c:
  16312. // Alternate float MMA loads.
  16313. case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  16314. case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  16315. case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  16316. case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  16317. case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  16318. case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  16319. case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  16320. case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  16321. case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
  16322. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  16323. Value *Src = EmitScalarExpr(E->getArg(1));
  16324. Value *Ldm = EmitScalarExpr(E->getArg(2));
  16325. Optional<llvm::APSInt> isColMajorArg =
  16326. E->getArg(3)->getIntegerConstantExpr(getContext());
  16327. if (!isColMajorArg)
  16328. return nullptr;
  16329. bool isColMajor = isColMajorArg->getSExtValue();
  16330. NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
  16331. unsigned IID = isColMajor ? II.IID_col : II.IID_row;
  16332. if (IID == 0)
  16333. return nullptr;
  16334. Value *Result =
  16335. Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
  16336. // Save returned values.
  16337. assert(II.NumResults);
  16338. if (II.NumResults == 1) {
  16339. Builder.CreateAlignedStore(Result, Dst.getPointer(),
  16340. CharUnits::fromQuantity(4));
  16341. } else {
  16342. for (unsigned i = 0; i < II.NumResults; ++i) {
  16343. Builder.CreateAlignedStore(
  16344. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
  16345. Dst.getElementType()),
  16346. Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
  16347. llvm::ConstantInt::get(IntTy, i)),
  16348. CharUnits::fromQuantity(4));
  16349. }
  16350. }
  16351. return Result;
  16352. }
  16353. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  16354. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  16355. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  16356. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  16357. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  16358. case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  16359. case NVPTX::BI__imma_m16n16k16_st_c_i32:
  16360. case NVPTX::BI__imma_m32n8k16_st_c_i32:
  16361. case NVPTX::BI__imma_m8n32k16_st_c_i32:
  16362. case NVPTX::BI__imma_m8n8k32_st_c_i32:
  16363. case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  16364. case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  16365. case NVPTX::BI__mma_m16n16k8_st_c_f32: {
  16366. Value *Dst = EmitScalarExpr(E->getArg(0));
  16367. Address Src = EmitPointerWithAlignment(E->getArg(1));
  16368. Value *Ldm = EmitScalarExpr(E->getArg(2));
  16369. Optional<llvm::APSInt> isColMajorArg =
  16370. E->getArg(3)->getIntegerConstantExpr(getContext());
  16371. if (!isColMajorArg)
  16372. return nullptr;
  16373. bool isColMajor = isColMajorArg->getSExtValue();
  16374. NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
  16375. unsigned IID = isColMajor ? II.IID_col : II.IID_row;
  16376. if (IID == 0)
  16377. return nullptr;
  16378. Function *Intrinsic =
  16379. CGM.getIntrinsic(IID, Dst->getType());
  16380. llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
  16381. SmallVector<Value *, 10> Values = {Dst};
  16382. for (unsigned i = 0; i < II.NumResults; ++i) {
  16383. Value *V = Builder.CreateAlignedLoad(
  16384. Src.getElementType(),
  16385. Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
  16386. llvm::ConstantInt::get(IntTy, i)),
  16387. CharUnits::fromQuantity(4));
  16388. Values.push_back(Builder.CreateBitCast(V, ParamType));
  16389. }
  16390. Values.push_back(Ldm);
  16391. Value *Result = Builder.CreateCall(Intrinsic, Values);
  16392. return Result;
  16393. }
  16394. // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  16395. // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
  16396. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  16397. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  16398. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  16399. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  16400. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  16401. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  16402. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  16403. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  16404. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  16405. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  16406. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  16407. case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  16408. case NVPTX::BI__imma_m16n16k16_mma_s8:
  16409. case NVPTX::BI__imma_m16n16k16_mma_u8:
  16410. case NVPTX::BI__imma_m32n8k16_mma_s8:
  16411. case NVPTX::BI__imma_m32n8k16_mma_u8:
  16412. case NVPTX::BI__imma_m8n32k16_mma_s8:
  16413. case NVPTX::BI__imma_m8n32k16_mma_u8:
  16414. case NVPTX::BI__imma_m8n8k32_mma_s4:
  16415. case NVPTX::BI__imma_m8n8k32_mma_u4:
  16416. case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  16417. case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  16418. case NVPTX::BI__dmma_m8n8k4_mma_f64:
  16419. case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  16420. case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  16421. case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  16422. case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
  16423. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  16424. Address SrcA = EmitPointerWithAlignment(E->getArg(1));
  16425. Address SrcB = EmitPointerWithAlignment(E->getArg(2));
  16426. Address SrcC = EmitPointerWithAlignment(E->getArg(3));
  16427. Optional<llvm::APSInt> LayoutArg =
  16428. E->getArg(4)->getIntegerConstantExpr(getContext());
  16429. if (!LayoutArg)
  16430. return nullptr;
  16431. int Layout = LayoutArg->getSExtValue();
  16432. if (Layout < 0 || Layout > 3)
  16433. return nullptr;
  16434. llvm::APSInt SatfArg;
  16435. if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
  16436. BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
16437. SatfArg = 0; // .b1 does not have a satf argument.
  16438. else if (Optional<llvm::APSInt> OptSatfArg =
  16439. E->getArg(5)->getIntegerConstantExpr(getContext()))
  16440. SatfArg = *OptSatfArg;
  16441. else
  16442. return nullptr;
  16443. bool Satf = SatfArg.getSExtValue();
  16444. NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
  16445. unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
  16446. if (IID == 0) // Unsupported combination of Layout/Satf.
  16447. return nullptr;
  16448. SmallVector<Value *, 24> Values;
  16449. Function *Intrinsic = CGM.getIntrinsic(IID);
  16450. llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
  16451. // Load A
  16452. for (unsigned i = 0; i < MI.NumEltsA; ++i) {
  16453. Value *V = Builder.CreateAlignedLoad(
  16454. SrcA.getElementType(),
  16455. Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
  16456. llvm::ConstantInt::get(IntTy, i)),
  16457. CharUnits::fromQuantity(4));
  16458. Values.push_back(Builder.CreateBitCast(V, AType));
  16459. }
  16460. // Load B
  16461. llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
  16462. for (unsigned i = 0; i < MI.NumEltsB; ++i) {
  16463. Value *V = Builder.CreateAlignedLoad(
  16464. SrcB.getElementType(),
  16465. Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
  16466. llvm::ConstantInt::get(IntTy, i)),
  16467. CharUnits::fromQuantity(4));
  16468. Values.push_back(Builder.CreateBitCast(V, BType));
  16469. }
  16470. // Load C
  16471. llvm::Type *CType =
  16472. Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
  16473. for (unsigned i = 0; i < MI.NumEltsC; ++i) {
  16474. Value *V = Builder.CreateAlignedLoad(
  16475. SrcC.getElementType(),
  16476. Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
  16477. llvm::ConstantInt::get(IntTy, i)),
  16478. CharUnits::fromQuantity(4));
  16479. Values.push_back(Builder.CreateBitCast(V, CType));
  16480. }
  16481. Value *Result = Builder.CreateCall(Intrinsic, Values);
  16482. llvm::Type *DType = Dst.getElementType();
  16483. for (unsigned i = 0; i < MI.NumEltsD; ++i)
  16484. Builder.CreateAlignedStore(
  16485. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
  16486. Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
  16487. llvm::ConstantInt::get(IntTy, i)),
  16488. CharUnits::fromQuantity(4));
  16489. return Result;
  16490. }
  16491. default:
  16492. return nullptr;
  16493. }
  16494. }
  16495. namespace {
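// Collects the common operands of the alignment builtins (__builtin_is_aligned,
// __builtin_align_up, __builtin_align_down): the (possibly array-decayed) source
// value, the integer type used for the address arithmetic, the requested
// alignment, and the derived mask (alignment - 1).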
  16496. struct BuiltinAlignArgs {
  16497. llvm::Value *Src = nullptr;
  16498. llvm::Type *SrcType = nullptr;
  16499. llvm::Value *Alignment = nullptr;
  16500. llvm::Value *Mask = nullptr;
  16501. llvm::IntegerType *IntType = nullptr;
  16502. BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
  16503. QualType AstType = E->getArg(0)->getType();
  16504. if (AstType->isArrayType())
  16505. Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
  16506. else
  16507. Src = CGF.EmitScalarExpr(E->getArg(0));
  16508. SrcType = Src->getType();
  16509. if (SrcType->isPointerTy()) {
  16510. IntType = IntegerType::get(
  16511. CGF.getLLVMContext(),
  16512. CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
  16513. } else {
  16514. assert(SrcType->isIntegerTy());
  16515. IntType = cast<llvm::IntegerType>(SrcType);
  16516. }
  16517. Alignment = CGF.EmitScalarExpr(E->getArg(1));
  16518. Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
  16519. auto *One = llvm::ConstantInt::get(IntType, 1);
  16520. Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
  16521. }
  16522. };
  16523. } // namespace
  16524. /// Generate (x & (y-1)) == 0.
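/// For example (illustration only): __builtin_is_aligned(p, 8) with p == 0x1004
/// computes 0x1004 & 0x7 == 0x4, which is non-zero, so the result is false.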
  16525. RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
  16526. BuiltinAlignArgs Args(E, *this);
  16527. llvm::Value *SrcAddress = Args.Src;
  16528. if (Args.SrcType->isPointerTy())
  16529. SrcAddress =
  16530. Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
  16531. return RValue::get(Builder.CreateICmpEQ(
  16532. Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
  16533. llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
  16534. }
  16535. /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
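/// For example (illustration only): with x == 0x1003 and y == 8 the mask y-1 is
/// 0x7, so aligning down yields 0x1003 & ~0x7 == 0x1000 and aligning up yields
/// (0x1003 + 0x7) & ~0x7 == 0x1008.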
  16536. /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
16537. /// llvm.ptrmask intrinsic (with a GEP beforehand in the align_up case).
  16538. /// TODO: actually use ptrmask once most optimization passes know about it.
  16539. RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
  16540. BuiltinAlignArgs Args(E, *this);
  16541. llvm::Value *SrcAddr = Args.Src;
  16542. if (Args.Src->getType()->isPointerTy())
  16543. SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
  16544. llvm::Value *SrcForMask = SrcAddr;
  16545. if (AlignUp) {
  16546. // When aligning up we have to first add the mask to ensure we go over the
  16547. // next alignment value and then align down to the next valid multiple.
  16548. // By adding the mask, we ensure that align_up on an already aligned
  16549. // value will not change the value.
  16550. SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
  16551. }
  16552. // Invert the mask to only clear the lower bits.
  16553. llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
  16554. llvm::Value *Result =
  16555. Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
  16556. if (Args.Src->getType()->isPointerTy()) {
  16557. /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
  16558. // Result = Builder.CreateIntrinsic(
  16559. // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
16560. //   {SrcForMask, InvertedMask}, nullptr, "aligned_result");
  16561. Result->setName("aligned_intptr");
  16562. llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
  16563. // The result must point to the same underlying allocation. This means we
  16564. // can use an inbounds GEP to enable better optimization.
  16565. Value *Base = EmitCastToVoidPtr(Args.Src);
  16566. if (getLangOpts().isSignedOverflowDefined())
  16567. Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result");
  16568. else
  16569. Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference,
  16570. /*SignedIndices=*/true,
  16571. /*isSubtraction=*/!AlignUp,
  16572. E->getExprLoc(), "aligned_result");
  16573. Result = Builder.CreatePointerCast(Result, Args.SrcType);
  16574. // Emit an alignment assumption to ensure that the new alignment is
  16575. // propagated to loads/stores, etc.
  16576. emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
  16577. }
  16578. assert(Result->getType() == Args.SrcType);
  16579. return RValue::get(Result);
  16580. }
  16581. Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
  16582. const CallExpr *E) {
  16583. switch (BuiltinID) {
  16584. case WebAssembly::BI__builtin_wasm_memory_size: {
  16585. llvm::Type *ResultType = ConvertType(E->getType());
  16586. Value *I = EmitScalarExpr(E->getArg(0));
  16587. Function *Callee =
  16588. CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
  16589. return Builder.CreateCall(Callee, I);
  16590. }
  16591. case WebAssembly::BI__builtin_wasm_memory_grow: {
  16592. llvm::Type *ResultType = ConvertType(E->getType());
  16593. Value *Args[] = {EmitScalarExpr(E->getArg(0)),
  16594. EmitScalarExpr(E->getArg(1))};
  16595. Function *Callee =
  16596. CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
  16597. return Builder.CreateCall(Callee, Args);
  16598. }
  16599. case WebAssembly::BI__builtin_wasm_tls_size: {
  16600. llvm::Type *ResultType = ConvertType(E->getType());
  16601. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
  16602. return Builder.CreateCall(Callee);
  16603. }
  16604. case WebAssembly::BI__builtin_wasm_tls_align: {
  16605. llvm::Type *ResultType = ConvertType(E->getType());
  16606. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
  16607. return Builder.CreateCall(Callee);
  16608. }
  16609. case WebAssembly::BI__builtin_wasm_tls_base: {
  16610. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
  16611. return Builder.CreateCall(Callee);
  16612. }
  16613. case WebAssembly::BI__builtin_wasm_throw: {
  16614. Value *Tag = EmitScalarExpr(E->getArg(0));
  16615. Value *Obj = EmitScalarExpr(E->getArg(1));
  16616. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
  16617. return Builder.CreateCall(Callee, {Tag, Obj});
  16618. }
  16619. case WebAssembly::BI__builtin_wasm_rethrow: {
  16620. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
  16621. return Builder.CreateCall(Callee);
  16622. }
  16623. case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
  16624. Value *Addr = EmitScalarExpr(E->getArg(0));
  16625. Value *Expected = EmitScalarExpr(E->getArg(1));
  16626. Value *Timeout = EmitScalarExpr(E->getArg(2));
  16627. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
  16628. return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  16629. }
  16630. case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
  16631. Value *Addr = EmitScalarExpr(E->getArg(0));
  16632. Value *Expected = EmitScalarExpr(E->getArg(1));
  16633. Value *Timeout = EmitScalarExpr(E->getArg(2));
  16634. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
  16635. return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  16636. }
  16637. case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
  16638. Value *Addr = EmitScalarExpr(E->getArg(0));
  16639. Value *Count = EmitScalarExpr(E->getArg(1));
  16640. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
  16641. return Builder.CreateCall(Callee, {Addr, Count});
  16642. }
  16643. case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
  16644. case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
  16645. case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
  16646. case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
  16647. Value *Src = EmitScalarExpr(E->getArg(0));
  16648. llvm::Type *ResT = ConvertType(E->getType());
  16649. Function *Callee =
  16650. CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
  16651. return Builder.CreateCall(Callee, {Src});
  16652. }
  16653. case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
  16654. case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
  16655. case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
  16656. case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
  16657. Value *Src = EmitScalarExpr(E->getArg(0));
  16658. llvm::Type *ResT = ConvertType(E->getType());
  16659. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
  16660. {ResT, Src->getType()});
  16661. return Builder.CreateCall(Callee, {Src});
  16662. }
  16663. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
  16664. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
  16665. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
  16666. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
  16667. case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
  16668. Value *Src = EmitScalarExpr(E->getArg(0));
  16669. llvm::Type *ResT = ConvertType(E->getType());
  16670. Function *Callee =
  16671. CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
  16672. return Builder.CreateCall(Callee, {Src});
  16673. }
  16674. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
  16675. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
  16676. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
  16677. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
  16678. case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
  16679. Value *Src = EmitScalarExpr(E->getArg(0));
  16680. llvm::Type *ResT = ConvertType(E->getType());
  16681. Function *Callee =
  16682. CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
  16683. return Builder.CreateCall(Callee, {Src});
  16684. }
  16685. case WebAssembly::BI__builtin_wasm_min_f32:
  16686. case WebAssembly::BI__builtin_wasm_min_f64:
  16687. case WebAssembly::BI__builtin_wasm_min_f32x4:
  16688. case WebAssembly::BI__builtin_wasm_min_f64x2: {
  16689. Value *LHS = EmitScalarExpr(E->getArg(0));
  16690. Value *RHS = EmitScalarExpr(E->getArg(1));
  16691. Function *Callee =
  16692. CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
  16693. return Builder.CreateCall(Callee, {LHS, RHS});
  16694. }
  16695. case WebAssembly::BI__builtin_wasm_max_f32:
  16696. case WebAssembly::BI__builtin_wasm_max_f64:
  16697. case WebAssembly::BI__builtin_wasm_max_f32x4:
  16698. case WebAssembly::BI__builtin_wasm_max_f64x2: {
  16699. Value *LHS = EmitScalarExpr(E->getArg(0));
  16700. Value *RHS = EmitScalarExpr(E->getArg(1));
  16701. Function *Callee =
  16702. CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
  16703. return Builder.CreateCall(Callee, {LHS, RHS});
  16704. }
  16705. case WebAssembly::BI__builtin_wasm_pmin_f32x4:
  16706. case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
  16707. Value *LHS = EmitScalarExpr(E->getArg(0));
  16708. Value *RHS = EmitScalarExpr(E->getArg(1));
  16709. Function *Callee =
  16710. CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
  16711. return Builder.CreateCall(Callee, {LHS, RHS});
  16712. }
  16713. case WebAssembly::BI__builtin_wasm_pmax_f32x4:
  16714. case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
  16715. Value *LHS = EmitScalarExpr(E->getArg(0));
  16716. Value *RHS = EmitScalarExpr(E->getArg(1));
  16717. Function *Callee =
  16718. CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
  16719. return Builder.CreateCall(Callee, {LHS, RHS});
  16720. }
  16721. case WebAssembly::BI__builtin_wasm_ceil_f32x4:
  16722. case WebAssembly::BI__builtin_wasm_floor_f32x4:
  16723. case WebAssembly::BI__builtin_wasm_trunc_f32x4:
  16724. case WebAssembly::BI__builtin_wasm_nearest_f32x4:
  16725. case WebAssembly::BI__builtin_wasm_ceil_f64x2:
  16726. case WebAssembly::BI__builtin_wasm_floor_f64x2:
  16727. case WebAssembly::BI__builtin_wasm_trunc_f64x2:
  16728. case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
  16729. unsigned IntNo;
  16730. switch (BuiltinID) {
  16731. case WebAssembly::BI__builtin_wasm_ceil_f32x4:
  16732. case WebAssembly::BI__builtin_wasm_ceil_f64x2:
  16733. IntNo = Intrinsic::ceil;
  16734. break;
  16735. case WebAssembly::BI__builtin_wasm_floor_f32x4:
  16736. case WebAssembly::BI__builtin_wasm_floor_f64x2:
  16737. IntNo = Intrinsic::floor;
  16738. break;
  16739. case WebAssembly::BI__builtin_wasm_trunc_f32x4:
  16740. case WebAssembly::BI__builtin_wasm_trunc_f64x2:
  16741. IntNo = Intrinsic::trunc;
  16742. break;
  16743. case WebAssembly::BI__builtin_wasm_nearest_f32x4:
  16744. case WebAssembly::BI__builtin_wasm_nearest_f64x2:
  16745. IntNo = Intrinsic::nearbyint;
  16746. break;
  16747. default:
  16748. llvm_unreachable("unexpected builtin ID");
  16749. }
  16750. Value *Value = EmitScalarExpr(E->getArg(0));
  16751. Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  16752. return Builder.CreateCall(Callee, Value);
  16753. }
  16754. case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
  16755. Value *Src = EmitScalarExpr(E->getArg(0));
  16756. Value *Indices = EmitScalarExpr(E->getArg(1));
  16757. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
  16758. return Builder.CreateCall(Callee, {Src, Indices});
  16759. }
  16760. case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
  16761. case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
  16762. case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
  16763. case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
  16764. case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
  16765. case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
  16766. case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
  16767. case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
  16768. unsigned IntNo;
  16769. switch (BuiltinID) {
  16770. case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
  16771. case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
  16772. IntNo = Intrinsic::sadd_sat;
  16773. break;
  16774. case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
  16775. case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
  16776. IntNo = Intrinsic::uadd_sat;
  16777. break;
  16778. case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
  16779. case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
  16780. IntNo = Intrinsic::wasm_sub_sat_signed;
  16781. break;
  16782. case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
  16783. case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
  16784. IntNo = Intrinsic::wasm_sub_sat_unsigned;
  16785. break;
  16786. default:
  16787. llvm_unreachable("unexpected builtin ID");
  16788. }
  16789. Value *LHS = EmitScalarExpr(E->getArg(0));
  16790. Value *RHS = EmitScalarExpr(E->getArg(1));
  16791. Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  16792. return Builder.CreateCall(Callee, {LHS, RHS});
  16793. }
  16794. case WebAssembly::BI__builtin_wasm_abs_i8x16:
  16795. case WebAssembly::BI__builtin_wasm_abs_i16x8:
  16796. case WebAssembly::BI__builtin_wasm_abs_i32x4:
  16797. case WebAssembly::BI__builtin_wasm_abs_i64x2: {
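// Open-coded integer absolute value: select(v < 0, -v, v).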
  16798. Value *Vec = EmitScalarExpr(E->getArg(0));
  16799. Value *Neg = Builder.CreateNeg(Vec, "neg");
  16800. Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
  16801. Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
  16802. return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
  16803. }
  16804. case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  16805. case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  16806. case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  16807. case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  16808. case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  16809. case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  16810. case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  16811. case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  16812. case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  16813. case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  16814. case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  16815. case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
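// Integer vector min/max are open-coded as a compare followed by a select; the
// ICmp predicate below encodes both the signedness and min vs. max.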
  16816. Value *LHS = EmitScalarExpr(E->getArg(0));
  16817. Value *RHS = EmitScalarExpr(E->getArg(1));
  16818. Value *ICmp;
  16819. switch (BuiltinID) {
  16820. case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  16821. case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  16822. case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  16823. ICmp = Builder.CreateICmpSLT(LHS, RHS);
  16824. break;
  16825. case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  16826. case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  16827. case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  16828. ICmp = Builder.CreateICmpULT(LHS, RHS);
  16829. break;
  16830. case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  16831. case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  16832. case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  16833. ICmp = Builder.CreateICmpSGT(LHS, RHS);
  16834. break;
  16835. case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  16836. case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  16837. case WebAssembly::BI__builtin_wasm_max_u_i32x4:
  16838. ICmp = Builder.CreateICmpUGT(LHS, RHS);
  16839. break;
  16840. default:
  16841. llvm_unreachable("unexpected builtin ID");
  16842. }
  16843. return Builder.CreateSelect(ICmp, LHS, RHS);
  16844. }
  16845. case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
  16846. case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
  16847. Value *LHS = EmitScalarExpr(E->getArg(0));
  16848. Value *RHS = EmitScalarExpr(E->getArg(1));
  16849. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
  16850. ConvertType(E->getType()));
  16851. return Builder.CreateCall(Callee, {LHS, RHS});
  16852. }
  16853. case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
  16854. Value *LHS = EmitScalarExpr(E->getArg(0));
  16855. Value *RHS = EmitScalarExpr(E->getArg(1));
  16856. Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
  16857. return Builder.CreateCall(Callee, {LHS, RHS});
  16858. }
  16859. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
  16860. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
  16861. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
  16862. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
  16863. Value *Vec = EmitScalarExpr(E->getArg(0));
  16864. unsigned IntNo;
  16865. switch (BuiltinID) {
  16866. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
  16867. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
  16868. IntNo = Intrinsic::wasm_extadd_pairwise_signed;
  16869. break;
  16870. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
  16871. case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
  16872. IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
  16873. break;
  16874. default:
  16875. llvm_unreachable("unexptected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, Vec);
  }
  case WebAssembly::BI__builtin_wasm_bitselect: {
    Value *V1 = EmitScalarExpr(E->getArg(0));
    Value *V2 = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {V1, V2, C});
  }
  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_any_true_v128:
  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_any_true_v128:
      IntNo = Intrinsic::wasm_anytrue;
      break;
    case WebAssembly::BI__builtin_wasm_all_true_i8x16:
    case WebAssembly::BI__builtin_wasm_all_true_i16x8:
    case WebAssembly::BI__builtin_wasm_all_true_i32x4:
    case WebAssembly::BI__builtin_wasm_all_true_i64x2:
      IntNo = Intrinsic::wasm_alltrue;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
  case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
  case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_abs_f32x4:
  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
    Value *Low = EmitScalarExpr(E->getArg(0));
    Value *High = EmitScalarExpr(E->getArg(1));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_signed;
      break;
    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee =
        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
    return Builder.CreateCall(Callee, {Low, High});
  }
  case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
  case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
      IntNo = Intrinsic::fptosi_sat;
      break;
    case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4:
      IntNo = Intrinsic::fptoui_sat;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
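    // Note: the saturating conversion below yields a <2 x i32>, while the
    // wasm instruction produces an i32x4 with the two high lanes zeroed, so
    // the truncated value is widened by shuffling it together with a zero
    // vector.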
    llvm::Type *SrcT = Vec->getType();
    llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
    Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
    Value *Trunc = Builder.CreateCall(Callee, Vec);
    Value *Splat = Constant::getNullValue(TruncT);
    return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
  }
  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
    Value *Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
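    // The remaining 16 operands are the lane indices. The builtin requires
    // them to be integer constant expressions, so the assertion below should
    // never fire for arguments that Sema accepted.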
    while (OpIdx < 18) {
      Optional<llvm::APSInt> LaneConst =
          E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
      assert(LaneConst && "Constant arg isn't actually constant?");
      Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
    }
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
    return Builder.CreateCall(Callee, Ops);
  }
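  // Note: the fma/fms builtins below appear to correspond to the relaxed-SIMD
  // fused multiply-add / multiply-subtract instructions (still a proposal at
  // the time of writing), not to a strict IEEE fma.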
  case WebAssembly::BI__builtin_wasm_fma_f32x4:
  case WebAssembly::BI__builtin_wasm_fms_f32x4:
  case WebAssembly::BI__builtin_wasm_fma_f64x2:
  case WebAssembly::BI__builtin_wasm_fms_f64x2: {
    Value *A = EmitScalarExpr(E->getArg(0));
    Value *B = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_fma_f32x4:
    case WebAssembly::BI__builtin_wasm_fma_f64x2:
      IntNo = Intrinsic::wasm_fma;
      break;
    case WebAssembly::BI__builtin_wasm_fms_f32x4:
    case WebAssembly::BI__builtin_wasm_fms_f64x2:
      IntNo = Intrinsic::wasm_fms;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
    return Builder.CreateCall(Callee, {A, B, C});
  }
  case WebAssembly::BI__builtin_wasm_laneselect_i8x16:
  case WebAssembly::BI__builtin_wasm_laneselect_i16x8:
  case WebAssembly::BI__builtin_wasm_laneselect_i32x4:
  case WebAssembly::BI__builtin_wasm_laneselect_i64x2: {
    Value *A = EmitScalarExpr(E->getArg(0));
    Value *B = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType());
    return Builder.CreateCall(Callee, {A, B, C});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    Value *Indices = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
    return Builder.CreateCall(Callee, {Src, Indices});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
      IntNo = Intrinsic::wasm_relaxed_min;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
    case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
      IntNo = Intrinsic::wasm_relaxed_max;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
      IntNo = Intrinsic::wasm_relaxed_trunc_signed;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
      IntNo = Intrinsic::wasm_relaxed_trunc_zero_signed;
      break;
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2:
      IntNo = Intrinsic::wasm_relaxed_trunc_zero_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo);
    return Builder.CreateCall(Callee, {Vec});
  }
  default:
    return nullptr;
  }
}

static std::pair<Intrinsic::ID, unsigned>
getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
  struct Info {
    unsigned BuiltinID;
    Intrinsic::ID IntrinsicID;
    unsigned VecLen;
  };
  Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
    // Legacy builtins that take a vector in place of a vector predicate.
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
#undef CUSTOM_BUILTIN_MAPPING
  };

  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
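  // Sort the table exactly once, on first use: llvm::sort runs as a side
  // effect of initializing the function-local static below, and C++
  // guarantees that initialization happens only once (and thread-safely).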
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
  (void)SortOnce;

  const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
                                   Info{BuiltinID, 0, 0}, CmpInfo);
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
    return {Intrinsic::not_intrinsic, 0};

  return {F->IntrinsicID, F->VecLen};
}

Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  Intrinsic::ID ID;
  unsigned VecLen;
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);

  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
    // The base pointer is passed by address, so it needs to be loaded.
    Address A = EmitPointerWithAlignment(E->getArg(0));
    Address BP = Address(Builder.CreateBitCast(
        A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
    llvm::Value *Base = Builder.CreateLoad(BP);
    // The treatment of both loads and stores is the same: the arguments for
    // the builtin are the same as the arguments for the intrinsic.
    // Load:
    //   builtin(Base, Inc, Mod, Start)      -> intr(Base, Inc, Mod, Start)
    //   builtin(Base, Mod, Start)           -> intr(Base, Mod, Start)
    // Store:
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
    SmallVector<llvm::Value*,5> Ops = { Base };
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));

    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
    // The load intrinsics generate two results (Value, NewBase), stores
    // generate one (NewBase). The new base address needs to be stored.
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
                                  : Result;
    llvm::Value *LV = Builder.CreateBitCast(
        EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    llvm::Value *RetVal =
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
    if (IsLoad)
      RetVal = Builder.CreateExtractValue(Result, 0);
    return RetVal;
  };
  // Handle the conversion of bit-reverse load intrinsics to bit code.
  // The intrinsic call after this function only reads from memory; the
  // write to memory is handled by the store instruction.
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
    // The intrinsic generates one result, which is the new value for the base
    // pointer. It needs to be returned. The result of the load instruction is
    // passed to the intrinsic by address, so the value needs to be stored.
    llvm::Value *BaseAddress =
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);

    // Expressions like &(*pt++) will be incremented per evaluation.
    // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
    // only once per call.
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
                       Int8Ty, DestAddr.getAlignment());
    llvm::Value *DestAddress = DestAddr.getPointer();

    // Operands are Base, Dest, Modifier.
    // The intrinsic format in LLVM IR is defined as
    // { ValueType, i8* } (i8*, i32).
    llvm::Value *Result = Builder.CreateCall(
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});

    // The value needs to be stored as the variable is passed by reference.
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);

    // The store needs to be truncated to fit the destination type.
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
    // to be handled with stores of the respective destination type.
    DestVal = Builder.CreateTrunc(DestVal, DestTy);

    llvm::Value *DestForStore =
        Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
    Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
    // The updated value of the base pointer is returned.
    return Builder.CreateExtractValue(Result, 1);
  };
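  // HVX vector predicates are distinct types in LLVM IR, but the legacy
  // builtins traffic in plain byte vectors. V2Q converts a byte vector into a
  // predicate by AND-ing it with an all-ones scalar (V6_vandvrt), and Q2V
  // converts a predicate back into a byte vector (V6_vandqrt).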
  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
                                     : Intrinsic::hexagon_V6_vandvrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Vec, Builder.getInt32(-1)});
  };
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
                                     : Intrinsic::hexagon_V6_vandqrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Pred, Builder.getInt32(-1)});
  };

  switch (BuiltinID) {
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
  // and the corresponding C/C++ builtins use loads/stores to update
  // the predicate.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr = Builder.CreateElementBitCast(
        EmitPointerWithAlignment(E->getArg(2)), VecType);
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
                               PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }

  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
    SmallVector<llvm::Value*,4> Ops;
    const Expr *PredOp = E->getArg(0);
    // There will be an implicit cast to a boolean vector. Strip it.
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
      if (Cast->getCastKind() == CK_BitCast)
        PredOp = Cast->getSubExpr();
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
    }
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/true);
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/false);
  case Hexagon::BI__builtin_brev_ldub:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_ldb:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_lduh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldw:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
  case Hexagon::BI__builtin_brev_ldd:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
  } // switch

  return nullptr;
}

Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E,
                                             ReturnValueSlot ReturnValue) {
  SmallVector<Value *, 4> Ops;
  llvm::Type *ResultType = ConvertType(E->getType());

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  unsigned NF = 1;
  constexpr unsigned TAIL_UNDISTURBED = 0;

  // Required for overloaded intrinsics.
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");
  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clmul:
  case RISCV::BI__builtin_riscv_clmulh:
  case RISCV::BI__builtin_riscv_clmulr:
  case RISCV::BI__builtin_riscv_bcompress_32:
  case RISCV::BI__builtin_riscv_bcompress_64:
  case RISCV::BI__builtin_riscv_bdecompress_32:
  case RISCV::BI__builtin_riscv_bdecompress_64:
  case RISCV::BI__builtin_riscv_bfp_32:
  case RISCV::BI__builtin_riscv_bfp_64:
  case RISCV::BI__builtin_riscv_grev_32:
  case RISCV::BI__builtin_riscv_grev_64:
  case RISCV::BI__builtin_riscv_gorc_32:
  case RISCV::BI__builtin_riscv_gorc_64:
  case RISCV::BI__builtin_riscv_shfl_32:
  case RISCV::BI__builtin_riscv_shfl_64:
  case RISCV::BI__builtin_riscv_unshfl_32:
  case RISCV::BI__builtin_riscv_unshfl_64:
  case RISCV::BI__builtin_riscv_xperm_n:
  case RISCV::BI__builtin_riscv_xperm_b:
  case RISCV::BI__builtin_riscv_xperm_h:
  case RISCV::BI__builtin_riscv_xperm_w:
  case RISCV::BI__builtin_riscv_crc32_b:
  case RISCV::BI__builtin_riscv_crc32_h:
  case RISCV::BI__builtin_riscv_crc32_w:
  case RISCV::BI__builtin_riscv_crc32_d:
  case RISCV::BI__builtin_riscv_crc32c_b:
  case RISCV::BI__builtin_riscv_crc32c_h:
  case RISCV::BI__builtin_riscv_crc32c_w:
  case RISCV::BI__builtin_riscv_crc32c_d:
  case RISCV::BI__builtin_riscv_fsl_32:
  case RISCV::BI__builtin_riscv_fsr_32:
  case RISCV::BI__builtin_riscv_fsl_64:
  case RISCV::BI__builtin_riscv_fsr_64: {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin ID");
    // Zbb
    case RISCV::BI__builtin_riscv_orc_b_32:
    case RISCV::BI__builtin_riscv_orc_b_64:
      ID = Intrinsic::riscv_orc_b;
      break;

    // Zbc
    case RISCV::BI__builtin_riscv_clmul:
      ID = Intrinsic::riscv_clmul;
      break;
    case RISCV::BI__builtin_riscv_clmulh:
      ID = Intrinsic::riscv_clmulh;
      break;
    case RISCV::BI__builtin_riscv_clmulr:
      ID = Intrinsic::riscv_clmulr;
      break;

    // Zbe
    case RISCV::BI__builtin_riscv_bcompress_32:
    case RISCV::BI__builtin_riscv_bcompress_64:
      ID = Intrinsic::riscv_bcompress;
      break;
    case RISCV::BI__builtin_riscv_bdecompress_32:
    case RISCV::BI__builtin_riscv_bdecompress_64:
      ID = Intrinsic::riscv_bdecompress;
      break;

    // Zbf
    case RISCV::BI__builtin_riscv_bfp_32:
    case RISCV::BI__builtin_riscv_bfp_64:
      ID = Intrinsic::riscv_bfp;
      break;

    // Zbp
    case RISCV::BI__builtin_riscv_grev_32:
    case RISCV::BI__builtin_riscv_grev_64:
      ID = Intrinsic::riscv_grev;
      break;
    case RISCV::BI__builtin_riscv_gorc_32:
    case RISCV::BI__builtin_riscv_gorc_64:
      ID = Intrinsic::riscv_gorc;
      break;
    case RISCV::BI__builtin_riscv_shfl_32:
    case RISCV::BI__builtin_riscv_shfl_64:
      ID = Intrinsic::riscv_shfl;
      break;
    case RISCV::BI__builtin_riscv_unshfl_32:
    case RISCV::BI__builtin_riscv_unshfl_64:
      ID = Intrinsic::riscv_unshfl;
      break;
    case RISCV::BI__builtin_riscv_xperm_n:
      ID = Intrinsic::riscv_xperm_n;
      break;
    case RISCV::BI__builtin_riscv_xperm_b:
      ID = Intrinsic::riscv_xperm_b;
      break;
    case RISCV::BI__builtin_riscv_xperm_h:
      ID = Intrinsic::riscv_xperm_h;
      break;
    case RISCV::BI__builtin_riscv_xperm_w:
      ID = Intrinsic::riscv_xperm_w;
      break;

    // Zbr
    case RISCV::BI__builtin_riscv_crc32_b:
      ID = Intrinsic::riscv_crc32_b;
      break;
    case RISCV::BI__builtin_riscv_crc32_h:
      ID = Intrinsic::riscv_crc32_h;
      break;
    case RISCV::BI__builtin_riscv_crc32_w:
      ID = Intrinsic::riscv_crc32_w;
      break;
    case RISCV::BI__builtin_riscv_crc32_d:
      ID = Intrinsic::riscv_crc32_d;
      break;
    case RISCV::BI__builtin_riscv_crc32c_b:
      ID = Intrinsic::riscv_crc32c_b;
      break;
    case RISCV::BI__builtin_riscv_crc32c_h:
      ID = Intrinsic::riscv_crc32c_h;
      break;
    case RISCV::BI__builtin_riscv_crc32c_w:
      ID = Intrinsic::riscv_crc32c_w;
      break;
    case RISCV::BI__builtin_riscv_crc32c_d:
      ID = Intrinsic::riscv_crc32c_d;
      break;

    // Zbt
    case RISCV::BI__builtin_riscv_fsl_32:
    case RISCV::BI__builtin_riscv_fsl_64:
      ID = Intrinsic::riscv_fsl;
      break;
    case RISCV::BI__builtin_riscv_fsr_32:
    case RISCV::BI__builtin_riscv_fsr_64:
      ID = Intrinsic::riscv_fsr;
      break;
    }
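    // These scalar bit-manipulation intrinsics are overloaded on the integer
    // type, so the result type alone is enough to resolve the concrete
    // intrinsic (on RV64 this presumably mangles to names like
    // llvm.riscv.orc.b.i64); hence IntrinsicTypes carries only ResultType.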
    IntrinsicTypes = {ResultType};
    break;
  }

  // Vector builtins are handled from here.
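  // The included file is generated by TableGen from the RISC-V vector builtin
  // descriptions; each generated case is expected to set ID (and, where
  // needed, IntrinsicTypes, NF, and the Ops list) before the common call
  // emission below.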
#include "clang/Basic/riscv_vector_builtin_cg.inc"
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
  return Builder.CreateCall(F, Ops, "");
}