430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
85751857618577185781857918580185811858218583185841858518586185871858818589185901859118592185931859418595185961859718598185991860018601186021860318604186051860618607186081860918610186111861218613186141861518616186171861818619186201862118622186231862418625186261862718628186291863018631186321863318634186351863618637186381863918640186411864218643186441864518646186471864818649186501865118652186531865418655186561865718658186591866018661186621866318664186651866618667186681866918670186711867218673186741867518676186771867818679186801868118682186831868418685186861868718688186891869018691186921869318694186951869618697186981869918700187011870218703187041870518706187071870818709187101871118712187131871418715187161871718718187191872018721187221872318724187251872618727187281872918730187311873218733187341873518736187371873818739187401874118742187431874418745187461874718748187491875018751187521875318754187551875618757187581875918760187611876218763187641876518766187671876818769187701877118772187731877418775187761877718778187791878018781187821878318784187851878618787187881878918790187911879218793187941879518796187971879818799188001880118802188031880418805188061880718808188091881018811188121881318814188151881618817188181881918820188211882218823188241882518826188271882818829188301883118832188331883418835188361883718838188391884018841188421884318844188451884618847188481884918850188511885218853188541885518856188571885818859188601886118862188631886418865188661886718868188691887018871188721887318874188751887618877188781887918880188811888218883188841888518886188871888818889188901889118892188931889418895 |
- //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This contains code to emit Builtin calls as LLVM code.
- //
- //===----------------------------------------------------------------------===//
- #include "CGCUDARuntime.h"
- #include "CGCXXABI.h"
- #include "CGObjCRuntime.h"
- #include "CGOpenCLRuntime.h"
- #include "CGRecordLayout.h"
- #include "CodeGenFunction.h"
- #include "CodeGenModule.h"
- #include "ConstantEmitter.h"
- #include "PatternInit.h"
- #include "TargetInfo.h"
- #include "clang/AST/ASTContext.h"
- #include "clang/AST/Attr.h"
- #include "clang/AST/Decl.h"
- #include "clang/AST/OSLog.h"
- #include "clang/Basic/TargetBuiltins.h"
- #include "clang/Basic/TargetInfo.h"
- #include "clang/CodeGen/CGFunctionInfo.h"
- #include "llvm/ADT/APFloat.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/SmallPtrSet.h"
- #include "llvm/ADT/StringExtras.h"
- #include "llvm/Analysis/ValueTracking.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/InlineAsm.h"
- #include "llvm/IR/Intrinsics.h"
- #include "llvm/IR/IntrinsicsAArch64.h"
- #include "llvm/IR/IntrinsicsAMDGPU.h"
- #include "llvm/IR/IntrinsicsARM.h"
- #include "llvm/IR/IntrinsicsBPF.h"
- #include "llvm/IR/IntrinsicsHexagon.h"
- #include "llvm/IR/IntrinsicsNVPTX.h"
- #include "llvm/IR/IntrinsicsPowerPC.h"
- #include "llvm/IR/IntrinsicsR600.h"
- #include "llvm/IR/IntrinsicsRISCV.h"
- #include "llvm/IR/IntrinsicsS390.h"
- #include "llvm/IR/IntrinsicsWebAssembly.h"
- #include "llvm/IR/IntrinsicsX86.h"
- #include "llvm/IR/MDBuilder.h"
- #include "llvm/IR/MatrixBuilder.h"
- #include "llvm/Support/ConvertUTF.h"
- #include "llvm/Support/ScopedPrinter.h"
- #include "llvm/Support/X86TargetParser.h"
- #include <sstream>
- using namespace clang;
- using namespace CodeGen;
- using namespace llvm;
- static
- int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
- return std::min(High, std::max(Low, Value));
- }
- static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
- Align AlignmentInBytes) {
- ConstantInt *Byte;
- switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
- case LangOptions::TrivialAutoVarInitKind::Uninitialized:
- // Nothing to initialize.
- return;
- case LangOptions::TrivialAutoVarInitKind::Zero:
- Byte = CGF.Builder.getInt8(0x00);
- break;
- case LangOptions::TrivialAutoVarInitKind::Pattern: {
- llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
- Byte = llvm::dyn_cast<llvm::ConstantInt>(
- initializationPatternFor(CGF.CGM, Int8));
- break;
- }
- }
- if (CGF.CGM.stopAutoInit())
- return;
- auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
- I->addAnnotationMetadata("auto-init");
- }
- /// getBuiltinLibFunction - Given a builtin id for a function like
- /// "__builtin_fabsf", return a Function* for "fabsf".
- llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
- unsigned BuiltinID) {
- assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
- // Get the name, skip over the __builtin_ prefix (if necessary).
- StringRef Name;
- GlobalDecl D(FD);
- // TODO: This list should be expanded or refactored after all GCC-compatible
- // std libcall builtins are implemented.
- static SmallDenseMap<unsigned, StringRef, 8> F128Builtins{
- {Builtin::BI__builtin_printf, "__printfieee128"},
- {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
- {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
- {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
- {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
- {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
- {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
- };
- // If the builtin has been declared explicitly with an assembler label,
- // use the mangled name. This differs from the plain label on platforms
- // that prefix labels.
- if (FD->hasAttr<AsmLabelAttr>())
- Name = getMangledName(D);
- else {
- // TODO: This mutation should also be applied to targets other than
- // PPC, once the backend supports IEEE 128-bit style libcalls.
- if (getTriple().isPPC64() &&
- &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
- F128Builtins.find(BuiltinID) != F128Builtins.end())
- Name = F128Builtins[BuiltinID];
- else
- Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
- }
- llvm::FunctionType *Ty =
- cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
- return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
- }
- /// Emit the conversions required to turn the given value into an
- /// integer of the given size.
- static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
- QualType T, llvm::IntegerType *IntType) {
- V = CGF.EmitToMemory(V, T);
- if (V->getType()->isPointerTy())
- return CGF.Builder.CreatePtrToInt(V, IntType);
- assert(V->getType() == IntType);
- return V;
- }
- static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
- QualType T, llvm::Type *ResultType) {
- V = CGF.EmitFromMemory(V, T);
- if (ResultType->isPointerTy())
- return CGF.Builder.CreateIntToPtr(V, ResultType);
- assert(V->getType() == ResultType);
- return V;
- }
- /// Utility to insert an atomic instruction based on Intrinsic::ID
- /// and the expression node.
- static Value *MakeBinaryAtomicValue(
- CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
- AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
- QualType T = E->getType();
- assert(E->getArg(0)->getType()->isPointerType());
- assert(CGF.getContext().hasSameUnqualifiedType(T,
- E->getArg(0)->getType()->getPointeeType()));
- assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
- llvm::IntegerType *IntType =
- llvm::IntegerType::get(CGF.getLLVMContext(),
- CGF.getContext().getTypeSize(T));
- llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
- llvm::Value *Args[2];
- Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], Ordering);
- return EmitFromInt(CGF, Result, T, ValueType);
- }
- static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
- Value *Val = CGF.EmitScalarExpr(E->getArg(0));
- Value *Address = CGF.EmitScalarExpr(E->getArg(1));
- // Convert the type of the pointer to a pointer to the stored type.
- Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
- unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
- Value *BC = CGF.Builder.CreateBitCast(
- Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
- LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
- LV.setNontemporal(true);
- CGF.EmitStoreOfScalar(Val, LV, false);
- return nullptr;
- }
- static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
- Value *Address = CGF.EmitScalarExpr(E->getArg(0));
- LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
- LV.setNontemporal(true);
- return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
- }
- static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E) {
- return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
- }
- /// Utility to insert an atomic instruction based on Intrinsic::ID and
- /// the expression node, where the return value is the result of the
- /// operation.
- static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E,
- Instruction::BinaryOps Op,
- bool Invert = false) {
- QualType T = E->getType();
- assert(E->getArg(0)->getType()->isPointerType());
- assert(CGF.getContext().hasSameUnqualifiedType(T,
- E->getArg(0)->getType()->getPointeeType()));
- assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
- llvm::IntegerType *IntType =
- llvm::IntegerType::get(CGF.getLLVMContext(),
- CGF.getContext().getTypeSize(T));
- llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
- llvm::Value *Args[2];
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
- Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
- if (Invert)
- Result =
- CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
- llvm::ConstantInt::getAllOnesValue(IntType));
- Result = EmitFromInt(CGF, Result, T, ValueType);
- return RValue::get(Result);
- }
- /// Utility to insert an atomic cmpxchg instruction.
- ///
- /// @param CGF The current codegen function.
- /// @param E Builtin call expression to convert to cmpxchg.
- /// arg0 - address to operate on
- /// arg1 - value to compare with
- /// arg2 - new value
- /// @param ReturnBool Specifies whether to return success flag of
- /// cmpxchg result or the old value.
- ///
- /// @returns result of cmpxchg, according to ReturnBool
- ///
- /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
- /// invoke the function EmitAtomicCmpXchgForMSIntrin.
- static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
- bool ReturnBool) {
- QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
- llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
- llvm::IntegerType *IntType = llvm::IntegerType::get(
- CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
- Value *Args[3];
- Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
- Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::AtomicOrdering::SequentiallyConsistent);
- if (ReturnBool)
- // Extract boolean success flag and zext it to int.
- return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
- CGF.ConvertType(E->getType()));
- else
- // Extract old value and emit it using the same type as compare value.
- return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
- ValueType);
- }
- /// This function should be invoked to emit atomic cmpxchg for Microsoft's
- /// _InterlockedCompareExchange* intrinsics which have the following signature:
- /// T _InterlockedCompareExchange(T volatile *Destination,
- /// T Exchange,
- /// T Comparand);
- ///
- /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
- /// cmpxchg *Destination, Comparand, Exchange.
- /// So we need to swap Comparand and Exchange when invoking
- /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
- /// function MakeAtomicCmpXchgValue since it expects the arguments to be
- /// already swapped.
- static
- Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
- AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
- assert(E->getArg(0)->getType()->isPointerType());
- assert(CGF.getContext().hasSameUnqualifiedType(
- E->getType(), E->getArg(0)->getType()->getPointeeType()));
- assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
- E->getArg(1)->getType()));
- assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
- E->getArg(2)->getType()));
- auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
- auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
- auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
- // For Release ordering, the failure ordering should be Monotonic.
- auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
- AtomicOrdering::Monotonic :
- SuccessOrdering;
- // The atomic instruction is marked volatile for consistency with MSVC. This
- // blocks the few atomics optimizations that LLVM has. If we want to optimize
- // _Interlocked* operations in the future, we will have to remove the volatile
- // marker.
- auto *Result = CGF.Builder.CreateAtomicCmpXchg(
- Destination, Comparand, Exchange,
- SuccessOrdering, FailureOrdering);
- Result->setVolatile(true);
- return CGF.Builder.CreateExtractValue(Result, 0);
- }
- // 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
- // prototyped like this:
- //
- // unsigned char _InterlockedCompareExchange128...(
- // __int64 volatile * _Destination,
- // __int64 _ExchangeHigh,
- // __int64 _ExchangeLow,
- // __int64 * _ComparandResult);
- static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
- const CallExpr *E,
- AtomicOrdering SuccessOrdering) {
- assert(E->getNumArgs() == 4);
- llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
- llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
- assert(Destination->getType()->isPointerTy());
- assert(!ExchangeHigh->getType()->isPointerTy());
- assert(!ExchangeLow->getType()->isPointerTy());
- assert(ComparandPtr->getType()->isPointerTy());
- // For Release ordering, the failure ordering should be Monotonic.
- auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
- ? AtomicOrdering::Monotonic
- : SuccessOrdering;
- // Convert to i128 pointers and values.
- llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
- llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
- Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
- Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
- CGF.getContext().toCharUnitsFromBits(128));
- // (((i128)hi) << 64) | ((i128)lo)
- ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
- ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
- ExchangeHigh =
- CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
- llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
- // Load the comparand for the instruction.
- llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
- auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- SuccessOrdering, FailureOrdering);
- // The atomic instruction is marked volatile for consistency with MSVC. This
- // blocks the few atomics optimizations that LLVM has. If we want to optimize
- // _Interlocked* operations in the future, we will have to remove the volatile
- // marker.
- CXI->setVolatile(true);
- // Store the result as an outparameter.
- CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
- ComparandResult);
- // Get the success boolean and zero extend it to i8.
- Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
- return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
- }
- static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
- AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
- assert(E->getArg(0)->getType()->isPointerType());
- auto *IntTy = CGF.ConvertType(E->getType());
- auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
- return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
- }
- static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
- AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
- assert(E->getArg(0)->getType()->isPointerType());
- auto *IntTy = CGF.ConvertType(E->getType());
- auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
- return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
- }
- // Build a plain volatile load.
- static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
- Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy =
- llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
- Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
- Load->setVolatile(true);
- return Load;
- }
- // Build a plain volatile store.
- static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
- Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
- Value *Value = CGF.EmitScalarExpr(E->getArg(1));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy =
- llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
- Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::StoreInst *Store =
- CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
- Store->setVolatile(true);
- return Store;
- }
- // Emit a simple mangled intrinsic that has 1 argument and a return type
- // matching the argument type. Depending on mode, this may be a constrained
- // floating-point intrinsic.
- static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E, unsigned IntrinsicID,
- unsigned ConstrainedIntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
- return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
- } else {
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, Src0);
- }
- }
- // Emit an intrinsic that has 2 operands of the same type as its result.
- // Depending on mode, this may be a constrained floating-point intrinsic.
- static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E, unsigned IntrinsicID,
- unsigned ConstrainedIntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
- return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
- } else {
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, { Src0, Src1 });
- }
- }
- // Emit an intrinsic that has 3 operands of the same type as its result.
- // Depending on mode, this may be a constrained floating-point intrinsic.
- static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E, unsigned IntrinsicID,
- unsigned ConstrainedIntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
- if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
- return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
- } else {
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
- }
- }
- // Emit an intrinsic where all operands are of the same type as the result.
- // Depending on mode, this may be a constrained floating-point intrinsic.
- static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
- unsigned IntrinsicID,
- unsigned ConstrainedIntrinsicID,
- llvm::Type *Ty,
- ArrayRef<Value *> Args) {
- Function *F;
- if (CGF.Builder.getIsFPConstrained())
- F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
- else
- F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
- if (CGF.Builder.getIsFPConstrained())
- return CGF.Builder.CreateConstrainedFPCall(F, Args);
- else
- return CGF.Builder.CreateCall(F, Args);
- }
- // Emit a simple mangled intrinsic that has 1 argument and a return type
- // matching the argument type.
- static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
- unsigned IntrinsicID,
- llvm::StringRef Name = "") {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, Src0, Name);
- }
- // Emit an intrinsic that has 2 operands of the same type as its result.
- static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, { Src0, Src1 });
- }
- // Emit an intrinsic that has 3 operands of the same type as its result.
- static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
- }
- // Emit an intrinsic that has 1 float or double operand, and 1 integer.
- static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, {Src0, Src1});
- }
- // Emit an intrinsic that has overloaded integer result and fp operand.
- static Value *
- emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
- unsigned IntrinsicID,
- unsigned ConstrainedIntrinsicID) {
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
- {ResultType, Src0->getType()});
- return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
- } else {
- Function *F =
- CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
- return CGF.Builder.CreateCall(F, Src0);
- }
- }
- /// EmitFAbs - Emit a call to @llvm.fabs().
- static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
- Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
- llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
- Call->setDoesNotAccessMemory();
- return Call;
- }
- /// Emit the computation of the sign bit for a floating point value. Returns
- /// the i1 sign bit value.
- static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
- LLVMContext &C = CGF.CGM.getLLVMContext();
- llvm::Type *Ty = V->getType();
- int Width = Ty->getPrimitiveSizeInBits();
- llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
- V = CGF.Builder.CreateBitCast(V, IntTy);
- if (Ty->isPPC_FP128Ty()) {
- // We want the sign bit of the higher-order double. The bitcast we just
- // did works as if the double-double was stored to memory and then
- // read as an i128. The "store" will put the higher-order double in the
- // lower address in both little- and big-Endian modes, but the "load"
- // will treat those bits as a different part of the i128: the low bits in
- // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
- // we need to shift the high bits down to the low before truncating.
- Width >>= 1;
- if (CGF.getTarget().isBigEndian()) {
- Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
- V = CGF.Builder.CreateLShr(V, ShiftCst);
- }
- // We are truncating the value in order to extract the higher-order
- // double, which we will be using to extract the sign from.
- IntTy = llvm::IntegerType::get(C, Width);
- V = CGF.Builder.CreateTrunc(V, IntTy);
- }
- Value *Zero = llvm::Constant::getNullValue(IntTy);
- return CGF.Builder.CreateICmpSLT(V, Zero);
- }
- static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
- const CallExpr *E, llvm::Constant *calleeValue) {
- CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
- return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
- }
- /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
- /// depending on IntrinsicID.
- ///
- /// \arg CGF The current codegen function.
- /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
- /// \arg X The first argument to the llvm.*.with.overflow.*.
- /// \arg Y The second argument to the llvm.*.with.overflow.*.
- /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
- /// \returns The result (i.e. sum/product) returned by the intrinsic.
- static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
- const llvm::Intrinsic::ID IntrinsicID,
- llvm::Value *X, llvm::Value *Y,
- llvm::Value *&Carry) {
- // Make sure we have integers of the same width.
- assert(X->getType() == Y->getType() &&
- "Arguments must be the same type. (Did you forget to make sure both "
- "arguments have the same integer width?)");
- Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
- llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
- Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
- return CGF.Builder.CreateExtractValue(Tmp, 0);
- }
- static Value *emitRangedBuiltin(CodeGenFunction &CGF,
- unsigned IntrinsicID,
- int low, int high) {
- llvm::MDBuilder MDHelper(CGF.getLLVMContext());
- llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
- llvm::Instruction *Call = CGF.Builder.CreateCall(F);
- Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
- return Call;
- }
- namespace {
- struct WidthAndSignedness {
- unsigned Width;
- bool Signed;
- };
- }
- static WidthAndSignedness
- getIntegerWidthAndSignedness(const clang::ASTContext &context,
- const clang::QualType Type) {
- assert(Type->isIntegerType() && "Given type is not an integer.");
- unsigned Width = Type->isBooleanType() ? 1
- : Type->isBitIntType() ? context.getIntWidth(Type)
- : context.getTypeInfo(Type).Width;
- bool Signed = Type->isSignedIntegerType();
- return {Width, Signed};
- }
- // Given one or more integer types, this function produces an integer type that
- // encompasses them: any value in one of the given types could be expressed in
- // the encompassing type.
- static struct WidthAndSignedness
- EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
- assert(Types.size() > 0 && "Empty list of types.");
- // If any of the given types is signed, we must return a signed type.
- bool Signed = false;
- for (const auto &Type : Types) {
- Signed |= Type.Signed;
- }
- // The encompassing type must have a width greater than or equal to the width
- // of the specified types. Additionally, if the encompassing type is signed,
- // its width must be strictly greater than the width of any unsigned types
- // given.
- unsigned Width = 0;
- for (const auto &Type : Types) {
- unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
- if (Width < MinWidth) {
- Width = MinWidth;
- }
- }
- return {Width, Signed};
- }
- Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
- llvm::Type *DestType = Int8PtrTy;
- if (ArgValue->getType() != DestType)
- ArgValue =
- Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
- Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
- return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
- }
- /// Checks if using the result of __builtin_object_size(p, @p From) in place of
- /// __builtin_object_size(p, @p To) is correct
- static bool areBOSTypesCompatible(int From, int To) {
- // Note: Our __builtin_object_size implementation currently treats Type=0 and
- // Type=2 identically. Encoding this implementation detail here may make
- // improving __builtin_object_size difficult in the future, so it's omitted.
- return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
- }
- static llvm::Value *
- getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
- return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
- }
- llvm::Value *
- CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType,
- llvm::Value *EmittedE,
- bool IsDynamic) {
- uint64_t ObjectSize;
- if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
- return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
- return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
- }
- /// Returns a Value corresponding to the size of the given expression.
- /// This Value may be either of the following:
- /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
- /// it)
- /// - A call to the @llvm.objectsize intrinsic
- ///
- /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
- /// and we wouldn't otherwise try to reference a pass_object_size parameter,
- /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
- llvm::Value *
- CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType,
- llvm::Value *EmittedE, bool IsDynamic) {
- // We need to reference an argument if the pointer is a parameter with the
- // pass_object_size attribute.
- if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
- auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
- auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
- if (Param != nullptr && PS != nullptr &&
- areBOSTypesCompatible(PS->getType(), Type)) {
- auto Iter = SizeArguments.find(Param);
- assert(Iter != SizeArguments.end());
- const ImplicitParamDecl *D = Iter->second;
- auto DIter = LocalDeclMap.find(D);
- assert(DIter != LocalDeclMap.end());
- return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
- getContext().getSizeType(), E->getBeginLoc());
- }
- }
- // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
- // evaluate E for side-effects. In either case, we shouldn't lower to
- // @llvm.objectsize.
- if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
- return getDefaultBuiltinObjectSizeResult(Type, ResType);
- Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
- assert(Ptr->getType()->isPointerTy() &&
- "Non-pointer passed to __builtin_object_size?");
- Function *F =
- CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
- // LLVM only supports 0 and 2; make sure that we pass it along as a boolean.
- Value *Min = Builder.getInt1((Type & 2) != 0);
- // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
- Value *NullIsUnknown = Builder.getTrue();
- Value *Dynamic = Builder.getInt1(IsDynamic);
- return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
- }
- namespace {
- /// A struct to generically describe a bit test intrinsic.
- struct BitTest {
- enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
- enum InterlockingKind : uint8_t {
- Unlocked,
- Sequential,
- Acquire,
- Release,
- NoFence
- };
- ActionKind Action;
- InterlockingKind Interlocking;
- bool Is64Bit;
- static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
- };
- } // namespace
- BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
- switch (BuiltinID) {
- // Main portable variants.
- case Builtin::BI_bittest:
- return {TestOnly, Unlocked, false};
- case Builtin::BI_bittestandcomplement:
- return {Complement, Unlocked, false};
- case Builtin::BI_bittestandreset:
- return {Reset, Unlocked, false};
- case Builtin::BI_bittestandset:
- return {Set, Unlocked, false};
- case Builtin::BI_interlockedbittestandreset:
- return {Reset, Sequential, false};
- case Builtin::BI_interlockedbittestandset:
- return {Set, Sequential, false};
- // X86-specific 64-bit variants.
- case Builtin::BI_bittest64:
- return {TestOnly, Unlocked, true};
- case Builtin::BI_bittestandcomplement64:
- return {Complement, Unlocked, true};
- case Builtin::BI_bittestandreset64:
- return {Reset, Unlocked, true};
- case Builtin::BI_bittestandset64:
- return {Set, Unlocked, true};
- case Builtin::BI_interlockedbittestandreset64:
- return {Reset, Sequential, true};
- case Builtin::BI_interlockedbittestandset64:
- return {Set, Sequential, true};
- // ARM/AArch64-specific ordering variants.
- case Builtin::BI_interlockedbittestandset_acq:
- return {Set, Acquire, false};
- case Builtin::BI_interlockedbittestandset_rel:
- return {Set, Release, false};
- case Builtin::BI_interlockedbittestandset_nf:
- return {Set, NoFence, false};
- case Builtin::BI_interlockedbittestandreset_acq:
- return {Reset, Acquire, false};
- case Builtin::BI_interlockedbittestandreset_rel:
- return {Reset, Release, false};
- case Builtin::BI_interlockedbittestandreset_nf:
- return {Reset, NoFence, false};
- }
- llvm_unreachable("expected only bittest intrinsics");
- }
- static char bitActionToX86BTCode(BitTest::ActionKind A) {
- switch (A) {
- case BitTest::TestOnly: return '\0';
- case BitTest::Complement: return 'c';
- case BitTest::Reset: return 'r';
- case BitTest::Set: return 's';
- }
- llvm_unreachable("invalid action");
- }
- static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
- BitTest BT,
- const CallExpr *E, Value *BitBase,
- Value *BitPos) {
- char Action = bitActionToX86BTCode(BT.Action);
- char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
- // Build the assembly.
- SmallString<64> Asm;
- raw_svector_ostream AsmOS(Asm);
- if (BT.Interlocking != BitTest::Unlocked)
- AsmOS << "lock ";
- AsmOS << "bt";
- if (Action)
- AsmOS << Action;
- AsmOS << SizeSuffix << " $2, ($1)";
- // Build the constraints. FIXME: We should support immediates when possible.
- std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
- std::string MachineClobbers = CGF.getTarget().getClobbers();
- if (!MachineClobbers.empty()) {
- Constraints += ',';
- Constraints += MachineClobbers;
- }
- llvm::IntegerType *IntType = llvm::IntegerType::get(
- CGF.getLLVMContext(),
- CGF.getContext().getTypeSize(E->getArg(1)->getType()));
- llvm::Type *IntPtrType = IntType->getPointerTo();
- llvm::FunctionType *FTy =
- llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
- return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
- }
- static llvm::AtomicOrdering
- getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
- switch (I) {
- case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
- case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
- case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
- case BitTest::Release: return llvm::AtomicOrdering::Release;
- case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
- }
- llvm_unreachable("invalid interlocking");
- }
- /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
- /// bits and a bit position and read and optionally modify the bit at that
- /// position. The position index can be arbitrarily large, i.e. it can be larger
- /// than 31 or 63, so we need an indexed load in the general case.
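- // For example (illustrative), a bit position of 40 addresses bit (40 & 7) == 0
- // within byte 40 >> 3 == 5 of the array, which is why the generic lowering
- // below computes a byte index and a low 3-bit mask.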
- static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
- unsigned BuiltinID,
- const CallExpr *E) {
- Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
- Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
- BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
- // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
- // indexing operation internally. Use them if possible.
- if (CGF.getTarget().getTriple().isX86())
- return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
- // Otherwise, use generic code to load one byte and test the bit. Use all but
- // the bottom three bits as the array index, and the bottom three bits to form
- // a mask.
- // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
- Value *ByteIndex = CGF.Builder.CreateAShr(
- BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
- Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
- Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
- ByteIndex, "bittest.byteaddr"),
- CharUnits::One());
- Value *PosLow =
- CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
- llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
- // The updating instructions will need a mask.
- Value *Mask = nullptr;
- if (BT.Action != BitTest::TestOnly) {
- Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
- "bittest.mask");
- }
- // Check the action and ordering of the interlocked intrinsics.
- llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
- Value *OldByte = nullptr;
- if (Ordering != llvm::AtomicOrdering::NotAtomic) {
- // Emit a combined atomicrmw load/store operation for the interlocked
- // intrinsics.
- llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
- if (BT.Action == BitTest::Reset) {
- Mask = CGF.Builder.CreateNot(Mask);
- RMWOp = llvm::AtomicRMWInst::And;
- }
- OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
- Ordering);
- } else {
- // Emit a plain load for the non-interlocked intrinsics.
- OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
- Value *NewByte = nullptr;
- switch (BT.Action) {
- case BitTest::TestOnly:
- // Don't store anything.
- break;
- case BitTest::Complement:
- NewByte = CGF.Builder.CreateXor(OldByte, Mask);
- break;
- case BitTest::Reset:
- NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
- break;
- case BitTest::Set:
- NewByte = CGF.Builder.CreateOr(OldByte, Mask);
- break;
- }
- if (NewByte)
- CGF.Builder.CreateStore(NewByte, ByteAddr);
- }
- // However we loaded the old byte, either by plain load or atomicrmw, shift
- // the bit into the low position and mask it to 0 or 1.
- Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
- return CGF.Builder.CreateAnd(
- ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
- }
- static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
- unsigned BuiltinID,
- const CallExpr *E) {
- Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
- SmallString<64> Asm;
- raw_svector_ostream AsmOS(Asm);
- llvm::IntegerType *RetType = CGF.Int32Ty;
- switch (BuiltinID) {
- case clang::PPC::BI__builtin_ppc_ldarx:
- AsmOS << "ldarx ";
- RetType = CGF.Int64Ty;
- break;
- case clang::PPC::BI__builtin_ppc_lwarx:
- AsmOS << "lwarx ";
- RetType = CGF.Int32Ty;
- break;
- case clang::PPC::BI__builtin_ppc_lharx:
- AsmOS << "lharx ";
- RetType = CGF.Int16Ty;
- break;
- case clang::PPC::BI__builtin_ppc_lbarx:
- AsmOS << "lbarx ";
- RetType = CGF.Int8Ty;
- break;
- default:
- llvm_unreachable("Expected only PowerPC load reserve intrinsics");
- }
- AsmOS << "$0, ${1:y}";
- std::string Constraints = "=r,*Z,~{memory}";
- std::string MachineClobbers = CGF.getTarget().getClobbers();
- if (!MachineClobbers.empty()) {
- Constraints += ',';
- Constraints += MachineClobbers;
- }
- llvm::Type *IntPtrType = RetType->getPointerTo();
- llvm::FunctionType *FTy =
- llvm::FunctionType::get(RetType, {IntPtrType}, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
- llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
- CI->addParamAttr(
- 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
- return CI;
- }
- namespace {
- enum class MSVCSetJmpKind {
- _setjmpex,
- _setjmp3,
- _setjmp
- };
- }
- /// MSVC handles setjmp a bit differently on different platforms. On every
- /// architecture except 32-bit x86, the frame address is passed. On x86, extra
- /// parameters can be passed as variadic arguments, but we always pass none.
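- // Concretely, in the code below the _setjmp3 flavour is called as
- // _setjmp3(buf, 0) with a variadic signature, while _setjmp/_setjmpex receive
- // the frame address (or sponentry on AArch64) as their second argument.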
- static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
- const CallExpr *E) {
- llvm::Value *Arg1 = nullptr;
- llvm::Type *Arg1Ty = nullptr;
- StringRef Name;
- bool IsVarArg = false;
- if (SJKind == MSVCSetJmpKind::_setjmp3) {
- Name = "_setjmp3";
- Arg1Ty = CGF.Int32Ty;
- Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
- IsVarArg = true;
- } else {
- Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
- Arg1Ty = CGF.Int8PtrTy;
- if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
- Arg1 = CGF.Builder.CreateCall(
- CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
- } else
- Arg1 = CGF.Builder.CreateCall(
- CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
- }
- // Mark the call site and declaration with ReturnsTwice.
- llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
- llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
- CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
- llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
- ReturnsTwiceAttr, /*Local=*/true);
- llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
- CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
- llvm::Value *Args[] = {Buf, Arg1};
- llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
- CB->setAttributes(ReturnsTwiceAttr);
- return RValue::get(CB);
- }
- // Many MSVC builtins are available on x64, ARM, and AArch64; to avoid
- // repeating code, we handle them here.
- enum class CodeGenFunction::MSVCIntrin {
- _BitScanForward,
- _BitScanReverse,
- _InterlockedAnd,
- _InterlockedDecrement,
- _InterlockedExchange,
- _InterlockedExchangeAdd,
- _InterlockedExchangeSub,
- _InterlockedIncrement,
- _InterlockedOr,
- _InterlockedXor,
- _InterlockedExchangeAdd_acq,
- _InterlockedExchangeAdd_rel,
- _InterlockedExchangeAdd_nf,
- _InterlockedExchange_acq,
- _InterlockedExchange_rel,
- _InterlockedExchange_nf,
- _InterlockedCompareExchange_acq,
- _InterlockedCompareExchange_rel,
- _InterlockedCompareExchange_nf,
- _InterlockedCompareExchange128,
- _InterlockedCompareExchange128_acq,
- _InterlockedCompareExchange128_rel,
- _InterlockedCompareExchange128_nf,
- _InterlockedOr_acq,
- _InterlockedOr_rel,
- _InterlockedOr_nf,
- _InterlockedXor_acq,
- _InterlockedXor_rel,
- _InterlockedXor_nf,
- _InterlockedAnd_acq,
- _InterlockedAnd_rel,
- _InterlockedAnd_nf,
- _InterlockedIncrement_acq,
- _InterlockedIncrement_rel,
- _InterlockedIncrement_nf,
- _InterlockedDecrement_acq,
- _InterlockedDecrement_rel,
- _InterlockedDecrement_nf,
- __fastfail,
- };
- static Optional<CodeGenFunction::MSVCIntrin>
- translateArmToMsvcIntrin(unsigned BuiltinID) {
- using MSVCIntrin = CodeGenFunction::MSVCIntrin;
- switch (BuiltinID) {
- default:
- return None;
- case ARM::BI_BitScanForward:
- case ARM::BI_BitScanForward64:
- return MSVCIntrin::_BitScanForward;
- case ARM::BI_BitScanReverse:
- case ARM::BI_BitScanReverse64:
- return MSVCIntrin::_BitScanReverse;
- case ARM::BI_InterlockedAnd64:
- return MSVCIntrin::_InterlockedAnd;
- case ARM::BI_InterlockedExchange64:
- return MSVCIntrin::_InterlockedExchange;
- case ARM::BI_InterlockedExchangeAdd64:
- return MSVCIntrin::_InterlockedExchangeAdd;
- case ARM::BI_InterlockedExchangeSub64:
- return MSVCIntrin::_InterlockedExchangeSub;
- case ARM::BI_InterlockedOr64:
- return MSVCIntrin::_InterlockedOr;
- case ARM::BI_InterlockedXor64:
- return MSVCIntrin::_InterlockedXor;
- case ARM::BI_InterlockedDecrement64:
- return MSVCIntrin::_InterlockedDecrement;
- case ARM::BI_InterlockedIncrement64:
- return MSVCIntrin::_InterlockedIncrement;
- case ARM::BI_InterlockedExchangeAdd8_acq:
- case ARM::BI_InterlockedExchangeAdd16_acq:
- case ARM::BI_InterlockedExchangeAdd_acq:
- case ARM::BI_InterlockedExchangeAdd64_acq:
- return MSVCIntrin::_InterlockedExchangeAdd_acq;
- case ARM::BI_InterlockedExchangeAdd8_rel:
- case ARM::BI_InterlockedExchangeAdd16_rel:
- case ARM::BI_InterlockedExchangeAdd_rel:
- case ARM::BI_InterlockedExchangeAdd64_rel:
- return MSVCIntrin::_InterlockedExchangeAdd_rel;
- case ARM::BI_InterlockedExchangeAdd8_nf:
- case ARM::BI_InterlockedExchangeAdd16_nf:
- case ARM::BI_InterlockedExchangeAdd_nf:
- case ARM::BI_InterlockedExchangeAdd64_nf:
- return MSVCIntrin::_InterlockedExchangeAdd_nf;
- case ARM::BI_InterlockedExchange8_acq:
- case ARM::BI_InterlockedExchange16_acq:
- case ARM::BI_InterlockedExchange_acq:
- case ARM::BI_InterlockedExchange64_acq:
- return MSVCIntrin::_InterlockedExchange_acq;
- case ARM::BI_InterlockedExchange8_rel:
- case ARM::BI_InterlockedExchange16_rel:
- case ARM::BI_InterlockedExchange_rel:
- case ARM::BI_InterlockedExchange64_rel:
- return MSVCIntrin::_InterlockedExchange_rel;
- case ARM::BI_InterlockedExchange8_nf:
- case ARM::BI_InterlockedExchange16_nf:
- case ARM::BI_InterlockedExchange_nf:
- case ARM::BI_InterlockedExchange64_nf:
- return MSVCIntrin::_InterlockedExchange_nf;
- case ARM::BI_InterlockedCompareExchange8_acq:
- case ARM::BI_InterlockedCompareExchange16_acq:
- case ARM::BI_InterlockedCompareExchange_acq:
- case ARM::BI_InterlockedCompareExchange64_acq:
- return MSVCIntrin::_InterlockedCompareExchange_acq;
- case ARM::BI_InterlockedCompareExchange8_rel:
- case ARM::BI_InterlockedCompareExchange16_rel:
- case ARM::BI_InterlockedCompareExchange_rel:
- case ARM::BI_InterlockedCompareExchange64_rel:
- return MSVCIntrin::_InterlockedCompareExchange_rel;
- case ARM::BI_InterlockedCompareExchange8_nf:
- case ARM::BI_InterlockedCompareExchange16_nf:
- case ARM::BI_InterlockedCompareExchange_nf:
- case ARM::BI_InterlockedCompareExchange64_nf:
- return MSVCIntrin::_InterlockedCompareExchange_nf;
- case ARM::BI_InterlockedOr8_acq:
- case ARM::BI_InterlockedOr16_acq:
- case ARM::BI_InterlockedOr_acq:
- case ARM::BI_InterlockedOr64_acq:
- return MSVCIntrin::_InterlockedOr_acq;
- case ARM::BI_InterlockedOr8_rel:
- case ARM::BI_InterlockedOr16_rel:
- case ARM::BI_InterlockedOr_rel:
- case ARM::BI_InterlockedOr64_rel:
- return MSVCIntrin::_InterlockedOr_rel;
- case ARM::BI_InterlockedOr8_nf:
- case ARM::BI_InterlockedOr16_nf:
- case ARM::BI_InterlockedOr_nf:
- case ARM::BI_InterlockedOr64_nf:
- return MSVCIntrin::_InterlockedOr_nf;
- case ARM::BI_InterlockedXor8_acq:
- case ARM::BI_InterlockedXor16_acq:
- case ARM::BI_InterlockedXor_acq:
- case ARM::BI_InterlockedXor64_acq:
- return MSVCIntrin::_InterlockedXor_acq;
- case ARM::BI_InterlockedXor8_rel:
- case ARM::BI_InterlockedXor16_rel:
- case ARM::BI_InterlockedXor_rel:
- case ARM::BI_InterlockedXor64_rel:
- return MSVCIntrin::_InterlockedXor_rel;
- case ARM::BI_InterlockedXor8_nf:
- case ARM::BI_InterlockedXor16_nf:
- case ARM::BI_InterlockedXor_nf:
- case ARM::BI_InterlockedXor64_nf:
- return MSVCIntrin::_InterlockedXor_nf;
- case ARM::BI_InterlockedAnd8_acq:
- case ARM::BI_InterlockedAnd16_acq:
- case ARM::BI_InterlockedAnd_acq:
- case ARM::BI_InterlockedAnd64_acq:
- return MSVCIntrin::_InterlockedAnd_acq;
- case ARM::BI_InterlockedAnd8_rel:
- case ARM::BI_InterlockedAnd16_rel:
- case ARM::BI_InterlockedAnd_rel:
- case ARM::BI_InterlockedAnd64_rel:
- return MSVCIntrin::_InterlockedAnd_rel;
- case ARM::BI_InterlockedAnd8_nf:
- case ARM::BI_InterlockedAnd16_nf:
- case ARM::BI_InterlockedAnd_nf:
- case ARM::BI_InterlockedAnd64_nf:
- return MSVCIntrin::_InterlockedAnd_nf;
- case ARM::BI_InterlockedIncrement16_acq:
- case ARM::BI_InterlockedIncrement_acq:
- case ARM::BI_InterlockedIncrement64_acq:
- return MSVCIntrin::_InterlockedIncrement_acq;
- case ARM::BI_InterlockedIncrement16_rel:
- case ARM::BI_InterlockedIncrement_rel:
- case ARM::BI_InterlockedIncrement64_rel:
- return MSVCIntrin::_InterlockedIncrement_rel;
- case ARM::BI_InterlockedIncrement16_nf:
- case ARM::BI_InterlockedIncrement_nf:
- case ARM::BI_InterlockedIncrement64_nf:
- return MSVCIntrin::_InterlockedIncrement_nf;
- case ARM::BI_InterlockedDecrement16_acq:
- case ARM::BI_InterlockedDecrement_acq:
- case ARM::BI_InterlockedDecrement64_acq:
- return MSVCIntrin::_InterlockedDecrement_acq;
- case ARM::BI_InterlockedDecrement16_rel:
- case ARM::BI_InterlockedDecrement_rel:
- case ARM::BI_InterlockedDecrement64_rel:
- return MSVCIntrin::_InterlockedDecrement_rel;
- case ARM::BI_InterlockedDecrement16_nf:
- case ARM::BI_InterlockedDecrement_nf:
- case ARM::BI_InterlockedDecrement64_nf:
- return MSVCIntrin::_InterlockedDecrement_nf;
- }
- llvm_unreachable("must return from switch");
- }
- static Optional<CodeGenFunction::MSVCIntrin>
- translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
- using MSVCIntrin = CodeGenFunction::MSVCIntrin;
- switch (BuiltinID) {
- default:
- return None;
- case AArch64::BI_BitScanForward:
- case AArch64::BI_BitScanForward64:
- return MSVCIntrin::_BitScanForward;
- case AArch64::BI_BitScanReverse:
- case AArch64::BI_BitScanReverse64:
- return MSVCIntrin::_BitScanReverse;
- case AArch64::BI_InterlockedAnd64:
- return MSVCIntrin::_InterlockedAnd;
- case AArch64::BI_InterlockedExchange64:
- return MSVCIntrin::_InterlockedExchange;
- case AArch64::BI_InterlockedExchangeAdd64:
- return MSVCIntrin::_InterlockedExchangeAdd;
- case AArch64::BI_InterlockedExchangeSub64:
- return MSVCIntrin::_InterlockedExchangeSub;
- case AArch64::BI_InterlockedOr64:
- return MSVCIntrin::_InterlockedOr;
- case AArch64::BI_InterlockedXor64:
- return MSVCIntrin::_InterlockedXor;
- case AArch64::BI_InterlockedDecrement64:
- return MSVCIntrin::_InterlockedDecrement;
- case AArch64::BI_InterlockedIncrement64:
- return MSVCIntrin::_InterlockedIncrement;
- case AArch64::BI_InterlockedExchangeAdd8_acq:
- case AArch64::BI_InterlockedExchangeAdd16_acq:
- case AArch64::BI_InterlockedExchangeAdd_acq:
- case AArch64::BI_InterlockedExchangeAdd64_acq:
- return MSVCIntrin::_InterlockedExchangeAdd_acq;
- case AArch64::BI_InterlockedExchangeAdd8_rel:
- case AArch64::BI_InterlockedExchangeAdd16_rel:
- case AArch64::BI_InterlockedExchangeAdd_rel:
- case AArch64::BI_InterlockedExchangeAdd64_rel:
- return MSVCIntrin::_InterlockedExchangeAdd_rel;
- case AArch64::BI_InterlockedExchangeAdd8_nf:
- case AArch64::BI_InterlockedExchangeAdd16_nf:
- case AArch64::BI_InterlockedExchangeAdd_nf:
- case AArch64::BI_InterlockedExchangeAdd64_nf:
- return MSVCIntrin::_InterlockedExchangeAdd_nf;
- case AArch64::BI_InterlockedExchange8_acq:
- case AArch64::BI_InterlockedExchange16_acq:
- case AArch64::BI_InterlockedExchange_acq:
- case AArch64::BI_InterlockedExchange64_acq:
- return MSVCIntrin::_InterlockedExchange_acq;
- case AArch64::BI_InterlockedExchange8_rel:
- case AArch64::BI_InterlockedExchange16_rel:
- case AArch64::BI_InterlockedExchange_rel:
- case AArch64::BI_InterlockedExchange64_rel:
- return MSVCIntrin::_InterlockedExchange_rel;
- case AArch64::BI_InterlockedExchange8_nf:
- case AArch64::BI_InterlockedExchange16_nf:
- case AArch64::BI_InterlockedExchange_nf:
- case AArch64::BI_InterlockedExchange64_nf:
- return MSVCIntrin::_InterlockedExchange_nf;
- case AArch64::BI_InterlockedCompareExchange8_acq:
- case AArch64::BI_InterlockedCompareExchange16_acq:
- case AArch64::BI_InterlockedCompareExchange_acq:
- case AArch64::BI_InterlockedCompareExchange64_acq:
- return MSVCIntrin::_InterlockedCompareExchange_acq;
- case AArch64::BI_InterlockedCompareExchange8_rel:
- case AArch64::BI_InterlockedCompareExchange16_rel:
- case AArch64::BI_InterlockedCompareExchange_rel:
- case AArch64::BI_InterlockedCompareExchange64_rel:
- return MSVCIntrin::_InterlockedCompareExchange_rel;
- case AArch64::BI_InterlockedCompareExchange8_nf:
- case AArch64::BI_InterlockedCompareExchange16_nf:
- case AArch64::BI_InterlockedCompareExchange_nf:
- case AArch64::BI_InterlockedCompareExchange64_nf:
- return MSVCIntrin::_InterlockedCompareExchange_nf;
- case AArch64::BI_InterlockedCompareExchange128:
- return MSVCIntrin::_InterlockedCompareExchange128;
- case AArch64::BI_InterlockedCompareExchange128_acq:
- return MSVCIntrin::_InterlockedCompareExchange128_acq;
- case AArch64::BI_InterlockedCompareExchange128_nf:
- return MSVCIntrin::_InterlockedCompareExchange128_nf;
- case AArch64::BI_InterlockedCompareExchange128_rel:
- return MSVCIntrin::_InterlockedCompareExchange128_rel;
- case AArch64::BI_InterlockedOr8_acq:
- case AArch64::BI_InterlockedOr16_acq:
- case AArch64::BI_InterlockedOr_acq:
- case AArch64::BI_InterlockedOr64_acq:
- return MSVCIntrin::_InterlockedOr_acq;
- case AArch64::BI_InterlockedOr8_rel:
- case AArch64::BI_InterlockedOr16_rel:
- case AArch64::BI_InterlockedOr_rel:
- case AArch64::BI_InterlockedOr64_rel:
- return MSVCIntrin::_InterlockedOr_rel;
- case AArch64::BI_InterlockedOr8_nf:
- case AArch64::BI_InterlockedOr16_nf:
- case AArch64::BI_InterlockedOr_nf:
- case AArch64::BI_InterlockedOr64_nf:
- return MSVCIntrin::_InterlockedOr_nf;
- case AArch64::BI_InterlockedXor8_acq:
- case AArch64::BI_InterlockedXor16_acq:
- case AArch64::BI_InterlockedXor_acq:
- case AArch64::BI_InterlockedXor64_acq:
- return MSVCIntrin::_InterlockedXor_acq;
- case AArch64::BI_InterlockedXor8_rel:
- case AArch64::BI_InterlockedXor16_rel:
- case AArch64::BI_InterlockedXor_rel:
- case AArch64::BI_InterlockedXor64_rel:
- return MSVCIntrin::_InterlockedXor_rel;
- case AArch64::BI_InterlockedXor8_nf:
- case AArch64::BI_InterlockedXor16_nf:
- case AArch64::BI_InterlockedXor_nf:
- case AArch64::BI_InterlockedXor64_nf:
- return MSVCIntrin::_InterlockedXor_nf;
- case AArch64::BI_InterlockedAnd8_acq:
- case AArch64::BI_InterlockedAnd16_acq:
- case AArch64::BI_InterlockedAnd_acq:
- case AArch64::BI_InterlockedAnd64_acq:
- return MSVCIntrin::_InterlockedAnd_acq;
- case AArch64::BI_InterlockedAnd8_rel:
- case AArch64::BI_InterlockedAnd16_rel:
- case AArch64::BI_InterlockedAnd_rel:
- case AArch64::BI_InterlockedAnd64_rel:
- return MSVCIntrin::_InterlockedAnd_rel;
- case AArch64::BI_InterlockedAnd8_nf:
- case AArch64::BI_InterlockedAnd16_nf:
- case AArch64::BI_InterlockedAnd_nf:
- case AArch64::BI_InterlockedAnd64_nf:
- return MSVCIntrin::_InterlockedAnd_nf;
- case AArch64::BI_InterlockedIncrement16_acq:
- case AArch64::BI_InterlockedIncrement_acq:
- case AArch64::BI_InterlockedIncrement64_acq:
- return MSVCIntrin::_InterlockedIncrement_acq;
- case AArch64::BI_InterlockedIncrement16_rel:
- case AArch64::BI_InterlockedIncrement_rel:
- case AArch64::BI_InterlockedIncrement64_rel:
- return MSVCIntrin::_InterlockedIncrement_rel;
- case AArch64::BI_InterlockedIncrement16_nf:
- case AArch64::BI_InterlockedIncrement_nf:
- case AArch64::BI_InterlockedIncrement64_nf:
- return MSVCIntrin::_InterlockedIncrement_nf;
- case AArch64::BI_InterlockedDecrement16_acq:
- case AArch64::BI_InterlockedDecrement_acq:
- case AArch64::BI_InterlockedDecrement64_acq:
- return MSVCIntrin::_InterlockedDecrement_acq;
- case AArch64::BI_InterlockedDecrement16_rel:
- case AArch64::BI_InterlockedDecrement_rel:
- case AArch64::BI_InterlockedDecrement64_rel:
- return MSVCIntrin::_InterlockedDecrement_rel;
- case AArch64::BI_InterlockedDecrement16_nf:
- case AArch64::BI_InterlockedDecrement_nf:
- case AArch64::BI_InterlockedDecrement64_nf:
- return MSVCIntrin::_InterlockedDecrement_nf;
- }
- llvm_unreachable("must return from switch");
- }
- static Optional<CodeGenFunction::MSVCIntrin>
- translateX86ToMsvcIntrin(unsigned BuiltinID) {
- using MSVCIntrin = CodeGenFunction::MSVCIntrin;
- switch (BuiltinID) {
- default:
- return None;
- case clang::X86::BI_BitScanForward:
- case clang::X86::BI_BitScanForward64:
- return MSVCIntrin::_BitScanForward;
- case clang::X86::BI_BitScanReverse:
- case clang::X86::BI_BitScanReverse64:
- return MSVCIntrin::_BitScanReverse;
- case clang::X86::BI_InterlockedAnd64:
- return MSVCIntrin::_InterlockedAnd;
- case clang::X86::BI_InterlockedCompareExchange128:
- return MSVCIntrin::_InterlockedCompareExchange128;
- case clang::X86::BI_InterlockedExchange64:
- return MSVCIntrin::_InterlockedExchange;
- case clang::X86::BI_InterlockedExchangeAdd64:
- return MSVCIntrin::_InterlockedExchangeAdd;
- case clang::X86::BI_InterlockedExchangeSub64:
- return MSVCIntrin::_InterlockedExchangeSub;
- case clang::X86::BI_InterlockedOr64:
- return MSVCIntrin::_InterlockedOr;
- case clang::X86::BI_InterlockedXor64:
- return MSVCIntrin::_InterlockedXor;
- case clang::X86::BI_InterlockedDecrement64:
- return MSVCIntrin::_InterlockedDecrement;
- case clang::X86::BI_InterlockedIncrement64:
- return MSVCIntrin::_InterlockedIncrement;
- }
- llvm_unreachable("must return from switch");
- }
- // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
- Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
- const CallExpr *E) {
- switch (BuiltinID) {
- case MSVCIntrin::_BitScanForward:
- case MSVCIntrin::_BitScanReverse: {
- Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
- Value *ArgValue = EmitScalarExpr(E->getArg(1));
- llvm::Type *ArgType = ArgValue->getType();
- llvm::Type *IndexType = IndexAddress.getElementType();
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *ArgZero = llvm::Constant::getNullValue(ArgType);
- Value *ResZero = llvm::Constant::getNullValue(ResultType);
- Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
- BasicBlock *Begin = Builder.GetInsertBlock();
- BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
- Builder.SetInsertPoint(End);
- PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
- Builder.SetInsertPoint(Begin);
- Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
- BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
- Builder.CreateCondBr(IsZero, End, NotZero);
- Result->addIncoming(ResZero, Begin);
- Builder.SetInsertPoint(NotZero);
- if (BuiltinID == MSVCIntrin::_BitScanForward) {
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
- Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
- ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
- Builder.CreateStore(ZeroCount, IndexAddress, false);
- } else {
- unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
- Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
- Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
- ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
- Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
- Builder.CreateStore(Index, IndexAddress, false);
- }
- Builder.CreateBr(End);
- Result->addIncoming(ResOne, NotZero);
- Builder.SetInsertPoint(End);
- return Result;
- }
- case MSVCIntrin::_InterlockedAnd:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
- case MSVCIntrin::_InterlockedExchange:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
- case MSVCIntrin::_InterlockedExchangeAdd:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
- case MSVCIntrin::_InterlockedExchangeSub:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
- case MSVCIntrin::_InterlockedOr:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
- case MSVCIntrin::_InterlockedXor:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
- case MSVCIntrin::_InterlockedExchangeAdd_acq:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
- AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedExchangeAdd_rel:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
- AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedExchangeAdd_nf:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
- AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedExchange_acq:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
- AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedExchange_rel:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
- AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedExchange_nf:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
- AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedCompareExchange_acq:
- return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedCompareExchange_rel:
- return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedCompareExchange_nf:
- return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedCompareExchange128:
- return EmitAtomicCmpXchg128ForMSIntrin(
- *this, E, AtomicOrdering::SequentiallyConsistent);
- case MSVCIntrin::_InterlockedCompareExchange128_acq:
- return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedCompareExchange128_rel:
- return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedCompareExchange128_nf:
- return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedOr_acq:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
- AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedOr_rel:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
- AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedOr_nf:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
- AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedXor_acq:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
- AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedXor_rel:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
- AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedXor_nf:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
- AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedAnd_acq:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
- AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedAnd_rel:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
- AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedAnd_nf:
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
- AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedIncrement_acq:
- return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedIncrement_rel:
- return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedIncrement_nf:
- return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedDecrement_acq:
- return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
- case MSVCIntrin::_InterlockedDecrement_rel:
- return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
- case MSVCIntrin::_InterlockedDecrement_nf:
- return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
- case MSVCIntrin::_InterlockedDecrement:
- return EmitAtomicDecrementValue(*this, E);
- case MSVCIntrin::_InterlockedIncrement:
- return EmitAtomicIncrementValue(*this, E);
- case MSVCIntrin::__fastfail: {
- // Request immediate process termination from the kernel. The instruction
- // sequences to do this are documented on MSDN:
- // https://msdn.microsoft.com/en-us/library/dn774154.aspx
- llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
- StringRef Asm, Constraints;
- switch (ISA) {
- default:
- ErrorUnsupported(E, "__fastfail call for this architecture");
- break;
- case llvm::Triple::x86:
- case llvm::Triple::x86_64:
- Asm = "int $$0x29";
- Constraints = "{cx}";
- break;
- case llvm::Triple::thumb:
- Asm = "udf #251";
- Constraints = "{r0}";
- break;
- case llvm::Triple::aarch64:
- Asm = "brk #0xF003";
- Constraints = "{w0}";
- }
- llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
- llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoReturn);
- llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
- CI->setAttributes(NoReturnAttr);
- return CI;
- }
- }
- llvm_unreachable("Incorrect MSVC intrinsic!");
- }
- namespace {
- // ARC cleanup for __builtin_os_log_format
- struct CallObjCArcUse final : EHScopeStack::Cleanup {
- CallObjCArcUse(llvm::Value *object) : object(object) {}
- llvm::Value *object;
- void Emit(CodeGenFunction &CGF, Flags flags) override {
- CGF.EmitARCIntrinsicUse(object);
- }
- };
- }
- Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
- BuiltinCheckKind Kind) {
- assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
- && "Unsupported builtin check kind");
- Value *ArgValue = EmitScalarExpr(E);
- if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
- return ArgValue;
- SanitizerScope SanScope(this);
- Value *Cond = Builder.CreateICmpNE(
- ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
- EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
- SanitizerHandler::InvalidBuiltin,
- {EmitCheckSourceLocation(E->getExprLoc()),
- llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
- None);
- return ArgValue;
- }
- /// Get the argument type for arguments to os_log_helper.
- static CanQualType getOSLogArgType(ASTContext &C, int Size) {
- QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
- return C.getCanonicalType(UnsignedTy);
- }
- llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
- const analyze_os_log::OSLogBufferLayout &Layout,
- CharUnits BufferAlignment) {
- ASTContext &Ctx = getContext();
- llvm::SmallString<64> Name;
- {
- raw_svector_ostream OS(Name);
- OS << "__os_log_helper";
- OS << "_" << BufferAlignment.getQuantity();
- OS << "_" << int(Layout.getSummaryByte());
- OS << "_" << int(Layout.getNumArgsByte());
- for (const auto &Item : Layout.Items)
- OS << "_" << int(Item.getSizeByte()) << "_"
- << int(Item.getDescriptorByte());
- }
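- // The mangling above yields names such as "__os_log_helper_8_0_1_4_0" for an
- // 8-byte-aligned buffer with one 4-byte argument (the summary, argument-count,
- // size, and descriptor values in this example are illustrative).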
- if (llvm::Function *F = CGM.getModule().getFunction(Name))
- return F;
- llvm::SmallVector<QualType, 4> ArgTys;
- FunctionArgList Args;
- Args.push_back(ImplicitParamDecl::Create(
- Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
- ImplicitParamDecl::Other));
- ArgTys.emplace_back(Ctx.VoidPtrTy);
- for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
- char Size = Layout.Items[I].getSizeByte();
- if (!Size)
- continue;
- QualType ArgTy = getOSLogArgType(Ctx, Size);
- Args.push_back(ImplicitParamDecl::Create(
- Ctx, nullptr, SourceLocation(),
- &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
- ImplicitParamDecl::Other));
- ArgTys.emplace_back(ArgTy);
- }
- QualType ReturnTy = Ctx.VoidTy;
- // The helper function has linkonce_odr linkage to enable the linker to merge
- // identical functions. To ensure the merging always happens, 'noinline' is
- // attached to the function when compiling with -Oz.
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
- llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = llvm::Function::Create(
- FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
- Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
- CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
- Fn->setDoesNotThrow();
- // Attach 'noinline' at -Oz.
- if (CGM.getCodeGenOpts().OptimizeSize == 2)
- Fn->addFnAttr(llvm::Attribute::NoInline);
- auto NL = ApplyDebugLocation::CreateEmpty(*this);
- StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
- // Create a scope with an artificial location for the body of this function.
- auto AL = ApplyDebugLocation::CreateArtificial(*this);
- CharUnits Offset;
- Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"),
- BufferAlignment);
- Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
- Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
- unsigned I = 1;
- for (const auto &Item : Layout.Items) {
- Builder.CreateStore(
- Builder.getInt8(Item.getDescriptorByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
- Builder.CreateStore(
- Builder.getInt8(Item.getSizeByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
- CharUnits Size = Item.size();
- if (!Size.getQuantity())
- continue;
- Address Arg = GetAddrOfLocalVar(Args[I]);
- Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
- Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
- "argDataCast");
- Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
- Offset += Size;
- ++I;
- }
- FinishFunction();
- return Fn;
- }
- RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
- assert(E.getNumArgs() >= 2 &&
- "__builtin_os_log_format takes at least 2 arguments");
- ASTContext &Ctx = getContext();
- analyze_os_log::OSLogBufferLayout Layout;
- analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
- Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
- llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
- // Ignore argument 1, the format string. It is not currently used.
- CallArgList Args;
- Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
- for (const auto &Item : Layout.Items) {
- int Size = Item.getSizeByte();
- if (!Size)
- continue;
- llvm::Value *ArgVal;
- if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
- uint64_t Val = 0;
- for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
- Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
- ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
- } else if (const Expr *TheExpr = Item.getExpr()) {
- ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
- // If a temporary object that requires destruction after the full
- // expression is passed, push a lifetime-extended cleanup to extend its
- // lifetime to the end of the enclosing block scope.
- auto LifetimeExtendObject = [&](const Expr *E) {
- E = E->IgnoreParenCasts();
- // Extend lifetimes of objects returned by function calls and message
- // sends.
- // FIXME: We should do this in other cases in which temporaries are
- // created including arguments of non-ARC types (e.g., C++
- // temporaries).
- if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
- return true;
- return false;
- };
- if (TheExpr->getType()->isObjCRetainableType() &&
- getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
- assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
- "Only scalar can be a ObjC retainable type");
- if (!isa<Constant>(ArgVal)) {
- CleanupKind Cleanup = getARCCleanupKind();
- QualType Ty = TheExpr->getType();
- Address Alloca = Address::invalid();
- Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
- ArgVal = EmitARCRetain(Ty, ArgVal);
- Builder.CreateStore(ArgVal, Addr);
- pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
- CodeGenFunction::destroyARCStrongPrecise,
- Cleanup & EHCleanup);
- // Push a clang.arc.use call to ensure the ARC optimizer knows that the
- // argument has to be kept alive.
- if (CGM.getCodeGenOpts().OptimizationLevel != 0)
- pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
- }
- }
- } else {
- ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
- }
- unsigned ArgValSize =
- CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
- llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
- ArgValSize);
- ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
- CanQualType ArgTy = getOSLogArgType(Ctx, Size);
- // If ArgVal has type x86_fp80, zero-extend ArgVal.
- ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
- Args.add(RValue::get(ArgVal), ArgTy);
- }
- const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
- llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
- Layout, BufAddr.getAlignment());
- EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
- return RValue::get(BufAddr.getPointer());
- }
- static bool isSpecialUnsignedMultiplySignedResult(
- unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
- WidthAndSignedness ResultInfo) {
- return BuiltinID == Builtin::BI__builtin_mul_overflow &&
- Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
- !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
- }
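- // The predicate above matches, for instance, __builtin_mul_overflow(unsigned a,
- // unsigned b, int *res) with same-width operands: the multiply itself is
- // unsigned, but an overflow must also be reported whenever the product exceeds
- // INT_MAX.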
- static RValue EmitCheckedUnsignedMultiplySignedResult(
- CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
- const clang::Expr *Op2, WidthAndSignedness Op2Info,
- const clang::Expr *ResultArg, QualType ResultQTy,
- WidthAndSignedness ResultInfo) {
- assert(isSpecialUnsignedMultiplySignedResult(
- Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
- "Cannot specialize this multiply");
- llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
- llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
- llvm::Value *HasOverflow;
- llvm::Value *Result = EmitOverflowIntrinsic(
- CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
- // The intrinsic call will detect overflow when the value is > UINT_MAX;
- // however, since the original builtin had a signed result, we need to report
- // an overflow when the result is greater than INT_MAX.
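- // For example, with 32-bit operands, 0x80000000u * 1u fits in the unsigned
- // result, so the intrinsic reports no overflow, yet the value exceeds INT_MAX
- // and must still be flagged by the comparison below.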
- auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
- llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
- llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
- HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
- bool isVolatile =
- ResultArg->getType()->getPointeeType().isVolatileQualified();
- Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
- CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
- isVolatile);
- return RValue::get(HasOverflow);
- }
- /// Determine if a binop is a checked mixed-sign multiply we can specialize.
- static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
- WidthAndSignedness Op1Info,
- WidthAndSignedness Op2Info,
- WidthAndSignedness ResultInfo) {
- return BuiltinID == Builtin::BI__builtin_mul_overflow &&
- std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
- Op1Info.Signed != Op2Info.Signed;
- }
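- // The predicate above matches, e.g., __builtin_mul_overflow(int a, unsigned b,
- // int *res): the operands have mixed signedness and are at least as wide as
- // the result.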
- /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
- /// the generic checked-binop irgen.
- static RValue
- EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
- WidthAndSignedness Op1Info, const clang::Expr *Op2,
- WidthAndSignedness Op2Info,
- const clang::Expr *ResultArg, QualType ResultQTy,
- WidthAndSignedness ResultInfo) {
- assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
- Op2Info, ResultInfo) &&
- "Not a mixed-sign multipliction we can specialize");
- // Emit the signed and unsigned operands.
- const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
- const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
- llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
- llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
- unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
- unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
- // One of the operands may be smaller than the other. If so, [s|z]ext it.
- if (SignedOpWidth < UnsignedOpWidth)
- Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
- if (UnsignedOpWidth < SignedOpWidth)
- Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
- llvm::Type *OpTy = Signed->getType();
- llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
- Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
- llvm::Type *ResTy = ResultPtr.getElementType();
- unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
- // Take the absolute value of the signed operand.
- llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
- llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
- llvm::Value *AbsSigned =
- CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
- // Perform a checked unsigned multiplication.
- llvm::Value *UnsignedOverflow;
- llvm::Value *UnsignedResult =
- EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
- Unsigned, UnsignedOverflow);
- llvm::Value *Overflow, *Result;
- if (ResultInfo.Signed) {
- // Signed overflow occurs if the result is greater than INT_MAX or less
- // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
- auto IntMax =
- llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
- llvm::Value *MaxResult =
- CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
- CGF.Builder.CreateZExt(IsNegative, OpTy));
- llvm::Value *SignedOverflow =
- CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
- Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
- // Prepare the signed result (possibly by negating it).
- llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
- llvm::Value *SignedResult =
- CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
- Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
- } else {
- // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
- llvm::Value *Underflow = CGF.Builder.CreateAnd(
- IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
- Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
- if (ResultInfo.Width < OpWidth) {
- auto IntMax =
- llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
- llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
- UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
- Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
- }
- // Negate the product if it would be negative in infinite precision.
- Result = CGF.Builder.CreateSelect(
- IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
- Result = CGF.Builder.CreateTrunc(Result, ResTy);
- }
- assert(Overflow && Result && "Missing overflow or result");
- bool isVolatile =
- ResultArg->getType()->getPointeeType().isVolatileQualified();
- CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
- isVolatile);
- return RValue::get(Overflow);
- }
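- // dumpRecord below (a helper for __builtin_dump_struct) recursively prints a
- // record's fields through the given printf-like callee, one line per field in
- // the form "<type> <name> : <value>", choosing a conversion specifier from the
- // static table and falling back to %p for unrecognized types (editorial
- // summary; see the code for details).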
- static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
- Value *&RecordPtr, CharUnits Align,
- llvm::FunctionCallee Func, int Lvl) {
- ASTContext &Context = CGF.getContext();
- RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition();
- std::string Pad = std::string(Lvl * 4, ' ');
- Value *GString =
- CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
- Value *Res = CGF.Builder.CreateCall(Func, {GString});
- static llvm::DenseMap<QualType, const char *> Types;
- if (Types.empty()) {
- Types[Context.CharTy] = "%c";
- Types[Context.BoolTy] = "%d";
- Types[Context.SignedCharTy] = "%hhd";
- Types[Context.UnsignedCharTy] = "%hhu";
- Types[Context.IntTy] = "%d";
- Types[Context.UnsignedIntTy] = "%u";
- Types[Context.LongTy] = "%ld";
- Types[Context.UnsignedLongTy] = "%lu";
- Types[Context.LongLongTy] = "%lld";
- Types[Context.UnsignedLongLongTy] = "%llu";
- Types[Context.ShortTy] = "%hd";
- Types[Context.UnsignedShortTy] = "%hu";
- Types[Context.VoidPtrTy] = "%p";
- Types[Context.FloatTy] = "%f";
- Types[Context.DoubleTy] = "%f";
- Types[Context.LongDoubleTy] = "%Lf";
- Types[Context.getPointerType(Context.CharTy)] = "%s";
- Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
- }
- for (const auto *FD : RD->fields()) {
- Value *FieldPtr = RecordPtr;
- if (RD->isUnion())
- FieldPtr = CGF.Builder.CreatePointerCast(
- FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
- else
- FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
- FD->getFieldIndex());
- GString = CGF.Builder.CreateGlobalStringPtr(
- llvm::Twine(Pad)
- .concat(FD->getType().getAsString())
- .concat(llvm::Twine(' '))
- .concat(FD->getNameAsString())
- .concat(" : ")
- .str());
- Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
- Res = CGF.Builder.CreateAdd(Res, TmpRes);
- QualType CanonicalType =
- FD->getType().getUnqualifiedType().getCanonicalType();
- // If the field is itself a record type, dump it recursively.
- if (CanonicalType->isRecordType()) {
- TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
- Res = CGF.Builder.CreateAdd(TmpRes, Res);
- continue;
- }
- // Determine the best printf format for the current field, falling back to %p.
- llvm::Twine Format = Types.find(CanonicalType) == Types.end()
- ? Types[Context.VoidPtrTy]
- : Types[CanonicalType];
- Address FieldAddress = Address(FieldPtr, Align);
- FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
- // FIXME: Need to handle bitfields here.
- GString = CGF.Builder.CreateGlobalStringPtr(
- Format.concat(llvm::Twine('\n')).str());
- TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
- Res = CGF.Builder.CreateAdd(Res, TmpRes);
- }
- GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
- Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
- Res = CGF.Builder.CreateAdd(Res, TmpRes);
- return Res;
- }
- static bool
- TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
- llvm::SmallPtrSetImpl<const Decl *> &Seen) {
- if (const auto *Arr = Ctx.getAsArrayType(Ty))
- Ty = Ctx.getBaseElementType(Arr);
- const auto *Record = Ty->getAsCXXRecordDecl();
- if (!Record)
- return false;
- // We've already checked this type, or are in the process of checking it.
- if (!Seen.insert(Record).second)
- return false;
- assert(Record->hasDefinition() &&
- "Incomplete types should already be diagnosed");
- if (Record->isDynamicClass())
- return true;
- for (FieldDecl *F : Record->fields()) {
- if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
- return true;
- }
- return false;
- }
- /// Determine if the specified type requires laundering by checking if it is a
- /// dynamic class type or contains a subobject which is a dynamic class type.
- static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
- if (!CGM.getCodeGenOpts().StrictVTablePointers)
- return false;
- llvm::SmallPtrSet<const Decl *, 16> Seen;
- return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
- }
- RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
- llvm::Value *Src = EmitScalarExpr(E->getArg(0));
- llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
- // The builtin's shift arg may have a different type than the source arg and
- // result, but the LLVM intrinsic uses the same type for all values.
- llvm::Type *Ty = Src->getType();
- ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
- // Rotate is a special case of LLVM funnel shift: the first two args are the same.
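- // For instance, a left rotate of x by n lowers to @llvm.fshl(x, x, n), and a
- // right rotate to @llvm.fshr(x, x, n).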
- unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
- Function *F = CGM.getIntrinsic(IID, Ty);
- return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
- }
- // Map math builtins for long-double to f128 version.
- static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
- switch (BuiltinID) {
- #define MUTATE_LDBL(func) \
- case Builtin::BI__builtin_##func##l: \
- return Builtin::BI__builtin_##func##f128;
- MUTATE_LDBL(sqrt)
- MUTATE_LDBL(cbrt)
- MUTATE_LDBL(fabs)
- MUTATE_LDBL(log)
- MUTATE_LDBL(log2)
- MUTATE_LDBL(log10)
- MUTATE_LDBL(log1p)
- MUTATE_LDBL(logb)
- MUTATE_LDBL(exp)
- MUTATE_LDBL(exp2)
- MUTATE_LDBL(expm1)
- MUTATE_LDBL(fdim)
- MUTATE_LDBL(hypot)
- MUTATE_LDBL(ilogb)
- MUTATE_LDBL(pow)
- MUTATE_LDBL(fmin)
- MUTATE_LDBL(fmax)
- MUTATE_LDBL(ceil)
- MUTATE_LDBL(trunc)
- MUTATE_LDBL(rint)
- MUTATE_LDBL(nearbyint)
- MUTATE_LDBL(round)
- MUTATE_LDBL(floor)
- MUTATE_LDBL(lround)
- MUTATE_LDBL(llround)
- MUTATE_LDBL(lrint)
- MUTATE_LDBL(llrint)
- MUTATE_LDBL(fmod)
- MUTATE_LDBL(modf)
- MUTATE_LDBL(nan)
- MUTATE_LDBL(nans)
- MUTATE_LDBL(inf)
- MUTATE_LDBL(fma)
- MUTATE_LDBL(sin)
- MUTATE_LDBL(cos)
- MUTATE_LDBL(tan)
- MUTATE_LDBL(sinh)
- MUTATE_LDBL(cosh)
- MUTATE_LDBL(tanh)
- MUTATE_LDBL(asin)
- MUTATE_LDBL(acos)
- MUTATE_LDBL(atan)
- MUTATE_LDBL(asinh)
- MUTATE_LDBL(acosh)
- MUTATE_LDBL(atanh)
- MUTATE_LDBL(atan2)
- MUTATE_LDBL(erf)
- MUTATE_LDBL(erfc)
- MUTATE_LDBL(ldexp)
- MUTATE_LDBL(frexp)
- MUTATE_LDBL(huge_val)
- MUTATE_LDBL(copysign)
- MUTATE_LDBL(nextafter)
- MUTATE_LDBL(nexttoward)
- MUTATE_LDBL(remainder)
- MUTATE_LDBL(remquo)
- MUTATE_LDBL(scalbln)
- MUTATE_LDBL(scalbn)
- MUTATE_LDBL(tgamma)
- MUTATE_LDBL(lgamma)
- #undef MUTATE_LDBL
- default:
- return BuiltinID;
- }
- }
- RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- const FunctionDecl *FD = GD.getDecl()->getAsFunction();
- // See if we can constant fold this builtin. If so, don't emit it at all.
- Expr::EvalResult Result;
- if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
- !Result.hasSideEffects()) {
- if (Result.Val.isInt())
- return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
- Result.Val.getInt()));
- if (Result.Val.isFloat())
- return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
- Result.Val.getFloat()));
- }
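- // For example, a call such as __builtin_popcount(0xF0) is folded to the
- // constant 4 here and never reaches the lowering code below (illustrative).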
- // If current long-double semantics is IEEE 128-bit, replace math builtins
- // of long-double with f128 equivalent.
- // TODO: This mutation should also be applied to targets other than PPC, once
- // the backend supports IEEE 128-bit style libcalls.
- if (getTarget().getTriple().isPPC64() &&
- &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
- BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
- // If the builtin has been declared explicitly with an assembler label,
- // disable the specialized emitting below. Ideally we should communicate the
- // rename in IR, or at least avoid generating the intrinsic calls that are
- // likely to get lowered to the renamed library functions.
- const unsigned BuiltinIDIfNoAsmLabel =
- FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
- // There are LLVM math intrinsics/instructions corresponding to math library
- // functions, except that the LLVM op will never set errno while the math
- // library might. Also, math builtins have the same semantics as their math
- // library twins. Thus, we can transform math library and builtin calls to
- // their LLVM counterparts if the call is marked 'const' (known to never set
- // errno).
- if (FD->hasAttr<ConstAttr>()) {
- switch (BuiltinIDIfNoAsmLabel) {
- case Builtin::BIceil:
- case Builtin::BIceilf:
- case Builtin::BIceill:
- case Builtin::BI__builtin_ceil:
- case Builtin::BI__builtin_ceilf:
- case Builtin::BI__builtin_ceilf16:
- case Builtin::BI__builtin_ceill:
- case Builtin::BI__builtin_ceilf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::ceil,
- Intrinsic::experimental_constrained_ceil));
- case Builtin::BIcopysign:
- case Builtin::BIcopysignf:
- case Builtin::BIcopysignl:
- case Builtin::BI__builtin_copysign:
- case Builtin::BI__builtin_copysignf:
- case Builtin::BI__builtin_copysignf16:
- case Builtin::BI__builtin_copysignl:
- case Builtin::BI__builtin_copysignf128:
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
- case Builtin::BIcos:
- case Builtin::BIcosf:
- case Builtin::BIcosl:
- case Builtin::BI__builtin_cos:
- case Builtin::BI__builtin_cosf:
- case Builtin::BI__builtin_cosf16:
- case Builtin::BI__builtin_cosl:
- case Builtin::BI__builtin_cosf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::cos,
- Intrinsic::experimental_constrained_cos));
- case Builtin::BIexp:
- case Builtin::BIexpf:
- case Builtin::BIexpl:
- case Builtin::BI__builtin_exp:
- case Builtin::BI__builtin_expf:
- case Builtin::BI__builtin_expf16:
- case Builtin::BI__builtin_expl:
- case Builtin::BI__builtin_expf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::exp,
- Intrinsic::experimental_constrained_exp));
- case Builtin::BIexp2:
- case Builtin::BIexp2f:
- case Builtin::BIexp2l:
- case Builtin::BI__builtin_exp2:
- case Builtin::BI__builtin_exp2f:
- case Builtin::BI__builtin_exp2f16:
- case Builtin::BI__builtin_exp2l:
- case Builtin::BI__builtin_exp2f128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::exp2,
- Intrinsic::experimental_constrained_exp2));
- case Builtin::BIfabs:
- case Builtin::BIfabsf:
- case Builtin::BIfabsl:
- case Builtin::BI__builtin_fabs:
- case Builtin::BI__builtin_fabsf:
- case Builtin::BI__builtin_fabsf16:
- case Builtin::BI__builtin_fabsl:
- case Builtin::BI__builtin_fabsf128:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
- case Builtin::BIfloor:
- case Builtin::BIfloorf:
- case Builtin::BIfloorl:
- case Builtin::BI__builtin_floor:
- case Builtin::BI__builtin_floorf:
- case Builtin::BI__builtin_floorf16:
- case Builtin::BI__builtin_floorl:
- case Builtin::BI__builtin_floorf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::floor,
- Intrinsic::experimental_constrained_floor));
- case Builtin::BIfma:
- case Builtin::BIfmaf:
- case Builtin::BIfmal:
- case Builtin::BI__builtin_fma:
- case Builtin::BI__builtin_fmaf:
- case Builtin::BI__builtin_fmaf16:
- case Builtin::BI__builtin_fmal:
- case Builtin::BI__builtin_fmaf128:
- return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::fma,
- Intrinsic::experimental_constrained_fma));
- case Builtin::BIfmax:
- case Builtin::BIfmaxf:
- case Builtin::BIfmaxl:
- case Builtin::BI__builtin_fmax:
- case Builtin::BI__builtin_fmaxf:
- case Builtin::BI__builtin_fmaxf16:
- case Builtin::BI__builtin_fmaxl:
- case Builtin::BI__builtin_fmaxf128:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::maxnum,
- Intrinsic::experimental_constrained_maxnum));
- case Builtin::BIfmin:
- case Builtin::BIfminf:
- case Builtin::BIfminl:
- case Builtin::BI__builtin_fmin:
- case Builtin::BI__builtin_fminf:
- case Builtin::BI__builtin_fminf16:
- case Builtin::BI__builtin_fminl:
- case Builtin::BI__builtin_fminf128:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::minnum,
- Intrinsic::experimental_constrained_minnum));
- // fmod() is a special-case. It maps to the frem instruction rather than an
- // LLVM intrinsic.
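- // For example, fmod on doubles is emitted as 'frem double %x, %y'.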
- case Builtin::BIfmod:
- case Builtin::BIfmodf:
- case Builtin::BIfmodl:
- case Builtin::BI__builtin_fmod:
- case Builtin::BI__builtin_fmodf:
- case Builtin::BI__builtin_fmodf16:
- case Builtin::BI__builtin_fmodl:
- case Builtin::BI__builtin_fmodf128: {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Value *Arg1 = EmitScalarExpr(E->getArg(0));
- Value *Arg2 = EmitScalarExpr(E->getArg(1));
- return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
- }
- case Builtin::BIlog:
- case Builtin::BIlogf:
- case Builtin::BIlogl:
- case Builtin::BI__builtin_log:
- case Builtin::BI__builtin_logf:
- case Builtin::BI__builtin_logf16:
- case Builtin::BI__builtin_logl:
- case Builtin::BI__builtin_logf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::log,
- Intrinsic::experimental_constrained_log));
- case Builtin::BIlog10:
- case Builtin::BIlog10f:
- case Builtin::BIlog10l:
- case Builtin::BI__builtin_log10:
- case Builtin::BI__builtin_log10f:
- case Builtin::BI__builtin_log10f16:
- case Builtin::BI__builtin_log10l:
- case Builtin::BI__builtin_log10f128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::log10,
- Intrinsic::experimental_constrained_log10));
- case Builtin::BIlog2:
- case Builtin::BIlog2f:
- case Builtin::BIlog2l:
- case Builtin::BI__builtin_log2:
- case Builtin::BI__builtin_log2f:
- case Builtin::BI__builtin_log2f16:
- case Builtin::BI__builtin_log2l:
- case Builtin::BI__builtin_log2f128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::log2,
- Intrinsic::experimental_constrained_log2));
- case Builtin::BInearbyint:
- case Builtin::BInearbyintf:
- case Builtin::BInearbyintl:
- case Builtin::BI__builtin_nearbyint:
- case Builtin::BI__builtin_nearbyintf:
- case Builtin::BI__builtin_nearbyintl:
- case Builtin::BI__builtin_nearbyintf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::nearbyint,
- Intrinsic::experimental_constrained_nearbyint));
- case Builtin::BIpow:
- case Builtin::BIpowf:
- case Builtin::BIpowl:
- case Builtin::BI__builtin_pow:
- case Builtin::BI__builtin_powf:
- case Builtin::BI__builtin_powf16:
- case Builtin::BI__builtin_powl:
- case Builtin::BI__builtin_powf128:
- return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::pow,
- Intrinsic::experimental_constrained_pow));
- case Builtin::BIrint:
- case Builtin::BIrintf:
- case Builtin::BIrintl:
- case Builtin::BI__builtin_rint:
- case Builtin::BI__builtin_rintf:
- case Builtin::BI__builtin_rintf16:
- case Builtin::BI__builtin_rintl:
- case Builtin::BI__builtin_rintf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::rint,
- Intrinsic::experimental_constrained_rint));
- case Builtin::BIround:
- case Builtin::BIroundf:
- case Builtin::BIroundl:
- case Builtin::BI__builtin_round:
- case Builtin::BI__builtin_roundf:
- case Builtin::BI__builtin_roundf16:
- case Builtin::BI__builtin_roundl:
- case Builtin::BI__builtin_roundf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::round,
- Intrinsic::experimental_constrained_round));
- case Builtin::BIsin:
- case Builtin::BIsinf:
- case Builtin::BIsinl:
- case Builtin::BI__builtin_sin:
- case Builtin::BI__builtin_sinf:
- case Builtin::BI__builtin_sinf16:
- case Builtin::BI__builtin_sinl:
- case Builtin::BI__builtin_sinf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::sin,
- Intrinsic::experimental_constrained_sin));
- case Builtin::BIsqrt:
- case Builtin::BIsqrtf:
- case Builtin::BIsqrtl:
- case Builtin::BI__builtin_sqrt:
- case Builtin::BI__builtin_sqrtf:
- case Builtin::BI__builtin_sqrtf16:
- case Builtin::BI__builtin_sqrtl:
- case Builtin::BI__builtin_sqrtf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::sqrt,
- Intrinsic::experimental_constrained_sqrt));
- case Builtin::BItrunc:
- case Builtin::BItruncf:
- case Builtin::BItruncl:
- case Builtin::BI__builtin_trunc:
- case Builtin::BI__builtin_truncf:
- case Builtin::BI__builtin_truncf16:
- case Builtin::BI__builtin_truncl:
- case Builtin::BI__builtin_truncf128:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
- Intrinsic::trunc,
- Intrinsic::experimental_constrained_trunc));
- case Builtin::BIlround:
- case Builtin::BIlroundf:
- case Builtin::BIlroundl:
- case Builtin::BI__builtin_lround:
- case Builtin::BI__builtin_lroundf:
- case Builtin::BI__builtin_lroundl:
- case Builtin::BI__builtin_lroundf128:
- return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
- *this, E, Intrinsic::lround,
- Intrinsic::experimental_constrained_lround));
- case Builtin::BIllround:
- case Builtin::BIllroundf:
- case Builtin::BIllroundl:
- case Builtin::BI__builtin_llround:
- case Builtin::BI__builtin_llroundf:
- case Builtin::BI__builtin_llroundl:
- case Builtin::BI__builtin_llroundf128:
- return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
- *this, E, Intrinsic::llround,
- Intrinsic::experimental_constrained_llround));
- case Builtin::BIlrint:
- case Builtin::BIlrintf:
- case Builtin::BIlrintl:
- case Builtin::BI__builtin_lrint:
- case Builtin::BI__builtin_lrintf:
- case Builtin::BI__builtin_lrintl:
- case Builtin::BI__builtin_lrintf128:
- return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
- *this, E, Intrinsic::lrint,
- Intrinsic::experimental_constrained_lrint));
- case Builtin::BIllrint:
- case Builtin::BIllrintf:
- case Builtin::BIllrintl:
- case Builtin::BI__builtin_llrint:
- case Builtin::BI__builtin_llrintf:
- case Builtin::BI__builtin_llrintl:
- case Builtin::BI__builtin_llrintf128:
- return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
- *this, E, Intrinsic::llrint,
- Intrinsic::experimental_constrained_llrint));
- default:
- break;
- }
- }
- switch (BuiltinIDIfNoAsmLabel) {
- default: break;
- case Builtin::BI__builtin___CFStringMakeConstantString:
- case Builtin::BI__builtin___NSStringMakeConstantString:
- return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
- case Builtin::BI__builtin_stdarg_start:
- case Builtin::BI__builtin_va_start:
- case Builtin::BI__va_start:
- case Builtin::BI__builtin_va_end:
- return RValue::get(
- EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
- ? EmitScalarExpr(E->getArg(0))
- : EmitVAListRef(E->getArg(0)).getPointer(),
- BuiltinID != Builtin::BI__builtin_va_end));
- case Builtin::BI__builtin_va_copy: {
- Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
- Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
- llvm::Type *Type = Int8PtrTy;
- DstPtr = Builder.CreateBitCast(DstPtr, Type);
- SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
- return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
- {DstPtr, SrcPtr}));
- }
- case Builtin::BI__builtin_abs:
- case Builtin::BI__builtin_labs:
- case Builtin::BI__builtin_llabs: {
- // X < 0 ? -X : X
- // The negation has 'nsw' because abs of INT_MIN is undefined.
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
- Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
- Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
- Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_complex: {
- Value *Real = EmitScalarExpr(E->getArg(0));
- Value *Imag = EmitScalarExpr(E->getArg(1));
- return RValue::getComplex({Real, Imag});
- }
- case Builtin::BI__builtin_conj:
- case Builtin::BI__builtin_conjf:
- case Builtin::BI__builtin_conjl:
- case Builtin::BIconj:
- case Builtin::BIconjf:
- case Builtin::BIconjl: {
- ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
- Value *Real = ComplexVal.first;
- Value *Imag = ComplexVal.second;
- Imag = Builder.CreateFNeg(Imag, "neg");
- return RValue::getComplex(std::make_pair(Real, Imag));
- }
- case Builtin::BI__builtin_creal:
- case Builtin::BI__builtin_crealf:
- case Builtin::BI__builtin_creall:
- case Builtin::BIcreal:
- case Builtin::BIcrealf:
- case Builtin::BIcreall: {
- ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
- return RValue::get(ComplexVal.first);
- }
- case Builtin::BI__builtin_dump_struct: {
- llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy);
- llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get(
- LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true);
- Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
- CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
- const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
- QualType Arg0Type = Arg0->getType()->getPointeeType();
- Value *RecordPtr = EmitScalarExpr(Arg0);
- Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align,
- {LLVMFuncType, Func}, 0);
- return RValue::get(Res);
- }
- case Builtin::BI__builtin_preserve_access_index: {
- // Only enable the preserved access index region when debug info is
- // available, as debug info is needed to preserve the user-level access
- // pattern.
- if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
- return RValue::get(EmitScalarExpr(E->getArg(0)));
- }
- // Nested builtin_preserve_access_index() not supported
- if (IsInPreservedAIRegion) {
- CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
- return RValue::get(EmitScalarExpr(E->getArg(0)));
- }
- IsInPreservedAIRegion = true;
- Value *Res = EmitScalarExpr(E->getArg(0));
- IsInPreservedAIRegion = false;
- return RValue::get(Res);
- }
- case Builtin::BI__builtin_cimag:
- case Builtin::BI__builtin_cimagf:
- case Builtin::BI__builtin_cimagl:
- case Builtin::BIcimag:
- case Builtin::BIcimagf:
- case Builtin::BIcimagl: {
- ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
- return RValue::get(ComplexVal.second);
- }
- case Builtin::BI__builtin_clrsb:
- case Builtin::BI__builtin_clrsbl:
- case Builtin::BI__builtin_clrsbll: {
- // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
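- // This counts the leading redundant sign bits, e.g. for a 32-bit int
- // clrsb(0) == 31 and clrsb(-1) == 31.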
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Zero = llvm::Constant::getNullValue(ArgType);
- Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
- Value *Inverse = Builder.CreateNot(ArgValue, "not");
- Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
- Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
- Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_ctzs:
- case Builtin::BI__builtin_ctz:
- case Builtin::BI__builtin_ctzl:
- case Builtin::BI__builtin_ctzll: {
- Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
- Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_clzs:
- case Builtin::BI__builtin_clz:
- case Builtin::BI__builtin_clzl:
- case Builtin::BI__builtin_clzll: {
- Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
- Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_ffs:
- case Builtin::BI__builtin_ffsl:
- case Builtin::BI__builtin_ffsll: {
- // ffs(x) -> x ? cttz(x) + 1 : 0
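- // For example, ffs(0b1000) == 4 and ffs(0) == 0.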
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Tmp =
- Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
- llvm::ConstantInt::get(ArgType, 1));
- Value *Zero = llvm::Constant::getNullValue(ArgType);
- Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
- Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_parity:
- case Builtin::BI__builtin_parityl:
- case Builtin::BI__builtin_parityll: {
- // parity(x) -> ctpop(x) & 1
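- // For example, parity(0b1011) == 1 since three bits are set.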
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Tmp = Builder.CreateCall(F, ArgValue);
- Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__lzcnt16:
- case Builtin::BI__lzcnt:
- case Builtin::BI__lzcnt64: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__popcnt16:
- case Builtin::BI__popcnt:
- case Builtin::BI__popcnt64:
- case Builtin::BI__builtin_popcount:
- case Builtin::BI__builtin_popcountl:
- case Builtin::BI__builtin_popcountll: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Result = Builder.CreateCall(F, ArgValue);
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_unpredictable: {
- // Always return the argument of __builtin_unpredictable. LLVM does not
- // handle this builtin. Metadata for this builtin should be added directly
- // to instructions such as branches or switches that use it.
- return RValue::get(EmitScalarExpr(E->getArg(0)));
- }
- case Builtin::BI__builtin_expect: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
- // Don't generate llvm.expect on -O0 as the backend won't use it for
- // anything.
- // Note, we still IRGen ExpectedValue because it could have side-effects.
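- // Above -O0, this emits a call to @llvm.expect carrying the expected
- // value; later passes turn it into branch weight metadata.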
- if (CGM.getCodeGenOpts().OptimizationLevel == 0)
- return RValue::get(ArgValue);
- Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
- Value *Result =
- Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_expect_with_probability: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
- llvm::APFloat Probability(0.0);
- const Expr *ProbArg = E->getArg(2);
- bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
- assert(EvalSucceed && "probability should be able to evaluate as float");
- (void)EvalSucceed;
- bool LoseInfo = false;
- Probability.convert(llvm::APFloat::IEEEdouble(),
- llvm::RoundingMode::Dynamic, &LoseInfo);
- llvm::Type *Ty = ConvertType(ProbArg->getType());
- Constant *Confidence = ConstantFP::get(Ty, Probability);
- // Don't generate llvm.expect.with.probability on -O0 as the backend
- // won't use it for anything.
- // Note, we still IRGen ExpectedValue because it could have side-effects.
- if (CGM.getCodeGenOpts().OptimizationLevel == 0)
- return RValue::get(ArgValue);
- Function *FnExpect =
- CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
- Value *Result = Builder.CreateCall(
- FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_assume_aligned: {
- const Expr *Ptr = E->getArg(0);
- Value *PtrValue = EmitScalarExpr(Ptr);
- Value *OffsetValue =
- (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
- Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
- ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
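- // Alignments beyond what LLVM can represent are clamped rather than
- // rejected here.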
- if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
- AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
- llvm::Value::MaximumAlignment);
- emitAlignmentAssumption(PtrValue, Ptr,
- /*The expr loc is sufficient.*/ SourceLocation(),
- AlignmentCI, OffsetValue);
- return RValue::get(PtrValue);
- }
- case Builtin::BI__assume:
- case Builtin::BI__builtin_assume: {
- if (E->getArg(0)->HasSideEffects(getContext()))
- return RValue::get(nullptr);
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
- return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
- }
- case Builtin::BI__arithmetic_fence: {
- // Create the builtin call if FastMath is selected and the target supports
- // the builtin; otherwise, just return the argument.
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- llvm::FastMathFlags FMF = Builder.getFastMathFlags();
- bool isArithmeticFenceEnabled =
- FMF.allowReassoc() &&
- getContext().getTargetInfo().checkArithmeticFenceSupported();
- QualType ArgType = E->getArg(0)->getType();
- if (ArgType->isComplexType()) {
- if (isArithmeticFenceEnabled) {
- QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
- ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
- Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
- ConvertType(ElementType));
- Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
- ConvertType(ElementType));
- return RValue::getComplex(std::make_pair(Real, Imag));
- }
- ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
- Value *Real = ComplexVal.first;
- Value *Imag = ComplexVal.second;
- return RValue::getComplex(std::make_pair(Real, Imag));
- }
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- if (isArithmeticFenceEnabled)
- return RValue::get(
- Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
- return RValue::get(ArgValue);
- }
- case Builtin::BI__builtin_bswap16:
- case Builtin::BI__builtin_bswap32:
- case Builtin::BI__builtin_bswap64: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
- }
- case Builtin::BI__builtin_bitreverse8:
- case Builtin::BI__builtin_bitreverse16:
- case Builtin::BI__builtin_bitreverse32:
- case Builtin::BI__builtin_bitreverse64: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
- }
- case Builtin::BI__builtin_rotateleft8:
- case Builtin::BI__builtin_rotateleft16:
- case Builtin::BI__builtin_rotateleft32:
- case Builtin::BI__builtin_rotateleft64:
- case Builtin::BI_rotl8: // Microsoft variants of rotate left
- case Builtin::BI_rotl16:
- case Builtin::BI_rotl:
- case Builtin::BI_lrotl:
- case Builtin::BI_rotl64:
- return emitRotate(E, false);
- case Builtin::BI__builtin_rotateright8:
- case Builtin::BI__builtin_rotateright16:
- case Builtin::BI__builtin_rotateright32:
- case Builtin::BI__builtin_rotateright64:
- case Builtin::BI_rotr8: // Microsoft variants of rotate right
- case Builtin::BI_rotr16:
- case Builtin::BI_rotr:
- case Builtin::BI_lrotr:
- case Builtin::BI_rotr64:
- return emitRotate(E, true);
- case Builtin::BI__builtin_constant_p: {
- llvm::Type *ResultType = ConvertType(E->getType());
- const Expr *Arg = E->getArg(0);
- QualType ArgType = Arg->getType();
- // FIXME: The allowance for Obj-C pointers and block pointers is historical
- // and likely a mistake.
- if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
- !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
- // Per the GCC documentation, only numeric constants are recognized after
- // inlining.
- return RValue::get(ConstantInt::get(ResultType, 0));
- if (Arg->HasSideEffects(getContext()))
- // The argument is unevaluated, so be conservative if it might have
- // side-effects.
- return RValue::get(ConstantInt::get(ResultType, 0));
- Value *ArgValue = EmitScalarExpr(Arg);
- if (ArgType->isObjCObjectPointerType()) {
- // Convert Objective-C objects to id because we cannot distinguish between
- // LLVM types for Obj-C classes as they are opaque.
- ArgType = CGM.getContext().getObjCIdType();
- ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
- }
- Function *F =
- CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
- Value *Result = Builder.CreateCall(F, ArgValue);
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_dynamic_object_size:
- case Builtin::BI__builtin_object_size: {
- unsigned Type =
- E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
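- // The second argument selects one of the four GCC __builtin_object_size
- // modes (0-3).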
- auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
- // We pass this builtin onto the optimizer so that it can figure out the
- // object size in more complex cases.
- bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
- return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
- /*EmittedE=*/nullptr, IsDynamic));
- }
- case Builtin::BI__builtin_prefetch: {
- Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
- // FIXME: Technically these constants should be of type 'int', yes?
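- // llvm.prefetch operands: address, rw (0 = read, 1 = write),
- // locality (0-3), and cache type (1 = data cache).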
- RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
- llvm::ConstantInt::get(Int32Ty, 0);
- Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
- llvm::ConstantInt::get(Int32Ty, 3);
- Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
- }
- case Builtin::BI__builtin_readcyclecounter: {
- Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
- return RValue::get(Builder.CreateCall(F));
- }
- case Builtin::BI__builtin___clear_cache: {
- Value *Begin = EmitScalarExpr(E->getArg(0));
- Value *End = EmitScalarExpr(E->getArg(1));
- Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
- return RValue::get(Builder.CreateCall(F, {Begin, End}));
- }
- case Builtin::BI__builtin_trap:
- return RValue::get(EmitTrapCall(Intrinsic::trap));
- case Builtin::BI__debugbreak:
- return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
- case Builtin::BI__builtin_unreachable: {
- EmitUnreachable(E->getExprLoc());
- // We do need to preserve an insertion point.
- EmitBlock(createBasicBlock("unreachable.cont"));
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_powi:
- case Builtin::BI__builtin_powif:
- case Builtin::BI__builtin_powil: {
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- if (Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
- Src0->getType());
- return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
- }
- Function *F = CGM.getIntrinsic(Intrinsic::powi,
- { Src0->getType(), Src1->getType() });
- return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
- }
- case Builtin::BI__builtin_isgreater:
- case Builtin::BI__builtin_isgreaterequal:
- case Builtin::BI__builtin_isless:
- case Builtin::BI__builtin_islessequal:
- case Builtin::BI__builtin_islessgreater:
- case Builtin::BI__builtin_isunordered: {
- // Ordered comparisons: we know the arguments to these are matching scalar
- // floating point values.
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- switch (BuiltinID) {
- default: llvm_unreachable("Unknown ordered comparison");
- case Builtin::BI__builtin_isgreater:
- LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
- break;
- case Builtin::BI__builtin_isgreaterequal:
- LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
- break;
- case Builtin::BI__builtin_isless:
- LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
- break;
- case Builtin::BI__builtin_islessequal:
- LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
- break;
- case Builtin::BI__builtin_islessgreater:
- LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
- break;
- case Builtin::BI__builtin_isunordered:
- LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
- break;
- }
- // ZExt bool to int type.
- return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
- }
- case Builtin::BI__builtin_isnan: {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Value *V = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = V->getType();
- const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
- if (!Builder.getIsFPConstrained() ||
- Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
- !Ty->isIEEE()) {
- V = Builder.CreateFCmpUNO(V, V, "cmp");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
- }
- if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
- return RValue::get(Result);
- // NaN has all exp bits set and a nonzero significand. Therefore:
- // isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
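- // For float, for example, the exp mask is 0x7f800000; the integer bits of
- // abs of a NaN exceed it, so the subtraction goes negative and its sign
- // bit becomes the result.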
- unsigned bitsize = Ty->getScalarSizeInBits();
- llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
- Value *IntV = Builder.CreateBitCast(V, IntTy);
- APInt AndMask = APInt::getSignedMaxValue(bitsize);
- Value *AbsV =
- Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
- APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
- Value *Sub =
- Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
- // V = sign bit (Sub) <=> V = (Sub < 0)
- V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
- if (bitsize > 32)
- V = Builder.CreateTrunc(V, ConvertType(E->getType()));
- return RValue::get(V);
- }
- case Builtin::BI__builtin_elementwise_abs: {
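- // Integer element types use llvm.abs (with INT_MIN treated as defined,
- // i.e. the poison flag is false); floating-point types use llvm.fabs.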
- Value *Result;
- QualType QT = E->getArg(0)->getType();
- if (auto *VecTy = QT->getAs<VectorType>())
- QT = VecTy->getElementType();
- if (QT->isIntegerType())
- Result = Builder.CreateBinaryIntrinsic(
- llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
- Builder.getFalse(), nullptr, "elt.abs");
- else
- Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_elementwise_ceil:
- return RValue::get(
- emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
- case Builtin::BI__builtin_elementwise_floor:
- return RValue::get(
- emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
- case Builtin::BI__builtin_elementwise_roundeven:
- return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
- "elt.roundeven"));
- case Builtin::BI__builtin_elementwise_trunc:
- return RValue::get(
- emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
- case Builtin::BI__builtin_elementwise_max: {
- Value *Op0 = EmitScalarExpr(E->getArg(0));
- Value *Op1 = EmitScalarExpr(E->getArg(1));
- Value *Result;
- if (Op0->getType()->isIntOrIntVectorTy()) {
- QualType Ty = E->getArg(0)->getType();
- if (auto *VecTy = Ty->getAs<VectorType>())
- Ty = VecTy->getElementType();
- Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
- ? llvm::Intrinsic::smax
- : llvm::Intrinsic::umax,
- Op0, Op1, nullptr, "elt.max");
- } else
- Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_elementwise_min: {
- Value *Op0 = EmitScalarExpr(E->getArg(0));
- Value *Op1 = EmitScalarExpr(E->getArg(1));
- Value *Result;
- if (Op0->getType()->isIntOrIntVectorTy()) {
- QualType Ty = E->getArg(0)->getType();
- if (auto *VecTy = Ty->getAs<VectorType>())
- Ty = VecTy->getElementType();
- Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
- ? llvm::Intrinsic::smin
- : llvm::Intrinsic::umin,
- Op0, Op1, nullptr, "elt.min");
- } else
- Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_reduce_max: {
- auto GetIntrinsicID = [](QualType QT) {
- if (auto *VecTy = QT->getAs<VectorType>())
- QT = VecTy->getElementType();
- if (QT->isSignedIntegerType())
- return llvm::Intrinsic::vector_reduce_smax;
- if (QT->isUnsignedIntegerType())
- return llvm::Intrinsic::vector_reduce_umax;
- assert(QT->isFloatingType() && "must have a float here");
- return llvm::Intrinsic::vector_reduce_fmax;
- };
- return RValue::get(emitUnaryBuiltin(
- *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
- }
- case Builtin::BI__builtin_reduce_min: {
- auto GetIntrinsicID = [](QualType QT) {
- if (auto *VecTy = QT->getAs<VectorType>())
- QT = VecTy->getElementType();
- if (QT->isSignedIntegerType())
- return llvm::Intrinsic::vector_reduce_smin;
- if (QT->isUnsignedIntegerType())
- return llvm::Intrinsic::vector_reduce_umin;
- assert(QT->isFloatingType() && "must have a float here");
- return llvm::Intrinsic::vector_reduce_fmin;
- };
- return RValue::get(emitUnaryBuiltin(
- *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
- }
- case Builtin::BI__builtin_reduce_xor:
- return RValue::get(emitUnaryBuiltin(
- *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
- case Builtin::BI__builtin_reduce_or:
- return RValue::get(emitUnaryBuiltin(
- *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
- case Builtin::BI__builtin_reduce_and:
- return RValue::get(emitUnaryBuiltin(
- *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
- case Builtin::BI__builtin_matrix_transpose: {
- auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
- Value *MatValue = EmitScalarExpr(E->getArg(0));
- MatrixBuilder<CGBuilderTy> MB(Builder);
- Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
- MatrixTy->getNumColumns());
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_matrix_column_major_load: {
- MatrixBuilder<CGBuilderTy> MB(Builder);
- // Emit everything that isn't dependent on the first parameter type
- Value *Stride = EmitScalarExpr(E->getArg(3));
- const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
- auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
- assert(PtrTy && "arg0 must be of pointer type");
- bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
- Address Src = EmitPointerWithAlignment(E->getArg(0));
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- Value *Result = MB.CreateColumnMajorLoad(
- Src.getPointer(), Align(Src.getAlignment().getQuantity()), Stride,
- IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(),
- "matrix");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_matrix_column_major_store: {
- MatrixBuilder<CGBuilderTy> MB(Builder);
- Value *Matrix = EmitScalarExpr(E->getArg(0));
- Address Dst = EmitPointerWithAlignment(E->getArg(1));
- Value *Stride = EmitScalarExpr(E->getArg(2));
- const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
- auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
- assert(PtrTy && "arg1 must be of pointer type");
- bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
- EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 0);
- Value *Result = MB.CreateColumnMajorStore(
- Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
- Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
- return RValue::get(Result);
- }
- case Builtin::BIfinite:
- case Builtin::BI__finite:
- case Builtin::BIfinitef:
- case Builtin::BI__finitef:
- case Builtin::BIfinitel:
- case Builtin::BI__finitel:
- case Builtin::BI__builtin_isinf:
- case Builtin::BI__builtin_isfinite: {
- // isinf(x) --> fabs(x) == infinity
- // isfinite(x) --> fabs(x) != infinity
- // x != NaN via the ordered compare in either case.
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Value *V = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = V->getType();
- if (!Builder.getIsFPConstrained() ||
- Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
- !Ty->isIEEE()) {
- Value *Fabs = EmitFAbs(*this, V);
- Constant *Infinity = ConstantFP::getInfinity(V->getType());
- CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
- ? CmpInst::FCMP_OEQ
- : CmpInst::FCMP_ONE;
- Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
- return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
- }
- if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
- return RValue::get(Result);
- // Inf values have all exp bits set and a zero significand. Therefore:
- // isinf(V) == ((V << 1) == ((exp mask) << 1))
- // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
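- // Shifting left by one drops the sign bit, so the same comparison works
- // for positive and negative values.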
- unsigned bitsize = Ty->getScalarSizeInBits();
- llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
- Value *IntV = Builder.CreateBitCast(V, IntTy);
- Value *Shl1 = Builder.CreateShl(IntV, 1);
- const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
- APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
- Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
- if (BuiltinID == Builtin::BI__builtin_isinf)
- V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
- else
- V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
- }
- case Builtin::BI__builtin_isinf_sign: {
- // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
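- // For example, isinf_sign(-INFINITY) == -1, isinf_sign(INFINITY) == 1,
- // and isinf_sign(3.0) == 0.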
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
- Value *Arg = EmitScalarExpr(E->getArg(0));
- Value *AbsArg = EmitFAbs(*this, Arg);
- Value *IsInf = Builder.CreateFCmpOEQ(
- AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
- Value *IsNeg = EmitSignBit(*this, Arg);
- llvm::Type *IntTy = ConvertType(E->getType());
- Value *Zero = Constant::getNullValue(IntTy);
- Value *One = ConstantInt::get(IntTy, 1);
- Value *NegativeOne = ConstantInt::get(IntTy, -1);
- Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
- Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_isnormal: {
- // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
- Value *V = EmitScalarExpr(E->getArg(0));
- Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
- Value *Abs = EmitFAbs(*this, V);
- Value *IsLessThanInf =
- Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
- APFloat Smallest = APFloat::getSmallestNormalized(
- getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
- Value *IsNormal =
- Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
- "isnormal");
- V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
- V = Builder.CreateAnd(V, IsNormal, "and");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
- }
- case Builtin::BI__builtin_flt_rounds: {
- Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Result = Builder.CreateCall(F);
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_fpclassify: {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
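- // The checks below run in order: zero, NaN, infinity, then normal vs.
- // subnormal, each branch feeding the result PHI.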
- Value *V = EmitScalarExpr(E->getArg(5));
- llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
- // Create Result
- BasicBlock *Begin = Builder.GetInsertBlock();
- BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
- Builder.SetInsertPoint(End);
- PHINode *Result =
- Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
- "fpclassify_result");
- // if (V==0) return FP_ZERO
- Builder.SetInsertPoint(Begin);
- Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
- "iszero");
- Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
- BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
- Builder.CreateCondBr(IsZero, End, NotZero);
- Result->addIncoming(ZeroLiteral, Begin);
- // if (V != V) return FP_NAN
- Builder.SetInsertPoint(NotZero);
- Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
- Value *NanLiteral = EmitScalarExpr(E->getArg(0));
- BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
- Builder.CreateCondBr(IsNan, End, NotNan);
- Result->addIncoming(NanLiteral, NotZero);
- // if (fabs(V) == infinity) return FP_INFINITY
- Builder.SetInsertPoint(NotNan);
- Value *VAbs = EmitFAbs(*this, V);
- Value *IsInf =
- Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
- "isinf");
- Value *InfLiteral = EmitScalarExpr(E->getArg(1));
- BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
- Builder.CreateCondBr(IsInf, End, NotInf);
- Result->addIncoming(InfLiteral, NotNan);
- // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
- Builder.SetInsertPoint(NotInf);
- APFloat Smallest = APFloat::getSmallestNormalized(
- getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
- Value *IsNormal =
- Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
- "isnormal");
- Value *NormalResult =
- Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(3)));
- Builder.CreateBr(End);
- Result->addIncoming(NormalResult, NotInf);
- // return Result
- Builder.SetInsertPoint(End);
- return RValue::get(Result);
- }
- case Builtin::BIalloca:
- case Builtin::BI_alloca:
- case Builtin::BI__builtin_alloca_uninitialized:
- case Builtin::BI__builtin_alloca: {
- Value *Size = EmitScalarExpr(E->getArg(0));
- const TargetInfo &TI = getContext().getTargetInfo();
- // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
- const Align SuitableAlignmentInBytes =
- CGM.getContext()
- .toCharUnitsFromBits(TI.getSuitableAlign())
- .getAsAlign();
- AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(SuitableAlignmentInBytes);
- if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
- initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
- return RValue::get(AI);
- }
- case Builtin::BI__builtin_alloca_with_align_uninitialized:
- case Builtin::BI__builtin_alloca_with_align: {
- Value *Size = EmitScalarExpr(E->getArg(0));
- Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
- auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
- unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
- const Align AlignmentInBytes =
- CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
- AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(AlignmentInBytes);
- if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
- initializeAlloca(*this, AI, Size, AlignmentInBytes);
- return RValue::get(AI);
- }
- case Builtin::BIbzero:
- case Builtin::BI__builtin_bzero: {
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Value *SizeVal = EmitScalarExpr(E->getArg(1));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
- return RValue::get(nullptr);
- }
- case Builtin::BImemcpy:
- case Builtin::BI__builtin_memcpy:
- case Builtin::BImempcpy:
- case Builtin::BI__builtin_mempcpy: {
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
- Builder.CreateMemCpy(Dest, Src, SizeVal, false);
- if (BuiltinID == Builtin::BImempcpy ||
- BuiltinID == Builtin::BI__builtin_mempcpy)
- return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
- Dest.getPointer(), SizeVal));
- else
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BI__builtin_memcpy_inline: {
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- uint64_t Size =
- E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
- Builder.CreateMemCpyInline(Dest, Src, Size);
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_char_memchr:
- BuiltinID = Builtin::BI__builtin_memchr;
- break;
- case Builtin::BI__builtin___memcpy_chk: {
- // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
- Expr::EvalResult SizeResult, DstSizeResult;
- if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
- break;
- llvm::APSInt Size = SizeResult.Val.getInt();
- llvm::APSInt DstSize = DstSizeResult.Val.getInt();
- if (Size.ugt(DstSize))
- break;
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
- Builder.CreateMemCpy(Dest, Src, SizeVal, false);
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BI__builtin_objc_memmove_collectable: {
- Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
- Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
- Value *SizeVal = EmitScalarExpr(E->getArg(2));
- CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
- DestAddr, SrcAddr, SizeVal);
- return RValue::get(DestAddr.getPointer());
- }
- case Builtin::BI__builtin___memmove_chk: {
- // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
- Expr::EvalResult SizeResult, DstSizeResult;
- if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
- break;
- llvm::APSInt Size = SizeResult.Val.getInt();
- llvm::APSInt DstSize = DstSizeResult.Val.getInt();
- if (Size.ugt(DstSize))
- break;
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
- Builder.CreateMemMove(Dest, Src, SizeVal, false);
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BImemmove:
- case Builtin::BI__builtin_memmove: {
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
- Builder.CreateMemMove(Dest, Src, SizeVal, false);
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BImemset:
- case Builtin::BI__builtin_memset: {
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
- Builder.getInt8Ty());
- Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BI__builtin___memset_chk: {
- // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
- Expr::EvalResult SizeResult, DstSizeResult;
- if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
- break;
- llvm::APSInt Size = SizeResult.Val.getInt();
- llvm::APSInt DstSize = DstSizeResult.Val.getInt();
- if (Size.ugt(DstSize))
- break;
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
- Builder.getInt8Ty());
- Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
- Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
- return RValue::get(Dest.getPointer());
- }
- case Builtin::BI__builtin_wmemchr: {
- // The MSVC runtime library does not provide a definition of wmemchr, so we
- // need an inline implementation.
- if (!getTarget().getTriple().isOSMSVCRT())
- break;
- llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
- Value *Str = EmitScalarExpr(E->getArg(0));
- Value *Chr = EmitScalarExpr(E->getArg(1));
- Value *Size = EmitScalarExpr(E->getArg(2));
- BasicBlock *Entry = Builder.GetInsertBlock();
- BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
- BasicBlock *Next = createBasicBlock("wmemchr.next");
- BasicBlock *Exit = createBasicBlock("wmemchr.exit");
- Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
- Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
- EmitBlock(CmpEq);
- PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
- StrPhi->addIncoming(Str, Entry);
- PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
- SizePhi->addIncoming(Size, Entry);
- CharUnits WCharAlign =
- getContext().getTypeAlignInChars(getContext().WCharTy);
- Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
- Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
- Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
- Builder.CreateCondBr(StrEqChr, Exit, Next);
- EmitBlock(Next);
- Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
- Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
- Value *NextSizeEq0 =
- Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
- Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
- StrPhi->addIncoming(NextStr, Next);
- SizePhi->addIncoming(NextSize, Next);
- EmitBlock(Exit);
- PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
- Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
- Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
- Ret->addIncoming(FoundChr, CmpEq);
- return RValue::get(Ret);
- }
- case Builtin::BI__builtin_wmemcmp: {
- // The MSVC runtime library does not provide a definition of wmemcmp, so we
- // need an inline implementation.
- if (!getTarget().getTriple().isOSMSVCRT())
- break;
- llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
- Value *Dst = EmitScalarExpr(E->getArg(0));
- Value *Src = EmitScalarExpr(E->getArg(1));
- Value *Size = EmitScalarExpr(E->getArg(2));
- BasicBlock *Entry = Builder.GetInsertBlock();
- BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
- BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
- BasicBlock *Next = createBasicBlock("wmemcmp.next");
- BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
- Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
- Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
- EmitBlock(CmpGT);
- PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
- DstPhi->addIncoming(Dst, Entry);
- PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
- SrcPhi->addIncoming(Src, Entry);
- PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
- SizePhi->addIncoming(Size, Entry);
- CharUnits WCharAlign =
- getContext().getTypeAlignInChars(getContext().WCharTy);
- Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
- Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
- Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
- Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
- EmitBlock(CmpLT);
- Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
- Builder.CreateCondBr(DstLtSrc, Exit, Next);
- EmitBlock(Next);
- Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
- Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
- Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
- Value *NextSizeEq0 =
- Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
- Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
- DstPhi->addIncoming(NextDst, Next);
- SrcPhi->addIncoming(NextSrc, Next);
- SizePhi->addIncoming(NextSize, Next);
- EmitBlock(Exit);
- PHINode *Ret = Builder.CreatePHI(IntTy, 4);
- Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
- Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
- Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
- Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
- return RValue::get(Ret);
- }
- case Builtin::BI__builtin_dwarf_cfa: {
- // The offset in bytes from the first argument to the CFA.
- //
- // Why on earth is this in the frontend? Is there any reason at
- // all that the backend can't reasonably determine this while
- // lowering llvm.eh.dwarf.cfa()?
- //
- // TODO: If there's a satisfactory reason, add a target hook for
- // this instead of hard-coding 0, which is correct for most targets.
- int32_t Offset = 0;
- Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
- return RValue::get(Builder.CreateCall(F,
- llvm::ConstantInt::get(Int32Ty, Offset)));
- }
- case Builtin::BI__builtin_return_address: {
- Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
- getContext().UnsignedIntTy);
- Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
- return RValue::get(Builder.CreateCall(F, Depth));
- }
- case Builtin::BI_ReturnAddress: {
- Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
- return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
- }
- case Builtin::BI__builtin_frame_address: {
- Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
- getContext().UnsignedIntTy);
- Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
- return RValue::get(Builder.CreateCall(F, Depth));
- }
- case Builtin::BI__builtin_extract_return_addr: {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_frob_return_addr: {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_dwarf_sp_column: {
- llvm::IntegerType *Ty
- = cast<llvm::IntegerType>(ConvertType(E->getType()));
- int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
- if (Column == -1) {
- CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
- return RValue::get(llvm::UndefValue::get(Ty));
- }
- return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
- }
- case Builtin::BI__builtin_init_dwarf_reg_size_table: {
- Value *Address = EmitScalarExpr(E->getArg(0));
- if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
- CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
- return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
- }
- case Builtin::BI__builtin_eh_return: {
- Value *Int = EmitScalarExpr(E->getArg(0));
- Value *Ptr = EmitScalarExpr(E->getArg(1));
- llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
- assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
- "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
- Function *F =
- CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
- : Intrinsic::eh_return_i64);
- Builder.CreateCall(F, {Int, Ptr});
- Builder.CreateUnreachable();
- // We do need to preserve an insertion point.
- EmitBlock(createBasicBlock("builtin_eh_return.cont"));
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_unwind_init: {
- Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
- return RValue::get(Builder.CreateCall(F));
- }
- case Builtin::BI__builtin_extend_pointer: {
- // Extends a pointer to the size of an _Unwind_Word, which is
- // uint64_t on all platforms. Generally this gets poked into a
- // register and eventually used as an address, so if the
- // addressing registers are wider than pointers and the platform
- // doesn't implicitly ignore high-order bits when doing
- // addressing, we need to make sure we zext / sext based on
- // the platform's expectations.
- //
- // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
- // Cast the pointer to intptr_t.
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
- // If that's 64 bits, we're done.
- if (IntPtrTy->getBitWidth() == 64)
- return RValue::get(Result);
- // Otherwise, ask the codegen data what to do.
- if (getTargetHooks().extendPointerWithSExt())
- return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
- else
- return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
- }
- case Builtin::BI__builtin_setjmp: {
- // Buffer is a void**.
- Address Buf = EmitPointerWithAlignment(E->getArg(0));
- // Store the frame pointer to the setjmp buffer.
- Value *FrameAddr = Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
- ConstantInt::get(Int32Ty, 0));
- Builder.CreateStore(FrameAddr, Buf);
- // Store the stack pointer to the setjmp buffer.
- Value *StackAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
- Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
- Builder.CreateStore(StackAddr, StackSaveSlot);
- // Call LLVM's EH setjmp, which is lightweight.
- Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
- Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
- return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
- }
- case Builtin::BI__builtin_longjmp: {
- Value *Buf = EmitScalarExpr(E->getArg(0));
- Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
- // Call LLVM's EH longjmp, which is lightweight.
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
- // longjmp doesn't return; mark this as unreachable.
- Builder.CreateUnreachable();
- // We do need to preserve an insertion point.
- EmitBlock(createBasicBlock("longjmp.cont"));
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_launder: {
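- // Emit llvm.launder.invariant.group only when the pointee type requires
- // laundering; otherwise return the pointer unchanged.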
- const Expr *Arg = E->getArg(0);
- QualType ArgTy = Arg->getType()->getPointeeType();
- Value *Ptr = EmitScalarExpr(Arg);
- if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
- Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
- return RValue::get(Ptr);
- }
- case Builtin::BI__sync_fetch_and_add:
- case Builtin::BI__sync_fetch_and_sub:
- case Builtin::BI__sync_fetch_and_or:
- case Builtin::BI__sync_fetch_and_and:
- case Builtin::BI__sync_fetch_and_xor:
- case Builtin::BI__sync_fetch_and_nand:
- case Builtin::BI__sync_add_and_fetch:
- case Builtin::BI__sync_sub_and_fetch:
- case Builtin::BI__sync_and_and_fetch:
- case Builtin::BI__sync_or_and_fetch:
- case Builtin::BI__sync_xor_and_fetch:
- case Builtin::BI__sync_nand_and_fetch:
- case Builtin::BI__sync_val_compare_and_swap:
- case Builtin::BI__sync_bool_compare_and_swap:
- case Builtin::BI__sync_lock_test_and_set:
- case Builtin::BI__sync_lock_release:
- case Builtin::BI__sync_swap:
- llvm_unreachable("Shouldn't make it through sema");
- case Builtin::BI__sync_fetch_and_add_1:
- case Builtin::BI__sync_fetch_and_add_2:
- case Builtin::BI__sync_fetch_and_add_4:
- case Builtin::BI__sync_fetch_and_add_8:
- case Builtin::BI__sync_fetch_and_add_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
- case Builtin::BI__sync_fetch_and_sub_1:
- case Builtin::BI__sync_fetch_and_sub_2:
- case Builtin::BI__sync_fetch_and_sub_4:
- case Builtin::BI__sync_fetch_and_sub_8:
- case Builtin::BI__sync_fetch_and_sub_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
- case Builtin::BI__sync_fetch_and_or_1:
- case Builtin::BI__sync_fetch_and_or_2:
- case Builtin::BI__sync_fetch_and_or_4:
- case Builtin::BI__sync_fetch_and_or_8:
- case Builtin::BI__sync_fetch_and_or_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
- case Builtin::BI__sync_fetch_and_and_1:
- case Builtin::BI__sync_fetch_and_and_2:
- case Builtin::BI__sync_fetch_and_and_4:
- case Builtin::BI__sync_fetch_and_and_8:
- case Builtin::BI__sync_fetch_and_and_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
- case Builtin::BI__sync_fetch_and_xor_1:
- case Builtin::BI__sync_fetch_and_xor_2:
- case Builtin::BI__sync_fetch_and_xor_4:
- case Builtin::BI__sync_fetch_and_xor_8:
- case Builtin::BI__sync_fetch_and_xor_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
- case Builtin::BI__sync_fetch_and_nand_1:
- case Builtin::BI__sync_fetch_and_nand_2:
- case Builtin::BI__sync_fetch_and_nand_4:
- case Builtin::BI__sync_fetch_and_nand_8:
- case Builtin::BI__sync_fetch_and_nand_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
- // Clang extensions: not overloaded yet.
- case Builtin::BI__sync_fetch_and_min:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
- case Builtin::BI__sync_fetch_and_max:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
- case Builtin::BI__sync_fetch_and_umin:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
- case Builtin::BI__sync_fetch_and_umax:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
- case Builtin::BI__sync_add_and_fetch_1:
- case Builtin::BI__sync_add_and_fetch_2:
- case Builtin::BI__sync_add_and_fetch_4:
- case Builtin::BI__sync_add_and_fetch_8:
- case Builtin::BI__sync_add_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
- llvm::Instruction::Add);
- case Builtin::BI__sync_sub_and_fetch_1:
- case Builtin::BI__sync_sub_and_fetch_2:
- case Builtin::BI__sync_sub_and_fetch_4:
- case Builtin::BI__sync_sub_and_fetch_8:
- case Builtin::BI__sync_sub_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
- llvm::Instruction::Sub);
- case Builtin::BI__sync_and_and_fetch_1:
- case Builtin::BI__sync_and_and_fetch_2:
- case Builtin::BI__sync_and_and_fetch_4:
- case Builtin::BI__sync_and_and_fetch_8:
- case Builtin::BI__sync_and_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
- llvm::Instruction::And);
- case Builtin::BI__sync_or_and_fetch_1:
- case Builtin::BI__sync_or_and_fetch_2:
- case Builtin::BI__sync_or_and_fetch_4:
- case Builtin::BI__sync_or_and_fetch_8:
- case Builtin::BI__sync_or_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
- llvm::Instruction::Or);
- case Builtin::BI__sync_xor_and_fetch_1:
- case Builtin::BI__sync_xor_and_fetch_2:
- case Builtin::BI__sync_xor_and_fetch_4:
- case Builtin::BI__sync_xor_and_fetch_8:
- case Builtin::BI__sync_xor_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
- llvm::Instruction::Xor);
- case Builtin::BI__sync_nand_and_fetch_1:
- case Builtin::BI__sync_nand_and_fetch_2:
- case Builtin::BI__sync_nand_and_fetch_4:
- case Builtin::BI__sync_nand_and_fetch_8:
- case Builtin::BI__sync_nand_and_fetch_16:
- return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
- llvm::Instruction::And, true);
- case Builtin::BI__sync_val_compare_and_swap_1:
- case Builtin::BI__sync_val_compare_and_swap_2:
- case Builtin::BI__sync_val_compare_and_swap_4:
- case Builtin::BI__sync_val_compare_and_swap_8:
- case Builtin::BI__sync_val_compare_and_swap_16:
- return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
- case Builtin::BI__sync_bool_compare_and_swap_1:
- case Builtin::BI__sync_bool_compare_and_swap_2:
- case Builtin::BI__sync_bool_compare_and_swap_4:
- case Builtin::BI__sync_bool_compare_and_swap_8:
- case Builtin::BI__sync_bool_compare_and_swap_16:
- return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
- case Builtin::BI__sync_swap_1:
- case Builtin::BI__sync_swap_2:
- case Builtin::BI__sync_swap_4:
- case Builtin::BI__sync_swap_8:
- case Builtin::BI__sync_swap_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
- case Builtin::BI__sync_lock_test_and_set_1:
- case Builtin::BI__sync_lock_test_and_set_2:
- case Builtin::BI__sync_lock_test_and_set_4:
- case Builtin::BI__sync_lock_test_and_set_8:
- case Builtin::BI__sync_lock_test_and_set_16:
- return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
- case Builtin::BI__sync_lock_release_1:
- case Builtin::BI__sync_lock_release_2:
- case Builtin::BI__sync_lock_release_4:
- case Builtin::BI__sync_lock_release_8:
- case Builtin::BI__sync_lock_release_16: {
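- // __sync_lock_release lowers to an atomic store of zero with release
- // ordering, sized to the pointee type.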
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- StoreSize.getQuantity() * 8);
- Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
- llvm::StoreInst *Store =
- Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
- StoreSize);
- Store->setAtomic(llvm::AtomicOrdering::Release);
- return RValue::get(nullptr);
- }
- case Builtin::BI__sync_synchronize: {
- // We assume this is supposed to correspond to a C++0x-style
- // sequentially-consistent fence (i.e. this is only usable for
- // synchronization, not device I/O or anything like that). This intrinsic
- // is really badly designed in the sense that in theory, there isn't
- // any way to safely use it... but in practice, it mostly works
- // to use it with non-atomic loads and stores to get acquire/release
- // semantics.
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_nontemporal_load:
- return RValue::get(EmitNontemporalLoad(*this, E));
- case Builtin::BI__builtin_nontemporal_store:
- return RValue::get(EmitNontemporalStore(*this, E));
- case Builtin::BI__c11_atomic_is_lock_free:
- case Builtin::BI__atomic_is_lock_free: {
- // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
- // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
- // _Atomic(T) is always properly-aligned.
- const char *LibCallName = "__atomic_is_lock_free";
- CallArgList Args;
- Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
- getContext().getSizeType());
- if (BuiltinID == Builtin::BI__atomic_is_lock_free)
- Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
- getContext().VoidPtrTy);
- else
- Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
- getContext().VoidPtrTy);
- const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
- llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
- return EmitCall(FuncInfo, CGCallee::forDirect(Func),
- ReturnValueSlot(), Args);
- }
- case Builtin::BI__atomic_test_and_set: {
- // The builtin's parameter type is always volatile-qualified, so look at the
- // argument type to determine whether this is really a volatile operation.
- QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
- bool Volatile =
- PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
- Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
- Value *NewVal = Builder.getInt8(1);
- Value *Order = EmitScalarExpr(E->getArg(1));
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- AtomicRMWInst *Result = nullptr;
- switch (ord) {
- case 0: // memory_order_relaxed
- default: // invalid order
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
- llvm::AtomicOrdering::Monotonic);
- break;
- case 1: // memory_order_consume
- case 2: // memory_order_acquire
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
- llvm::AtomicOrdering::Acquire);
- break;
- case 3: // memory_order_release
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
- llvm::AtomicOrdering::Release);
- break;
- case 4: // memory_order_acq_rel
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
- llvm::AtomicOrdering::AcquireRelease);
- break;
- case 5: // memory_order_seq_cst
- Result = Builder.CreateAtomicRMW(
- llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
- llvm::AtomicOrdering::SequentiallyConsistent);
- break;
- }
- Result->setVolatile(Volatile);
- return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
- }
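- // The ordering is not a compile-time constant: switch on the runtime order
- // value and perform the exchange with the matching ordering in each block.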
- llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
- llvm::BasicBlock *BBs[5] = {
- createBasicBlock("monotonic", CurFn),
- createBasicBlock("acquire", CurFn),
- createBasicBlock("release", CurFn),
- createBasicBlock("acqrel", CurFn),
- createBasicBlock("seqcst", CurFn)
- };
- llvm::AtomicOrdering Orders[5] = {
- llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
- llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
- llvm::AtomicOrdering::SequentiallyConsistent};
- Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
- Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
- for (unsigned i = 0; i < 5; ++i) {
- Builder.SetInsertPoint(BBs[i]);
- AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal, Orders[i]);
- RMW->setVolatile(Volatile);
- Result->addIncoming(RMW, BBs[i]);
- Builder.CreateBr(ContBB);
- }
- SI->addCase(Builder.getInt32(0), BBs[0]);
- SI->addCase(Builder.getInt32(1), BBs[1]);
- SI->addCase(Builder.getInt32(2), BBs[1]);
- SI->addCase(Builder.getInt32(3), BBs[2]);
- SI->addCase(Builder.getInt32(4), BBs[3]);
- SI->addCase(Builder.getInt32(5), BBs[4]);
- Builder.SetInsertPoint(ContBB);
- return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
- }
- case Builtin::BI__atomic_clear: {
- QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
- bool Volatile =
- PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Address Ptr = EmitPointerWithAlignment(E->getArg(0));
- unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
- Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
- Value *NewVal = Builder.getInt8(0);
- Value *Order = EmitScalarExpr(E->getArg(1));
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
- switch (ord) {
- case 0: // memory_order_relaxed
- default: // invalid order
- Store->setOrdering(llvm::AtomicOrdering::Monotonic);
- break;
- case 3: // memory_order_release
- Store->setOrdering(llvm::AtomicOrdering::Release);
- break;
- case 5: // memory_order_seq_cst
- Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
- break;
- }
- return RValue::get(nullptr);
- }
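- // Non-constant ordering: switch on the runtime value and emit the store
- // with the matching ordering (monotonic, release, or seq_cst).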
- llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
- llvm::BasicBlock *BBs[3] = {
- createBasicBlock("monotonic", CurFn),
- createBasicBlock("release", CurFn),
- createBasicBlock("seqcst", CurFn)
- };
- llvm::AtomicOrdering Orders[3] = {
- llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
- llvm::AtomicOrdering::SequentiallyConsistent};
- Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
- for (unsigned i = 0; i < 3; ++i) {
- Builder.SetInsertPoint(BBs[i]);
- StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
- Store->setOrdering(Orders[i]);
- Builder.CreateBr(ContBB);
- }
- SI->addCase(Builder.getInt32(0), BBs[0]);
- SI->addCase(Builder.getInt32(3), BBs[1]);
- SI->addCase(Builder.getInt32(5), BBs[2]);
- Builder.SetInsertPoint(ContBB);
- return RValue::get(nullptr);
- }
- case Builtin::BI__atomic_thread_fence:
- case Builtin::BI__atomic_signal_fence:
- case Builtin::BI__c11_atomic_thread_fence:
- case Builtin::BI__c11_atomic_signal_fence: {
- llvm::SyncScope::ID SSID;
- if (BuiltinID == Builtin::BI__atomic_signal_fence ||
- BuiltinID == Builtin::BI__c11_atomic_signal_fence)
- SSID = llvm::SyncScope::SingleThread;
- else
- SSID = llvm::SyncScope::System;
- Value *Order = EmitScalarExpr(E->getArg(0));
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- switch (ord) {
- case 0: // memory_order_relaxed
- default: // invalid order
- break;
- case 1: // memory_order_consume
- case 2: // memory_order_acquire
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
- break;
- case 3: // memory_order_release
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
- break;
- case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
- break;
- case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
- break;
- }
- return RValue::get(nullptr);
- }
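- // Non-constant ordering: switch on the runtime value; relaxed (and any
- // invalid order) branches straight to the continuation block with no fence.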
- llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
- AcquireBB = createBasicBlock("acquire", CurFn);
- ReleaseBB = createBasicBlock("release", CurFn);
- AcqRelBB = createBasicBlock("acqrel", CurFn);
- SeqCstBB = createBasicBlock("seqcst", CurFn);
- llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
- Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
- Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
- Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(1), AcquireBB);
- SI->addCase(Builder.getInt32(2), AcquireBB);
- Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
- Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(3), ReleaseBB);
- Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
- Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(4), AcqRelBB);
- Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
- Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(5), SeqCstBB);
- Builder.SetInsertPoint(ContBB);
- return RValue::get(nullptr);
- }
- case Builtin::BI__builtin_signbit:
- case Builtin::BI__builtin_signbitf:
- case Builtin::BI__builtin_signbitl: {
- return RValue::get(
- Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
- ConvertType(E->getType())));
- }
- case Builtin::BI__warn_memset_zero_len:
- return RValue::getIgnored();
- case Builtin::BI__annotation: {
- // Re-encode each wide string to UTF8 and make an MDString.
- SmallVector<Metadata *, 1> Strings;
- for (const Expr *Arg : E->arguments()) {
- const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
- assert(Str->getCharByteWidth() == 2);
- StringRef WideBytes = Str->getBytes();
- std::string StrUtf8;
- if (!convertUTF16ToUTF8String(
- makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
- CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
- continue;
- }
- Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
- }
- // Build an MDTuple of MDStrings and emit the intrinsic call.
- llvm::Function *F =
- CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
- MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
- Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
- return RValue::getIgnored();
- }
- case Builtin::BI__builtin_annotation: {
- llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
- AnnVal->getType());
- // Get the annotation string, looking through casts. Sema requires this to be
- // a non-wide string literal, potentially cast, so the cast<> is safe.
- const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
- StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
- return RValue::get(
- EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
- }
- case Builtin::BI__builtin_addcb:
- case Builtin::BI__builtin_addcs:
- case Builtin::BI__builtin_addc:
- case Builtin::BI__builtin_addcl:
- case Builtin::BI__builtin_addcll:
- case Builtin::BI__builtin_subcb:
- case Builtin::BI__builtin_subcs:
- case Builtin::BI__builtin_subc:
- case Builtin::BI__builtin_subcl:
- case Builtin::BI__builtin_subcll: {
- // We translate all of these builtins from expressions of the form:
- // int x = ..., y = ..., carryin = ..., carryout, result;
- // result = __builtin_addc(x, y, carryin, &carryout);
- //
- // to LLVM IR of the form:
- //
- // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
- // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
- // %carry1 = extractvalue {i32, i1} %tmp1, 1
- // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
- // i32 %carryin)
- // %result = extractvalue {i32, i1} %tmp2, 0
- // %carry2 = extractvalue {i32, i1} %tmp2, 1
- // %tmp3 = or i1 %carry1, %carry2
- // %tmp4 = zext i1 %tmp3 to i32
- // store i32 %tmp4, i32* %carryout
- // Scalarize our inputs.
- llvm::Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Value *Y = EmitScalarExpr(E->getArg(1));
- llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
- Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
- // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
- llvm::Intrinsic::ID IntrinsicId;
- switch (BuiltinID) {
- default: llvm_unreachable("Unknown multiprecision builtin id.");
- case Builtin::BI__builtin_addcb:
- case Builtin::BI__builtin_addcs:
- case Builtin::BI__builtin_addc:
- case Builtin::BI__builtin_addcl:
- case Builtin::BI__builtin_addcll:
- IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
- break;
- case Builtin::BI__builtin_subcb:
- case Builtin::BI__builtin_subcs:
- case Builtin::BI__builtin_subc:
- case Builtin::BI__builtin_subcl:
- case Builtin::BI__builtin_subcll:
- IntrinsicId = llvm::Intrinsic::usub_with_overflow;
- break;
- }
- // Construct our resulting LLVM IR expression.
- llvm::Value *Carry1;
- llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
- X, Y, Carry1);
- llvm::Value *Carry2;
- llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
- Sum1, Carryin, Carry2);
- llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
- X->getType());
- Builder.CreateStore(CarryOut, CarryOutPtr);
- return RValue::get(Sum2);
- }
- case Builtin::BI__builtin_add_overflow:
- case Builtin::BI__builtin_sub_overflow:
- case Builtin::BI__builtin_mul_overflow: {
- const clang::Expr *LeftArg = E->getArg(0);
- const clang::Expr *RightArg = E->getArg(1);
- const clang::Expr *ResultArg = E->getArg(2);
- clang::QualType ResultQTy =
- ResultArg->getType()->castAs<PointerType>()->getPointeeType();
- WidthAndSignedness LeftInfo =
- getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
- WidthAndSignedness RightInfo =
- getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
- WidthAndSignedness ResultInfo =
- getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
- // Handle mixed-sign multiplication as a special case, because adding
- // runtime or backend support for our generic irgen would be too expensive.
- if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
- return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
- RightInfo, ResultArg, ResultQTy,
- ResultInfo);
- if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
- ResultInfo))
- return EmitCheckedUnsignedMultiplySignedResult(
- *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
- ResultInfo);
- WidthAndSignedness EncompassingInfo =
- EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
- llvm::Type *EncompassingLLVMTy =
- llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
- llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
- llvm::Intrinsic::ID IntrinsicId;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unknown overflow builtin id.");
- case Builtin::BI__builtin_add_overflow:
- IntrinsicId = EncompassingInfo.Signed
- ? llvm::Intrinsic::sadd_with_overflow
- : llvm::Intrinsic::uadd_with_overflow;
- break;
- case Builtin::BI__builtin_sub_overflow:
- IntrinsicId = EncompassingInfo.Signed
- ? llvm::Intrinsic::ssub_with_overflow
- : llvm::Intrinsic::usub_with_overflow;
- break;
- case Builtin::BI__builtin_mul_overflow:
- IntrinsicId = EncompassingInfo.Signed
- ? llvm::Intrinsic::smul_with_overflow
- : llvm::Intrinsic::umul_with_overflow;
- break;
- }
- llvm::Value *Left = EmitScalarExpr(LeftArg);
- llvm::Value *Right = EmitScalarExpr(RightArg);
- Address ResultPtr = EmitPointerWithAlignment(ResultArg);
- // Extend each operand to the encompassing type.
- Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
- Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
- // Perform the operation on the extended values.
- llvm::Value *Overflow, *Result;
- Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
- if (EncompassingInfo.Width > ResultInfo.Width) {
- // The encompassing type is wider than the result type, so we need to
- // truncate it.
- llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
- // To see if the truncation caused an overflow, we will extend
- // the result and then compare it to the original result.
- llvm::Value *ResultTruncExt = Builder.CreateIntCast(
- ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
- llvm::Value *TruncationOverflow =
- Builder.CreateICmpNE(Result, ResultTruncExt);
- Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
- Result = ResultTrunc;
- }
- // Finally, store the result using the pointer.
- bool isVolatile =
- ResultArg->getType()->getPointeeType().isVolatileQualified();
- Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
- return RValue::get(Overflow);
- }
- case Builtin::BI__builtin_uadd_overflow:
- case Builtin::BI__builtin_uaddl_overflow:
- case Builtin::BI__builtin_uaddll_overflow:
- case Builtin::BI__builtin_usub_overflow:
- case Builtin::BI__builtin_usubl_overflow:
- case Builtin::BI__builtin_usubll_overflow:
- case Builtin::BI__builtin_umul_overflow:
- case Builtin::BI__builtin_umull_overflow:
- case Builtin::BI__builtin_umulll_overflow:
- case Builtin::BI__builtin_sadd_overflow:
- case Builtin::BI__builtin_saddl_overflow:
- case Builtin::BI__builtin_saddll_overflow:
- case Builtin::BI__builtin_ssub_overflow:
- case Builtin::BI__builtin_ssubl_overflow:
- case Builtin::BI__builtin_ssubll_overflow:
- case Builtin::BI__builtin_smul_overflow:
- case Builtin::BI__builtin_smull_overflow:
- case Builtin::BI__builtin_smulll_overflow: {
- // We translate all of these builtins directly to the relevant llvm IR node.
- // Scalarize our inputs.
- llvm::Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Value *Y = EmitScalarExpr(E->getArg(1));
- Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
- // Decide which of the overflow intrinsics we are lowering to:
- llvm::Intrinsic::ID IntrinsicId;
- switch (BuiltinID) {
- default: llvm_unreachable("Unknown overflow builtin id.");
- case Builtin::BI__builtin_uadd_overflow:
- case Builtin::BI__builtin_uaddl_overflow:
- case Builtin::BI__builtin_uaddll_overflow:
- IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
- break;
- case Builtin::BI__builtin_usub_overflow:
- case Builtin::BI__builtin_usubl_overflow:
- case Builtin::BI__builtin_usubll_overflow:
- IntrinsicId = llvm::Intrinsic::usub_with_overflow;
- break;
- case Builtin::BI__builtin_umul_overflow:
- case Builtin::BI__builtin_umull_overflow:
- case Builtin::BI__builtin_umulll_overflow:
- IntrinsicId = llvm::Intrinsic::umul_with_overflow;
- break;
- case Builtin::BI__builtin_sadd_overflow:
- case Builtin::BI__builtin_saddl_overflow:
- case Builtin::BI__builtin_saddll_overflow:
- IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
- break;
- case Builtin::BI__builtin_ssub_overflow:
- case Builtin::BI__builtin_ssubl_overflow:
- case Builtin::BI__builtin_ssubll_overflow:
- IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
- break;
- case Builtin::BI__builtin_smul_overflow:
- case Builtin::BI__builtin_smull_overflow:
- case Builtin::BI__builtin_smulll_overflow:
- IntrinsicId = llvm::Intrinsic::smul_with_overflow;
- break;
- }
- llvm::Value *Carry;
- llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
- Builder.CreateStore(Sum, SumOutPtr);
- return RValue::get(Carry);
- }
- case Builtin::BI__builtin_addressof:
- return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
- case Builtin::BI__builtin_function_start:
- return RValue::get(CGM.GetFunctionStart(
- E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
- case Builtin::BI__builtin_operator_new:
- return EmitBuiltinNewDeleteCall(
- E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
- case Builtin::BI__builtin_operator_delete:
- return EmitBuiltinNewDeleteCall(
- E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
- case Builtin::BI__builtin_is_aligned:
- return EmitBuiltinIsAligned(E);
- case Builtin::BI__builtin_align_up:
- return EmitBuiltinAlignTo(E, true);
- case Builtin::BI__builtin_align_down:
- return EmitBuiltinAlignTo(E, false);
- case Builtin::BI__noop:
- // __noop always evaluates to an integer literal zero.
- return RValue::get(ConstantInt::get(IntTy, 0));
- case Builtin::BI__builtin_call_with_static_chain: {
- const CallExpr *Call = cast<CallExpr>(E->getArg(0));
- const Expr *Chain = E->getArg(1);
- return EmitCall(Call->getCallee()->getType(),
- EmitCallee(Call->getCallee()), Call, ReturnValue,
- EmitScalarExpr(Chain));
- }
- case Builtin::BI_InterlockedExchange8:
- case Builtin::BI_InterlockedExchange16:
- case Builtin::BI_InterlockedExchange:
- case Builtin::BI_InterlockedExchangePointer:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
- case Builtin::BI_InterlockedCompareExchangePointer:
- case Builtin::BI_InterlockedCompareExchangePointer_nf: {
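- // Cast the operands to pointer-sized integers, perform a volatile cmpxchg
- // (monotonic for the _nf variant, seq_cst otherwise), and convert the old
- // value back to the original pointer type.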
- llvm::Type *RTy;
- llvm::IntegerType *IntType =
- IntegerType::get(getLLVMContext(),
- getContext().getTypeSize(E->getType()));
- llvm::Type *IntPtrType = IntType->getPointerTo();
- llvm::Value *Destination =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
- llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
- RTy = Exchange->getType();
- Exchange = Builder.CreatePtrToInt(Exchange, IntType);
- llvm::Value *Comparand =
- Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Ordering =
- BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
- AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
- auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- Ordering, Ordering);
- Result->setVolatile(true);
- return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
- 0),
- RTy));
- }
- case Builtin::BI_InterlockedCompareExchange8:
- case Builtin::BI_InterlockedCompareExchange16:
- case Builtin::BI_InterlockedCompareExchange:
- case Builtin::BI_InterlockedCompareExchange64:
- return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
- case Builtin::BI_InterlockedIncrement16:
- case Builtin::BI_InterlockedIncrement:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
- case Builtin::BI_InterlockedDecrement16:
- case Builtin::BI_InterlockedDecrement:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
- case Builtin::BI_InterlockedAnd8:
- case Builtin::BI_InterlockedAnd16:
- case Builtin::BI_InterlockedAnd:
- return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
- case Builtin::BI_InterlockedExchangeAdd8:
- case Builtin::BI_InterlockedExchangeAdd16:
- case Builtin::BI_InterlockedExchangeAdd:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
- case Builtin::BI_InterlockedExchangeSub8:
- case Builtin::BI_InterlockedExchangeSub16:
- case Builtin::BI_InterlockedExchangeSub:
- return RValue::get(
- EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
- case Builtin::BI_InterlockedOr8:
- case Builtin::BI_InterlockedOr16:
- case Builtin::BI_InterlockedOr:
- return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
- case Builtin::BI_InterlockedXor8:
- case Builtin::BI_InterlockedXor16:
- case Builtin::BI_InterlockedXor:
- return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
- case Builtin::BI_bittest64:
- case Builtin::BI_bittest:
- case Builtin::BI_bittestandcomplement64:
- case Builtin::BI_bittestandcomplement:
- case Builtin::BI_bittestandreset64:
- case Builtin::BI_bittestandreset:
- case Builtin::BI_bittestandset64:
- case Builtin::BI_bittestandset:
- case Builtin::BI_interlockedbittestandreset:
- case Builtin::BI_interlockedbittestandreset64:
- case Builtin::BI_interlockedbittestandset64:
- case Builtin::BI_interlockedbittestandset:
- case Builtin::BI_interlockedbittestandset_acq:
- case Builtin::BI_interlockedbittestandset_rel:
- case Builtin::BI_interlockedbittestandset_nf:
- case Builtin::BI_interlockedbittestandreset_acq:
- case Builtin::BI_interlockedbittestandreset_rel:
- case Builtin::BI_interlockedbittestandreset_nf:
- return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
- // These builtins exist to emit regular volatile loads and stores not
- // affected by the -fms-volatile setting.
- case Builtin::BI__iso_volatile_load8:
- case Builtin::BI__iso_volatile_load16:
- case Builtin::BI__iso_volatile_load32:
- case Builtin::BI__iso_volatile_load64:
- return RValue::get(EmitISOVolatileLoad(*this, E));
- case Builtin::BI__iso_volatile_store8:
- case Builtin::BI__iso_volatile_store16:
- case Builtin::BI__iso_volatile_store32:
- case Builtin::BI__iso_volatile_store64:
- return RValue::get(EmitISOVolatileStore(*this, E));
- case Builtin::BI__exception_code:
- case Builtin::BI_exception_code:
- return RValue::get(EmitSEHExceptionCode());
- case Builtin::BI__exception_info:
- case Builtin::BI_exception_info:
- return RValue::get(EmitSEHExceptionInfo());
- case Builtin::BI__abnormal_termination:
- case Builtin::BI_abnormal_termination:
- return RValue::get(EmitSEHAbnormalTermination());
- case Builtin::BI_setjmpex:
- if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
- E->getArg(0)->getType()->isPointerType())
- return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
- break;
- case Builtin::BI_setjmp:
- if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
- E->getArg(0)->getType()->isPointerType()) {
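- // MSVC setjmp lowers to _setjmp3 on x86, _setjmpex on AArch64, and _setjmp
- // elsewhere.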
- if (getTarget().getTriple().getArch() == llvm::Triple::x86)
- return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
- else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
- return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
- return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
- }
- break;
- case Builtin::BI__GetExceptionInfo: {
- if (llvm::GlobalVariable *GV =
- CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
- return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
- break;
- }
- case Builtin::BI__fastfail:
- return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
- case Builtin::BI__builtin_coro_size: {
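- // llvm.coro.size is overloaded on its result type; instantiate it at the
- // width of the target's size_t.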
- auto &Context = getContext();
- auto SizeTy = Context.getSizeType();
- auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
- Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
- return RValue::get(Builder.CreateCall(F));
- }
- case Builtin::BI__builtin_coro_id:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
- case Builtin::BI__builtin_coro_promise:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
- case Builtin::BI__builtin_coro_resume:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
- case Builtin::BI__builtin_coro_frame:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
- case Builtin::BI__builtin_coro_noop:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
- case Builtin::BI__builtin_coro_free:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
- case Builtin::BI__builtin_coro_destroy:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
- case Builtin::BI__builtin_coro_done:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
- case Builtin::BI__builtin_coro_alloc:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
- case Builtin::BI__builtin_coro_begin:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
- case Builtin::BI__builtin_coro_end:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
- case Builtin::BI__builtin_coro_suspend:
- return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
- // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
- case Builtin::BIread_pipe:
- case Builtin::BIwrite_pipe: {
- Value *Arg0 = EmitScalarExpr(E->getArg(0)),
- *Arg1 = EmitScalarExpr(E->getArg(1));
- CGOpenCLRuntime OpenCLRT(CGM);
- Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
- Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
- // Type of the generic packet parameter.
- unsigned GenericAS =
- getContext().getTargetAddressSpace(LangAS::opencl_generic);
- llvm::Type *I8PTy = llvm::PointerType::get(
- llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
- // Determine which overloaded version of the call to generate.
- if (2U == E->getNumArgs()) {
- const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
- : "__write_pipe_2";
- // Create a generic function type so the call works with any builtin or
- // user-defined type.
- llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
- return RValue::get(
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, BCast, PacketSize, PacketAlign}));
- } else {
- assert(4 == E->getNumArgs() &&
- "Illegal number of parameters to pipe function");
- const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
- : "__write_pipe_4";
- llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
- Int32Ty, Int32Ty};
- Value *Arg2 = EmitScalarExpr(E->getArg(2)),
- *Arg3 = EmitScalarExpr(E->getArg(3));
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
- // We know the third argument is an integer type, but we may need to cast
- // it to i32.
- if (Arg2->getType() != Int32Ty)
- Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
- return RValue::get(
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
- }
- }
- // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
- // functions
- case Builtin::BIreserve_read_pipe:
- case Builtin::BIreserve_write_pipe:
- case Builtin::BIwork_group_reserve_read_pipe:
- case Builtin::BIwork_group_reserve_write_pipe:
- case Builtin::BIsub_group_reserve_read_pipe:
- case Builtin::BIsub_group_reserve_write_pipe: {
- // Composing the mangled name for the function.
- const char *Name;
- if (BuiltinID == Builtin::BIreserve_read_pipe)
- Name = "__reserve_read_pipe";
- else if (BuiltinID == Builtin::BIreserve_write_pipe)
- Name = "__reserve_write_pipe";
- else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
- Name = "__work_group_reserve_read_pipe";
- else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
- Name = "__work_group_reserve_write_pipe";
- else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
- Name = "__sub_group_reserve_read_pipe";
- else
- Name = "__sub_group_reserve_write_pipe";
- Value *Arg0 = EmitScalarExpr(E->getArg(0)),
- *Arg1 = EmitScalarExpr(E->getArg(1));
- llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
- CGOpenCLRuntime OpenCLRT(CGM);
- Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
- Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
- // Building the generic function prototype.
- llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- // We know the second argument is an integer type, but we may need to cast
- // it to i32.
- if (Arg1->getType() != Int32Ty)
- Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
- return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, PacketSize, PacketAlign}));
- }
- // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
- // functions
- case Builtin::BIcommit_read_pipe:
- case Builtin::BIcommit_write_pipe:
- case Builtin::BIwork_group_commit_read_pipe:
- case Builtin::BIwork_group_commit_write_pipe:
- case Builtin::BIsub_group_commit_read_pipe:
- case Builtin::BIsub_group_commit_write_pipe: {
- const char *Name;
- if (BuiltinID == Builtin::BIcommit_read_pipe)
- Name = "__commit_read_pipe";
- else if (BuiltinID == Builtin::BIcommit_write_pipe)
- Name = "__commit_write_pipe";
- else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
- Name = "__work_group_commit_read_pipe";
- else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
- Name = "__work_group_commit_write_pipe";
- else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
- Name = "__sub_group_commit_read_pipe";
- else
- Name = "__sub_group_commit_write_pipe";
- Value *Arg0 = EmitScalarExpr(E->getArg(0)),
- *Arg1 = EmitScalarExpr(E->getArg(1));
- CGOpenCLRuntime OpenCLRT(CGM);
- Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
- Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
- // Building the generic function prototype.
- llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
- llvm::FunctionType *FTy =
- llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
- llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, PacketSize, PacketAlign}));
- }
- // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
- case Builtin::BIget_pipe_num_packets:
- case Builtin::BIget_pipe_max_packets: {
- const char *BaseName;
- const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
- if (BuiltinID == Builtin::BIget_pipe_num_packets)
- BaseName = "__get_pipe_num_packets";
- else
- BaseName = "__get_pipe_max_packets";
- std::string Name = std::string(BaseName) +
- std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
- // Building the generic function prototype.
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- CGOpenCLRuntime OpenCLRT(CGM);
- Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
- Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
- llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, PacketSize, PacketAlign}));
- }
- // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
- case Builtin::BIto_global:
- case Builtin::BIto_local:
- case Builtin::BIto_private: {
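- // Lower to the corresponding __to_global/__to_local/__to_private runtime
- // call, which takes an i8* in the generic address space and returns an i8*
- // in the destination address space; cast the argument and result to match.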
- auto Arg0 = EmitScalarExpr(E->getArg(0));
- auto NewArgT = llvm::PointerType::get(Int8Ty,
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
- auto NewRetT = llvm::PointerType::get(Int8Ty,
- CGM.getContext().getTargetAddressSpace(
- E->getType()->getPointeeType().getAddressSpace()));
- auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
- llvm::Value *NewArg;
- if (Arg0->getType()->getPointerAddressSpace() !=
- NewArgT->getPointerAddressSpace())
- NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
- else
- NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
- auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
- auto NewCall =
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
- return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
- ConvertType(E->getType())));
- }
- // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
- // It contains four different overload formats specified in Table 6.13.17.1.
- case Builtin::BIenqueue_kernel: {
- StringRef Name; // Generated function call name
- unsigned NumArgs = E->getNumArgs();
- llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
- getContext().getTargetAddressSpace(LangAS::opencl_generic));
- llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
- llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
- LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
- llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
- llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
- if (NumArgs == 4) {
- // The most basic form of the call with parameters:
- // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
- Name = "__enqueue_kernel_basic";
- llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
- GenericVoidPtrTy};
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
- llvm::Value *Kernel =
- Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- llvm::Value *Block =
- Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- AttrBuilder B(Builder.getContext());
- B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
- llvm::AttributeList ByValAttrSet =
- llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
- auto RTCall =
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
- {Queue, Flags, Range, Kernel, Block});
- RTCall->setAttributes(ByValAttrSet);
- return RValue::get(RTCall);
- }
- assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
- // Create a temporary array to hold the sizes of local pointer arguments
- // for the block. \p First is the position of the first size argument.
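- // Returns the pointer to the first size element, the lifetime-start marker
- // (null when lifetime markers are disabled), and the temporary's base
- // pointer for the matching lifetime end.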
- auto CreateArrayForSizeVar = [=](unsigned First)
- -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
- llvm::APInt ArraySize(32, NumArgs - First);
- QualType SizeArrayTy = getContext().getConstantArrayType(
- getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
- auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
- llvm::Value *TmpPtr = Tmp.getPointer();
- llvm::Value *TmpSize = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
- llvm::Value *ElemPtr;
- // Each of the following arguments specifies the size of the corresponding
- // argument passed to the enqueued block.
- auto *Zero = llvm::ConstantInt::get(IntTy, 0);
- for (unsigned I = First; I < NumArgs; ++I) {
- auto *Index = llvm::ConstantInt::get(IntTy, I - First);
- auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
- {Zero, Index});
- if (I == First)
- ElemPtr = GEP;
- auto *V =
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
- Builder.CreateAlignedStore(
- V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
- }
- return std::tie(ElemPtr, TmpSize, TmpPtr);
- };
- // Could have events and/or varargs.
- if (E->getArg(3)->getType()->isBlockPointerType()) {
- // No events passed, but has variadic arguments.
- Name = "__enqueue_kernel_varargs";
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
- llvm::Value *Kernel =
- Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
- std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
- // Create a vector of the arguments, as well as a constant value to
- // express to the runtime the number of variadic arguments.
- llvm::Value *const Args[] = {Queue, Flags,
- Range, Kernel,
- Block, ConstantInt::get(IntTy, NumArgs - 4),
- ElemPtr};
- llvm::Type *const ArgTys[] = {
- QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
- GenericVoidPtrTy, IntTy, ElemPtr->getType()};
- llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
- auto Call = RValue::get(
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
- if (TmpSize)
- EmitLifetimeEnd(TmpSize, TmpPtr);
- return Call;
- }
- // Any call that reaches this point has event arguments.
- if (NumArgs >= 7) {
- llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
- llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
- llvm::Value *NumEvents =
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
- // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
- // to be a null pointer constant (including a `0` literal), detect that case
- // and emit a null pointer directly.
- llvm::Value *EventWaitList = nullptr;
- if (E->getArg(4)->isNullPointerConstant(
- getContext(), Expr::NPC_ValueDependentIsNotNull)) {
- EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
- } else {
- EventWaitList = E->getArg(4)->getType()->isArrayType()
- ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
- : EmitScalarExpr(E->getArg(4));
- // Convert to generic address space.
- EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
- }
- llvm::Value *EventRet = nullptr;
- if (E->getArg(5)->isNullPointerConstant(
- getContext(), Expr::NPC_ValueDependentIsNotNull)) {
- EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
- } else {
- EventRet =
- Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
- }
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
- llvm::Value *Kernel =
- Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- llvm::Value *Block =
- Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- std::vector<llvm::Type *> ArgTys = {
- QueueTy, Int32Ty, RangeTy, Int32Ty,
- EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
- std::vector<llvm::Value *> Args = {Queue, Flags, Range,
- NumEvents, EventWaitList, EventRet,
- Kernel, Block};
- if (NumArgs == 7) {
- // Has events but no variadics.
- Name = "__enqueue_kernel_basic_events";
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
- }
- // Has event info and variadics
- // Pass the number of variadics to the runtime function too.
- Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
- ArgTys.push_back(Int32Ty);
- Name = "__enqueue_kernel_events_varargs";
- llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
- std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
- Args.push_back(ElemPtr);
- ArgTys.push_back(ElemPtr->getType());
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- auto Call =
- RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
- if (TmpSize)
- EmitLifetimeEnd(TmpSize, TmpPtr);
- return Call;
- }
- LLVM_FALLTHROUGH;
- }
- // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
- // parameter.
- case Builtin::BIget_kernel_work_group_size: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
- getContext().getTargetAddressSpace(LangAS::opencl_generic));
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
- Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- return RValue::get(EmitRuntimeCall(
- CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
- false),
- "__get_kernel_work_group_size_impl"),
- {Kernel, Arg}));
- }
- case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
- getContext().getTargetAddressSpace(LangAS::opencl_generic));
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
- Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- return RValue::get(EmitRuntimeCall(
- CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
- false),
- "__get_kernel_preferred_work_group_size_multiple_impl"),
- {Kernel, Arg}));
- }
- case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
- case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
- getContext().getTargetAddressSpace(LangAS::opencl_generic));
- LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
- llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
- auto Info =
- CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
- Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
- Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- const char *Name =
- BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
- ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
- : "__get_kernel_sub_group_count_for_ndrange_impl";
- return RValue::get(EmitRuntimeCall(
- CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(
- IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
- false),
- Name),
- {NDRange, Kernel, Block}));
- }
- case Builtin::BI__builtin_store_half:
- case Builtin::BI__builtin_store_halff: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Address Address = EmitPointerWithAlignment(E->getArg(1));
- Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
- return RValue::get(Builder.CreateStore(HalfVal, Address));
- }
- case Builtin::BI__builtin_load_half: {
- Address Address = EmitPointerWithAlignment(E->getArg(0));
- Value *HalfVal = Builder.CreateLoad(Address);
- return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
- }
- case Builtin::BI__builtin_load_halff: {
- Address Address = EmitPointerWithAlignment(E->getArg(0));
- Value *HalfVal = Builder.CreateLoad(Address);
- return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
- }
- case Builtin::BIprintf:
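- // On NVPTX and AMDGCN device targets, printf lowers to the matching device
- // runtime call; otherwise fall through to the regular library call path.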
- if (getTarget().getTriple().isNVPTX() ||
- getTarget().getTriple().isAMDGCN()) {
- if (getLangOpts().OpenMPIsDevice)
- return EmitOpenMPDevicePrintfCallExpr(E);
- if (getTarget().getTriple().isNVPTX())
- return EmitNVPTXDevicePrintfCallExpr(E);
- if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
- return EmitAMDGPUDevicePrintfCallExpr(E);
- }
- break;
- case Builtin::BI__builtin_canonicalize:
- case Builtin::BI__builtin_canonicalizef:
- case Builtin::BI__builtin_canonicalizef16:
- case Builtin::BI__builtin_canonicalizel:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
- case Builtin::BI__builtin_thread_pointer: {
- if (!getContext().getTargetInfo().isTLSSupported())
- CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
- // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
- break;
- }
- case Builtin::BI__builtin_os_log_format:
- return emitBuiltinOSLogFormat(*E);
- case Builtin::BI__xray_customevent: {
- if (!ShouldXRayInstrumentFunction())
- return RValue::getIgnored();
- if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
- XRayInstrKind::Custom))
- return RValue::getIgnored();
- if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
- if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
- return RValue::getIgnored();
- Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
- auto FTy = F->getFunctionType();
- auto Arg0 = E->getArg(0);
- auto Arg0Val = EmitScalarExpr(Arg0);
- auto Arg0Ty = Arg0->getType();
- auto PTy0 = FTy->getParamType(0);
- if (PTy0 != Arg0Val->getType()) {
- if (Arg0Ty->isArrayType())
- Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
- else
- Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
- }
- auto Arg1 = EmitScalarExpr(E->getArg(1));
- auto PTy1 = FTy->getParamType(1);
- if (PTy1 != Arg1->getType())
- Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
- return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
- }
- case Builtin::BI__xray_typedevent: {
- // TODO: There should be a way to always emit events even if the current
- // function is not instrumented. Losing events in a stream can cripple
- // a trace.
- if (!ShouldXRayInstrumentFunction())
- return RValue::getIgnored();
- if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
- XRayInstrKind::Typed))
- return RValue::getIgnored();
- if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
- if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
- return RValue::getIgnored();
- Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
- auto FTy = F->getFunctionType();
- auto Arg0 = EmitScalarExpr(E->getArg(0));
- auto PTy0 = FTy->getParamType(0);
- if (PTy0 != Arg0->getType())
- Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
- auto Arg1 = E->getArg(1);
- auto Arg1Val = EmitScalarExpr(Arg1);
- auto Arg1Ty = Arg1->getType();
- auto PTy1 = FTy->getParamType(1);
- if (PTy1 != Arg1Val->getType()) {
- if (Arg1Ty->isArrayType())
- Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
- else
- Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
- }
- auto Arg2 = EmitScalarExpr(E->getArg(2));
- auto PTy2 = FTy->getParamType(2);
- if (PTy2 != Arg2->getType())
- Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
- return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
- }
- case Builtin::BI__builtin_ms_va_start:
- case Builtin::BI__builtin_ms_va_end:
- return RValue::get(
- EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
- BuiltinID == Builtin::BI__builtin_ms_va_start));
- case Builtin::BI__builtin_ms_va_copy: {
- // Lower this manually. We can't reliably determine whether or not any
- // given va_copy() is for a Win64 va_list from the calling convention
- // alone, because it's legal to do this from a System V ABI function.
- // With opaque pointer types, we won't have enough information in LLVM
- // IR to determine this from the argument types, either. Best to do it
- // now, while we have enough information.
- Address DestAddr = EmitMSVAListRef(E->getArg(0));
- Address SrcAddr = EmitMSVAListRef(E->getArg(1));
- llvm::Type *BPP = Int8PtrPtrTy;
- DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
- Int8PtrTy, DestAddr.getAlignment());
- SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
- Int8PtrTy, SrcAddr.getAlignment());
- Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
- return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
- }
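- // Rough sketch of the lowering above: for
- //   __builtin_ms_va_copy(dst, src);
- // we simply copy the current argument pointer, since a Win64 va_list is
- // just a char*:
- //   %ap.val = load i8*, i8** %ap
- //   store i8* %ap.val, i8** %cp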
- case Builtin::BI__builtin_get_device_side_mangled_name: {
- auto Name = CGM.getCUDARuntime().getDeviceSideName(
- cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
- auto Str = CGM.GetAddrOfConstantCString(Name, "");
- llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
- llvm::ConstantInt::get(SizeTy, 0)};
- auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
- Str.getPointer(), Zeros);
- return RValue::get(Ptr);
- }
- }
- // If this is an alias for a lib function (e.g. __builtin_sin), emit
- // the call using the normal call path, but using the unmangled
- // version of the function name.
- if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
- return emitLibraryCall(*this, FD, E,
- CGM.getBuiltinLibFunction(FD, BuiltinID));
- // If this is a predefined lib function (e.g. malloc), emit the call
- // using exactly the normal call path.
- if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
- return emitLibraryCall(*this, FD, E,
- cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
- // Check that a call to a target specific builtin has the correct target
- // features.
- // This is done down here to avoid checking non-target-specific builtins;
- // however, if generic builtins start to require generic target features,
- // this check can move up to the beginning of the function.
- checkTargetFeatures(E, FD);
- if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
- LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
- // See if we have a target specific intrinsic.
- const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
- Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
- StringRef Prefix =
- llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
- if (!Prefix.empty()) {
- IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
- // NOTE: we don't need to perform a compatibility flag check here, since the
- // intrinsics are declared in Builtins*.def via LANGBUILTIN, which gates the
- // MS builtins on ALL_MS_LANGUAGES, so they are filtered out earlier.
- if (IntrinsicID == Intrinsic::not_intrinsic)
- IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
- }
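- // For example (illustrative): on AArch64 the arch prefix is "aarch64", so
- // any builtin whose name is attached to an intrinsic via GCCBuiltin in the
- // IntrinsicsAArch64.td definitions resolves here without needing a
- // hand-written case in the target-specific emitters below.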
- if (IntrinsicID != Intrinsic::not_intrinsic) {
- SmallVector<Value*, 16> Args;
- // Find out if any arguments are required to be integer constant
- // expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- Function *F = CGM.getIntrinsic(IntrinsicID);
- llvm::FunctionType *FTy = F->getFunctionType();
- for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue;
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- ArgValue = EmitScalarExpr(E->getArg(i));
- } else {
- // If this is required to be a constant, constant fold it so that we
- // know that the generated intrinsic gets a ConstantInt.
- ArgValue = llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext()));
- }
- // If the intrinsic arg type is different from the builtin arg type
- // we need to do a bit cast.
- llvm::Type *PTy = FTy->getParamType(i);
- if (PTy != ArgValue->getType()) {
- // XXX - vector of pointers?
- if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
- if (PtrTy->getAddressSpace() !=
- ArgValue->getType()->getPointerAddressSpace()) {
- ArgValue = Builder.CreateAddrSpaceCast(
- ArgValue,
- ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
- }
- }
- assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
- "Must be able to losslessly bit cast to param");
- ArgValue = Builder.CreateBitCast(ArgValue, PTy);
- }
- Args.push_back(ArgValue);
- }
- Value *V = Builder.CreateCall(F, Args);
- QualType BuiltinRetType = E->getType();
- llvm::Type *RetTy = VoidTy;
- if (!BuiltinRetType->isVoidType())
- RetTy = ConvertType(BuiltinRetType);
- if (RetTy != V->getType()) {
- // XXX - vector of pointers?
- if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
- if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
- V = Builder.CreateAddrSpaceCast(
- V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
- }
- }
- assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
- "Must be able to losslessly bit cast result type");
- V = Builder.CreateBitCast(V, RetTy);
- }
- return RValue::get(V);
- }
- // Some target-specific builtins can have aggregate return values, e.g.
- // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
- // ReturnValue to be non-null, so that the target-specific emission code can
- // always just emit into it.
- TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
- if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
- Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
- ReturnValue = ReturnValueSlot(DestPtr, false);
- }
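- // E.g. vld2q_u32 produces a { <4 x i32>, <4 x i32> } pair; the temporary
- // created above gives the target emitter somewhere to store that aggregate
- // when the caller did not supply a return slot.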
- // Now see if we can emit a target-specific builtin.
- if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
- switch (EvalKind) {
- case TEK_Scalar:
- return RValue::get(V);
- case TEK_Aggregate:
- return RValue::getAggregate(ReturnValue.getValue(),
- ReturnValue.isVolatile());
- case TEK_Complex:
- llvm_unreachable("No current target builtin returns complex");
- }
- llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
- }
- ErrorUnsupported(E, "builtin function");
- // Unknown builtin, for now just dump it out and return undef.
- return GetUndefRValue(E->getType());
- }
- static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
- unsigned BuiltinID, const CallExpr *E,
- ReturnValueSlot ReturnValue,
- llvm::Triple::ArchType Arch) {
- switch (Arch) {
- case llvm::Triple::arm:
- case llvm::Triple::armeb:
- case llvm::Triple::thumb:
- case llvm::Triple::thumbeb:
- return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
- case llvm::Triple::aarch64:
- case llvm::Triple::aarch64_32:
- case llvm::Triple::aarch64_be:
- return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
- case llvm::Triple::bpfeb:
- case llvm::Triple::bpfel:
- return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
- case llvm::Triple::x86:
- case llvm::Triple::x86_64:
- return CGF->EmitX86BuiltinExpr(BuiltinID, E);
- case llvm::Triple::ppc:
- case llvm::Triple::ppcle:
- case llvm::Triple::ppc64:
- case llvm::Triple::ppc64le:
- return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
- case llvm::Triple::r600:
- case llvm::Triple::amdgcn:
- return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
- case llvm::Triple::systemz:
- return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
- case llvm::Triple::nvptx:
- case llvm::Triple::nvptx64:
- return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
- case llvm::Triple::wasm32:
- case llvm::Triple::wasm64:
- return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
- case llvm::Triple::hexagon:
- return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
- case llvm::Triple::riscv32:
- case llvm::Triple::riscv64:
- return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
- default:
- return nullptr;
- }
- }
- Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
- assert(getContext().getAuxTargetInfo() && "Missing aux target info");
- return EmitTargetArchBuiltinExpr(
- this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
- ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
- }
- return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
- getTarget().getTriple().getArch());
- }
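- // Illustrative example: when compiling CUDA device code on an x86 host,
- // host-side builtins reach us as "aux" builtins; the branch above
- // re-dispatches them using the aux target's (x86) triple rather than the
- // primary device (nvptx) triple.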
- static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
- NeonTypeFlags TypeFlags,
- bool HasLegalHalfType = true,
- bool V1Ty = false,
- bool AllowBFloatArgsAndRet = true) {
- int IsQuad = TypeFlags.isQuad();
- switch (TypeFlags.getEltType()) {
- case NeonTypeFlags::Int8:
- case NeonTypeFlags::Poly8:
- return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
- case NeonTypeFlags::Int16:
- case NeonTypeFlags::Poly16:
- return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
- case NeonTypeFlags::BFloat16:
- if (AllowBFloatArgsAndRet)
- return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
- else
- return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
- case NeonTypeFlags::Float16:
- if (HasLegalHalfType)
- return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
- else
- return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
- case NeonTypeFlags::Int32:
- return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
- case NeonTypeFlags::Int64:
- case NeonTypeFlags::Poly64:
- return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
- case NeonTypeFlags::Poly128:
- // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
- // a lot of the i128 and f128 API is still missing, so we use v16i8 to
- // represent poly128 and rely on pattern matching instead.
- return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
- case NeonTypeFlags::Float32:
- return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
- case NeonTypeFlags::Float64:
- return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
- }
- llvm_unreachable("Unknown vector element type!");
- }
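- // A few worked examples of the mapping above (sketch):
- //   Int8,   quad  -> <16 x i8>  (8 << 1)
- //   Int32, !quad  -> <2 x i32>  (2 << 0)
- //   Int32,  quad  -> <4 x i32>  (2 << 1)
- // With V1Ty set, any element type yields a 1-element vector instead.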
- static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
- NeonTypeFlags IntTypeFlags) {
- int IsQuad = IntTypeFlags.isQuad();
- switch (IntTypeFlags.getEltType()) {
- case NeonTypeFlags::Int16:
- return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
- case NeonTypeFlags::Int32:
- return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
- case NeonTypeFlags::Int64:
- return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
- default:
- llvm_unreachable("Type can't be converted to floating-point!");
- }
- }
- Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
- const ElementCount &Count) {
- Value *SV = llvm::ConstantVector::getSplat(Count, C);
- return Builder.CreateShuffleVector(V, V, SV, "lane");
- }
- Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
- ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
- return EmitNeonSplat(V, C, EC);
- }
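- // E.g. with C = i32 2 and a 4-element input, the splat mask is
- // <2, 2, 2, 2>, so the shuffle broadcasts lane 2 of V into every lane.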
- Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
- const char *name,
- unsigned shift, bool rightshift) {
- unsigned j = 0;
- for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
- ai != ae; ++ai, ++j) {
- if (F->isConstrainedFPIntrinsic())
- if (ai->getType()->isMetadataTy())
- continue;
- if (shift > 0 && shift == j)
- Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
- else
- Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
- }
- if (F->isConstrainedFPIntrinsic())
- return Builder.CreateConstrainedFPCall(F, Ops, name);
- else
- return Builder.CreateCall(F, Ops, name);
- }
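- // Sketch of the loop above: operand 'shift' (when non-zero) names the
- // immediate shift operand and is rebuilt as a splat constant (negated for
- // right shifts); every other operand is bit-cast to the parameter type the
- // intrinsic declares, skipping metadata operands of constrained FP
- // intrinsics.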
- Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
- bool neg) {
- int SV = cast<ConstantInt>(V)->getSExtValue();
- return ConstantInt::get(Ty, neg ? -SV : SV);
- }
- // Right-shift a vector by a constant.
- Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
- llvm::Type *Ty, bool usgn,
- const char *name) {
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
- int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
- int EltSize = VTy->getScalarSizeInBits();
- Vec = Builder.CreateBitCast(Vec, Ty);
- // lshr/ashr are undefined when the shift amount is equal to the vector
- // element size.
- if (ShiftAmt == EltSize) {
- if (usgn) {
- // Right-shifting an unsigned value by its size yields 0.
- return llvm::ConstantAggregateZero::get(VTy);
- } else {
- // Right-shifting a signed value by its size is equivalent
- // to a shift of size-1.
- --ShiftAmt;
- Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
- }
- }
- Shift = EmitNeonShiftVector(Shift, Ty, false);
- if (usgn)
- return Builder.CreateLShr(Vec, Shift, name);
- else
- return Builder.CreateAShr(Vec, Shift, name);
- }
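- // Worked example: right-shifting a <2 x i64> vector by 64 (the element
- // width) is not a well-defined lshr/ashr, so the unsigned case folds to
- // zero and the signed case shifts by 63 instead, which still produces 0 or
- // -1 depending on the sign bit.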
- enum {
- AddRetType = (1 << 0),
- Add1ArgType = (1 << 1),
- Add2ArgTypes = (1 << 2),
- VectorizeRetType = (1 << 3),
- VectorizeArgTypes = (1 << 4),
- InventFloatType = (1 << 5),
- UnsignedAlts = (1 << 6),
- Use64BitVectors = (1 << 7),
- Use128BitVectors = (1 << 8),
- Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
- VectorRet = AddRetType | VectorizeRetType,
- VectorRetGetArgs01 =
- AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
- FpCmpzModifiers =
- AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
- };
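- // E.g. a combined flag like VectorRetGetArgs01 asks the lookup code below
- // to add the (vectorized) return type plus both argument types when
- // computing the intrinsic's overload types (a sketch of intent; the users
- // of these flags define the exact behaviour).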
- namespace {
- struct ARMVectorIntrinsicInfo {
- const char *NameHint;
- unsigned BuiltinID;
- unsigned LLVMIntrinsic;
- unsigned AltLLVMIntrinsic;
- uint64_t TypeModifier;
- bool operator<(unsigned RHSBuiltinID) const {
- return BuiltinID < RHSBuiltinID;
- }
- bool operator<(const ARMVectorIntrinsicInfo &TE) const {
- return BuiltinID < TE.BuiltinID;
- }
- };
- } // end anonymous namespace
- #define NEONMAP0(NameBase) \
- { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
- #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
- { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
- Intrinsic::LLVMIntrinsic, 0, TypeModifier }
- #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
- { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
- Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
- TypeModifier }
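- // For illustration, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to roughly
- //   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
- // i.e. one ARMVectorIntrinsicInfo entry keyed by the builtin ID.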
- static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
- NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0),
- NEONMAP0(splat_lane_v),
- NEONMAP0(splat_laneq_v),
- NEONMAP0(splatq_lane_v),
- NEONMAP0(splatq_laneq_v),
- NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
- NEONMAP1(vabs_v, arm_neon_vabs, 0),
- NEONMAP1(vabsq_v, arm_neon_vabs, 0),
- NEONMAP0(vadd_v),
- NEONMAP0(vaddhn_v),
- NEONMAP0(vaddq_v),
- NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
- NEONMAP1(vaeseq_v, arm_neon_aese, 0),
- NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
- NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
- NEONMAP1(vbfdot_v, arm_neon_bfdot, 0),
- NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0),
- NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0),
- NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0),
- NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0),
- NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
- NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
- NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcage_v, arm_neon_vacge, 0),
- NEONMAP1(vcageq_v, arm_neon_vacge, 0),
- NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
- NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
- NEONMAP1(vcale_v, arm_neon_vacge, 0),
- NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
- NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
- NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
- NEONMAP0(vceqz_v),
- NEONMAP0(vceqzq_v),
- NEONMAP0(vcgez_v),
- NEONMAP0(vcgezq_v),
- NEONMAP0(vcgtz_v),
- NEONMAP0(vcgtzq_v),
- NEONMAP0(vclez_v),
- NEONMAP0(vclezq_v),
- NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
- NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
- NEONMAP0(vcltz_v),
- NEONMAP0(vcltzq_v),
- NEONMAP1(vclz_v, ctlz, Add1ArgType),
- NEONMAP1(vclzq_v, ctlz, Add1ArgType),
- NEONMAP1(vcnt_v, ctpop, Add1ArgType),
- NEONMAP1(vcntq_v, ctpop, Add1ArgType),
- NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
- NEONMAP0(vcvt_f16_v),
- NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
- NEONMAP0(vcvt_f32_v),
- NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvt_s16_v),
- NEONMAP0(vcvt_s32_v),
- NEONMAP0(vcvt_s64_v),
- NEONMAP0(vcvt_u16_v),
- NEONMAP0(vcvt_u32_v),
- NEONMAP0(vcvt_u64_v),
- NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
- NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP0(vcvtq_f16_v),
- NEONMAP0(vcvtq_f32_v),
- NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvtq_s16_v),
- NEONMAP0(vcvtq_s32_v),
- NEONMAP0(vcvtq_s64_v),
- NEONMAP0(vcvtq_u16_v),
- NEONMAP0(vcvtq_u32_v),
- NEONMAP0(vcvtq_u64_v),
- NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
- NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
- NEONMAP0(vext_v),
- NEONMAP0(vextq_v),
- NEONMAP0(vfma_v),
- NEONMAP0(vfmaq_v),
- NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
- NEONMAP0(vld1_dup_v),
- NEONMAP1(vld1_v, arm_neon_vld1, 0),
- NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
- NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
- NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
- NEONMAP0(vld1q_dup_v),
- NEONMAP1(vld1q_v, arm_neon_vld1, 0),
- NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
- NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
- NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
- NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
- NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
- NEONMAP1(vld2_v, arm_neon_vld2, 0),
- NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
- NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
- NEONMAP1(vld2q_v, arm_neon_vld2, 0),
- NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
- NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
- NEONMAP1(vld3_v, arm_neon_vld3, 0),
- NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
- NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
- NEONMAP1(vld3q_v, arm_neon_vld3, 0),
- NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
- NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
- NEONMAP1(vld4_v, arm_neon_vld4, 0),
- NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
- NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
- NEONMAP1(vld4q_v, arm_neon_vld4, 0),
- NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
- NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
- NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
- NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
- NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
- NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
- NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
- NEONMAP0(vmovl_v),
- NEONMAP0(vmovn_v),
- NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
- NEONMAP0(vmull_v),
- NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
- NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
- NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
- NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
- NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
- NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
- NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
- NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
- NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
- NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
- NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
- NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
- NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
- NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
- NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
- NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
- NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
- NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
- NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
- NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
- NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
- NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
- NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
- NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
- NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
- NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
- NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
- NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
- NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
- NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
- NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
- NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
- NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
- NEONMAP0(vrndi_v),
- NEONMAP0(vrndiq_v),
- NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
- NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
- NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
- NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
- NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
- NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
- NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
- NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
- NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
- NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
- NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
- NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
- NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
- NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
- NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
- NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
- NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
- NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
- NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
- NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
- NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
- NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
- NEONMAP0(vshl_n_v),
- NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshll_n_v),
- NEONMAP0(vshlq_n_v),
- NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshr_n_v),
- NEONMAP0(vshrn_n_v),
- NEONMAP0(vshrq_n_v),
- NEONMAP1(vst1_v, arm_neon_vst1, 0),
- NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
- NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
- NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
- NEONMAP1(vst1q_v, arm_neon_vst1, 0),
- NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
- NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
- NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
- NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
- NEONMAP1(vst2_v, arm_neon_vst2, 0),
- NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
- NEONMAP1(vst2q_v, arm_neon_vst2, 0),
- NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
- NEONMAP1(vst3_v, arm_neon_vst3, 0),
- NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
- NEONMAP1(vst3q_v, arm_neon_vst3, 0),
- NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
- NEONMAP1(vst4_v, arm_neon_vst4, 0),
- NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
- NEONMAP1(vst4q_v, arm_neon_vst4, 0),
- NEONMAP0(vsubhn_v),
- NEONMAP0(vtrn_v),
- NEONMAP0(vtrnq_v),
- NEONMAP0(vtst_v),
- NEONMAP0(vtstq_v),
- NEONMAP1(vusdot_v, arm_neon_usdot, 0),
- NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
- NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
- NEONMAP0(vuzp_v),
- NEONMAP0(vuzpq_v),
- NEONMAP0(vzip_v),
- NEONMAP0(vzipq_v)
- };
- static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
- NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0),
- NEONMAP0(splat_lane_v),
- NEONMAP0(splat_laneq_v),
- NEONMAP0(splatq_lane_v),
- NEONMAP0(splatq_laneq_v),
- NEONMAP1(vabs_v, aarch64_neon_abs, 0),
- NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
- NEONMAP0(vadd_v),
- NEONMAP0(vaddhn_v),
- NEONMAP0(vaddq_p128),
- NEONMAP0(vaddq_v),
- NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
- NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
- NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
- NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
- NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
- NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
- NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
- NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
- NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0),
- NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0),
- NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcage_v, aarch64_neon_facge, 0),
- NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
- NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
- NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
- NEONMAP1(vcale_v, aarch64_neon_facge, 0),
- NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
- NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
- NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
- NEONMAP0(vceqz_v),
- NEONMAP0(vceqzq_v),
- NEONMAP0(vcgez_v),
- NEONMAP0(vcgezq_v),
- NEONMAP0(vcgtz_v),
- NEONMAP0(vcgtzq_v),
- NEONMAP0(vclez_v),
- NEONMAP0(vclezq_v),
- NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
- NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
- NEONMAP0(vcltz_v),
- NEONMAP0(vcltzq_v),
- NEONMAP1(vclz_v, ctlz, Add1ArgType),
- NEONMAP1(vclzq_v, ctlz, Add1ArgType),
- NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
- NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
- NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
- NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
- NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
- NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
- NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
- NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
- NEONMAP1(vcnt_v, ctpop, Add1ArgType),
- NEONMAP1(vcntq_v, ctpop, Add1ArgType),
- NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
- NEONMAP0(vcvt_f16_v),
- NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
- NEONMAP0(vcvt_f32_v),
- NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvtq_f16_v),
- NEONMAP0(vcvtq_f32_v),
- NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0),
- NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
- NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
- NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
- NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
- NEONMAP0(vext_v),
- NEONMAP0(vextq_v),
- NEONMAP0(vfma_v),
- NEONMAP0(vfmaq_v),
- NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
- NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
- NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
- NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
- NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
- NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
- NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
- NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
- NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
- NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
- NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
- NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
- NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
- NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
- NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
- NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0),
- NEONMAP0(vmovl_v),
- NEONMAP0(vmovn_v),
- NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
- NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
- NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
- NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
- NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
- NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
- NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
- NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
- NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
- NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
- NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
- NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
- NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
- NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
- NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
- NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
- NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
- NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
- NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
- NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
- NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
- NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
- NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
- NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
- NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
- NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
- NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
- NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
- NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
- NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
- NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
- NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
- NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
- NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
- NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
- NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
- NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
- NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
- NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
- NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
- NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
- NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
- NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
- NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
- NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
- NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
- NEONMAP0(vrndi_v),
- NEONMAP0(vrndiq_v),
- NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
- NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
- NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
- NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
- NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
- NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
- NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
- NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
- NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
- NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
- NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
- NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
- NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
- NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
- NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
- NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
- NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
- NEONMAP0(vshl_n_v),
- NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshll_n_v),
- NEONMAP0(vshlq_n_v),
- NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshr_n_v),
- NEONMAP0(vshrn_n_v),
- NEONMAP0(vshrq_n_v),
- NEONMAP1(vsm3partw1q_v, aarch64_crypto_sm3partw1, 0),
- NEONMAP1(vsm3partw2q_v, aarch64_crypto_sm3partw2, 0),
- NEONMAP1(vsm3ss1q_v, aarch64_crypto_sm3ss1, 0),
- NEONMAP1(vsm3tt1aq_v, aarch64_crypto_sm3tt1a, 0),
- NEONMAP1(vsm3tt1bq_v, aarch64_crypto_sm3tt1b, 0),
- NEONMAP1(vsm3tt2aq_v, aarch64_crypto_sm3tt2a, 0),
- NEONMAP1(vsm3tt2bq_v, aarch64_crypto_sm3tt2b, 0),
- NEONMAP1(vsm4ekeyq_v, aarch64_crypto_sm4ekey, 0),
- NEONMAP1(vsm4eq_v, aarch64_crypto_sm4e, 0),
- NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
- NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
- NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
- NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
- NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
- NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
- NEONMAP0(vsubhn_v),
- NEONMAP0(vtst_v),
- NEONMAP0(vtstq_v),
- NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
- NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
- NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
- NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
- };
- static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
- NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
- NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
- NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
- NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
- NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
- NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
- NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
- NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
- NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
- NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
- NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
- NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
- NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
- NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
- NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
- NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
- NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
- NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
- NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
- NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
- NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
- NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
- NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
- NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
- NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
- NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
- NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
- NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
- NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
- NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
- NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
- NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
- NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
- NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
- NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
- NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
- NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
- NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
- NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
- NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
- NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
- NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
- NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
- NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
- NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
- NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
- NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
- NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
- NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
- NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
- NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
- NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
- NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
- NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
- NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
- NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
- NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
- NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
- NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
- NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
- NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
- NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
- NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
- NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
- NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
- NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
- NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
- NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
- NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
- NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
- NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
- NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
- NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
- NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
- NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
- NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
- NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
- NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
- NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
- NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
- NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
- NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
- NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
- NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
- NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
- NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
- NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
- NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
- NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
- NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
- NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
- NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
- NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
- NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
- NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
- NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
- NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
- NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
- NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
- NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
- NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
- NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
- NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
- NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
- NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
- // FP16 scalar intrinsics go here.
- NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
- NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
- NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
- NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
- NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
- NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
- NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
- NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
- NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
- NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
- NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
- NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
- NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
- NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
- NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
- };
- #undef NEONMAP0
- #undef NEONMAP1
- #undef NEONMAP2
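- // SVEMAP1 ties a builtin directly to an LLVM intrinsic; SVEMAP2 leaves the
- // intrinsic field as 0 (presumably for builtins that get bespoke codegen).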
- #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
- { \
- #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
- TypeModifier \
- }
- #define SVEMAP2(NameBase, TypeModifier) \
- { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
- static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
- #define GET_SVE_LLVM_INTRINSIC_MAP
- #include "clang/Basic/arm_sve_builtin_cg.inc"
- #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
- #undef GET_SVE_LLVM_INTRINSIC_MAP
- };
- #undef SVEMAP1
- #undef SVEMAP2
- static bool NEONSIMDIntrinsicsProvenSorted = false;
- static bool AArch64SIMDIntrinsicsProvenSorted = false;
- static bool AArch64SISDIntrinsicsProvenSorted = false;
- static bool AArch64SVEIntrinsicsProvenSorted = false;
- static const ARMVectorIntrinsicInfo *
- findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
- unsigned BuiltinID, bool &MapProvenSorted) {
- #ifndef NDEBUG
- if (!MapProvenSorted) {
- assert(llvm::is_sorted(IntrinsicMap));
- MapProvenSorted = true;
- }
- #endif
- const ARMVectorIntrinsicInfo *Builtin =
- llvm::lower_bound(IntrinsicMap, BuiltinID);
- if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
- return Builtin;
- return nullptr;
- }
- Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
- unsigned Modifier,
- llvm::Type *ArgType,
- const CallExpr *E) {
- int VectorSize = 0;
- if (Modifier & Use64BitVectors)
- VectorSize = 64;
- else if (Modifier & Use128BitVectors)
- VectorSize = 128;
- // Return type.
- SmallVector<llvm::Type *, 3> Tys;
- if (Modifier & AddRetType) {
- llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
- if (Modifier & VectorizeRetType)
- Ty = llvm::FixedVectorType::get(
- Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
- Tys.push_back(Ty);
- }
- // Arguments.
- if (Modifier & VectorizeArgTypes) {
- int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
- ArgType = llvm::FixedVectorType::get(ArgType, Elts);
- }
- if (Modifier & (Add1ArgType | Add2ArgTypes))
- Tys.push_back(ArgType);
- if (Modifier & Add2ArgTypes)
- Tys.push_back(ArgType);
- if (Modifier & InventFloatType)
- Tys.push_back(FloatTy);
- return CGM.getIntrinsic(IntrinsicID, Tys);
- }
- static Value *EmitCommonNeonSISDBuiltinExpr(
- CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
- SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
- unsigned BuiltinID = SISDInfo.BuiltinID;
- unsigned int Int = SISDInfo.LLVMIntrinsic;
- unsigned Modifier = SISDInfo.TypeModifier;
- const char *s = SISDInfo.NameHint;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vcled_s64:
- case NEON::BI__builtin_neon_vcled_u64:
- case NEON::BI__builtin_neon_vcles_f32:
- case NEON::BI__builtin_neon_vcled_f64:
- case NEON::BI__builtin_neon_vcltd_s64:
- case NEON::BI__builtin_neon_vcltd_u64:
- case NEON::BI__builtin_neon_vclts_f32:
- case NEON::BI__builtin_neon_vcltd_f64:
- case NEON::BI__builtin_neon_vcales_f32:
- case NEON::BI__builtin_neon_vcaled_f64:
- case NEON::BI__builtin_neon_vcalts_f32:
- case NEON::BI__builtin_neon_vcaltd_f64:
- // Only one direction of comparisons actually exists: cmle is actually a cmge
- // with swapped operands. The table gives us the right intrinsic, but we
- // still need to do the swap.
- std::swap(Ops[0], Ops[1]);
- break;
- }
- assert(Int && "Generic code assumes a valid intrinsic");
- // Determine the type(s) of this overloaded AArch64 intrinsic.
- const Expr *Arg = E->getArg(0);
- llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
- Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
- int j = 0;
- ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
- for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
- ai != ae; ++ai, ++j) {
- llvm::Type *ArgTy = ai->getType();
- if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
- ArgTy->getPrimitiveSizeInBits())
- continue;
- assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
- // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
- // it before inserting.
- Ops[j] = CGF.Builder.CreateTruncOrBitCast(
- Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
- Ops[j] =
- CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
- }
- Value *Result = CGF.EmitNeonCall(F, Ops, s);
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
- if (ResultType->getPrimitiveSizeInBits().getFixedSize() <
- Result->getType()->getPrimitiveSizeInBits().getFixedSize())
- return CGF.Builder.CreateExtractElement(Result, C0);
- return CGF.Builder.CreateBitCast(Result, ResultType, s);
- }
- Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
- unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
- const char *NameHint, unsigned Modifier, const CallExpr *E,
- SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
- llvm::Triple::ArchType Arch) {
- // Get the last argument, which specifies the vector type.
- const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- Optional<llvm::APSInt> NeonTypeConst =
- Arg->getIntegerConstantExpr(getContext());
- if (!NeonTypeConst)
- return nullptr;
- // Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(NeonTypeConst->getZExtValue());
- bool Usgn = Type.isUnsigned();
- bool Quad = Type.isQuad();
- const bool HasLegalHalfType = getTarget().hasLegalHalfType();
- const bool AllowBFloatArgsAndRet =
- getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
- llvm::FixedVectorType *VTy =
- GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
- llvm::Type *Ty = VTy;
- if (!Ty)
- return nullptr;
- auto getAlignmentValue32 = [&](Address addr) -> Value* {
- return Builder.getInt32(addr.getAlignment().getQuantity());
- };
- unsigned Int = LLVMIntrinsic;
- if ((Modifier & UnsignedAlts) && !Usgn)
- Int = AltLLVMIntrinsic;
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_splat_lane_v:
- case NEON::BI__builtin_neon_splat_laneq_v:
- case NEON::BI__builtin_neon_splatq_lane_v:
- case NEON::BI__builtin_neon_splatq_laneq_v: {
- auto NumElements = VTy->getElementCount();
- if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
- NumElements = NumElements * 2;
- if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
- NumElements = NumElements.divideCoefficientBy(2);
- Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
- return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
- }
- case NEON::BI__builtin_neon_vpadd_v:
- case NEON::BI__builtin_neon_vpaddq_v:
- // We don't allow fp/int overloading of intrinsics.
- if (VTy->getElementType()->isFloatingPointTy() &&
- Int == Intrinsic::aarch64_neon_addp)
- Int = Intrinsic::aarch64_neon_faddp;
- break;
- case NEON::BI__builtin_neon_vabs_v:
- case NEON::BI__builtin_neon_vabsq_v:
- if (VTy->getElementType()->isFloatingPointTy())
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
- case NEON::BI__builtin_neon_vadd_v:
- case NEON::BI__builtin_neon_vaddq_v: {
- llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
- Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
- return Builder.CreateBitCast(Ops[0], Ty);
- }
- case NEON::BI__builtin_neon_vaddhn_v: {
- llvm::FixedVectorType *SrcTy =
- llvm::FixedVectorType::getExtendedElementVectorType(VTy);
- // %sum = add <4 x i32> %lhs, %rhs
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
- Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
- // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
- Constant *ShiftAmt =
- ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
- Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
- // %res = trunc <4 x i32> %high to <4 x i16>
- return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
- }
- case NEON::BI__builtin_neon_vcale_v:
- case NEON::BI__builtin_neon_vcaleq_v:
- case NEON::BI__builtin_neon_vcalt_v:
- case NEON::BI__builtin_neon_vcaltq_v:
- std::swap(Ops[0], Ops[1]);
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcage_v:
- case NEON::BI__builtin_neon_vcageq_v:
- case NEON::BI__builtin_neon_vcagt_v:
- case NEON::BI__builtin_neon_vcagtq_v: {
- llvm::Type *Ty;
- switch (VTy->getScalarSizeInBits()) {
- default: llvm_unreachable("unexpected type");
- case 32:
- Ty = FloatTy;
- break;
- case 64:
- Ty = DoubleTy;
- break;
- case 16:
- Ty = HalfTy;
- break;
- }
- auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
- llvm::Type *Tys[] = { VTy, VecFlt };
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
- return EmitNeonCall(F, Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vceqz_v:
- case NEON::BI__builtin_neon_vceqzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
- ICmpInst::ICMP_EQ, "vceqz");
- case NEON::BI__builtin_neon_vcgez_v:
- case NEON::BI__builtin_neon_vcgezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
- ICmpInst::ICMP_SGE, "vcgez");
- case NEON::BI__builtin_neon_vclez_v:
- case NEON::BI__builtin_neon_vclezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
- ICmpInst::ICMP_SLE, "vclez");
- case NEON::BI__builtin_neon_vcgtz_v:
- case NEON::BI__builtin_neon_vcgtzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
- ICmpInst::ICMP_SGT, "vcgtz");
- case NEON::BI__builtin_neon_vcltz_v:
- case NEON::BI__builtin_neon_vcltzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
- ICmpInst::ICMP_SLT, "vcltz");
- case NEON::BI__builtin_neon_vclz_v:
- case NEON::BI__builtin_neon_vclzq_v:
- // We generate a target-independent intrinsic, which needs a second argument
- // saying whether or not clz of zero is undefined; on ARM it isn't.
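- // E.g. on ARM this emits llvm.ctlz(%x, i1 false), i.e. clz of zero is
- // well-defined and yields the operand's bit width.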
- Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
- break;
- case NEON::BI__builtin_neon_vcvt_f32_v:
- case NEON::BI__builtin_neon_vcvtq_f32_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
- HasLegalHalfType);
- return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
- : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case NEON::BI__builtin_neon_vcvt_f16_v:
- case NEON::BI__builtin_neon_vcvtq_f16_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
- HasLegalHalfType);
- return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
- : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case NEON::BI__builtin_neon_vcvt_n_f16_v:
- case NEON::BI__builtin_neon_vcvt_n_f32_v:
- case NEON::BI__builtin_neon_vcvt_n_f64_v:
- case NEON::BI__builtin_neon_vcvtq_n_f16_v:
- case NEON::BI__builtin_neon_vcvtq_n_f32_v:
- case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
- llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
- Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
- case NEON::BI__builtin_neon_vcvt_n_s16_v:
- case NEON::BI__builtin_neon_vcvt_n_s32_v:
- case NEON::BI__builtin_neon_vcvt_n_u16_v:
- case NEON::BI__builtin_neon_vcvt_n_u32_v:
- case NEON::BI__builtin_neon_vcvt_n_s64_v:
- case NEON::BI__builtin_neon_vcvt_n_u64_v:
- case NEON::BI__builtin_neon_vcvtq_n_s16_v:
- case NEON::BI__builtin_neon_vcvtq_n_s32_v:
- case NEON::BI__builtin_neon_vcvtq_n_u16_v:
- case NEON::BI__builtin_neon_vcvtq_n_u32_v:
- case NEON::BI__builtin_neon_vcvtq_n_s64_v:
- case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
- case NEON::BI__builtin_neon_vcvt_s32_v:
- case NEON::BI__builtin_neon_vcvt_u32_v:
- case NEON::BI__builtin_neon_vcvt_s64_v:
- case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
- case NEON::BI__builtin_neon_vcvtq_s32_v:
- case NEON::BI__builtin_neon_vcvtq_u32_v:
- case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
- return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
- : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvta_s16_v:
- case NEON::BI__builtin_neon_vcvta_s32_v:
- case NEON::BI__builtin_neon_vcvta_s64_v:
- case NEON::BI__builtin_neon_vcvta_u16_v:
- case NEON::BI__builtin_neon_vcvta_u32_v:
- case NEON::BI__builtin_neon_vcvta_u64_v:
- case NEON::BI__builtin_neon_vcvtaq_s16_v:
- case NEON::BI__builtin_neon_vcvtaq_s32_v:
- case NEON::BI__builtin_neon_vcvtaq_s64_v:
- case NEON::BI__builtin_neon_vcvtaq_u16_v:
- case NEON::BI__builtin_neon_vcvtaq_u32_v:
- case NEON::BI__builtin_neon_vcvtaq_u64_v:
- case NEON::BI__builtin_neon_vcvtn_s16_v:
- case NEON::BI__builtin_neon_vcvtn_s32_v:
- case NEON::BI__builtin_neon_vcvtn_s64_v:
- case NEON::BI__builtin_neon_vcvtn_u16_v:
- case NEON::BI__builtin_neon_vcvtn_u32_v:
- case NEON::BI__builtin_neon_vcvtn_u64_v:
- case NEON::BI__builtin_neon_vcvtnq_s16_v:
- case NEON::BI__builtin_neon_vcvtnq_s32_v:
- case NEON::BI__builtin_neon_vcvtnq_s64_v:
- case NEON::BI__builtin_neon_vcvtnq_u16_v:
- case NEON::BI__builtin_neon_vcvtnq_u32_v:
- case NEON::BI__builtin_neon_vcvtnq_u64_v:
- case NEON::BI__builtin_neon_vcvtp_s16_v:
- case NEON::BI__builtin_neon_vcvtp_s32_v:
- case NEON::BI__builtin_neon_vcvtp_s64_v:
- case NEON::BI__builtin_neon_vcvtp_u16_v:
- case NEON::BI__builtin_neon_vcvtp_u32_v:
- case NEON::BI__builtin_neon_vcvtp_u64_v:
- case NEON::BI__builtin_neon_vcvtpq_s16_v:
- case NEON::BI__builtin_neon_vcvtpq_s32_v:
- case NEON::BI__builtin_neon_vcvtpq_s64_v:
- case NEON::BI__builtin_neon_vcvtpq_u16_v:
- case NEON::BI__builtin_neon_vcvtpq_u32_v:
- case NEON::BI__builtin_neon_vcvtpq_u64_v:
- case NEON::BI__builtin_neon_vcvtm_s16_v:
- case NEON::BI__builtin_neon_vcvtm_s32_v:
- case NEON::BI__builtin_neon_vcvtm_s64_v:
- case NEON::BI__builtin_neon_vcvtm_u16_v:
- case NEON::BI__builtin_neon_vcvtm_u32_v:
- case NEON::BI__builtin_neon_vcvtm_u64_v:
- case NEON::BI__builtin_neon_vcvtmq_s16_v:
- case NEON::BI__builtin_neon_vcvtmq_s32_v:
- case NEON::BI__builtin_neon_vcvtmq_s64_v:
- case NEON::BI__builtin_neon_vcvtmq_u16_v:
- case NEON::BI__builtin_neon_vcvtmq_u32_v:
- case NEON::BI__builtin_neon_vcvtmq_u64_v: {
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vcvtx_f32_v: {
- llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vext_v:
- case NEON::BI__builtin_neon_vextq_v: {
- int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(i+CV);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
- }
- case NEON::BI__builtin_neon_vfma_v:
- case NEON::BI__builtin_neon_vfmaq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- // NEON intrinsic puts accumulator first, unlike the LLVM fma.
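- // i.e. vfma(a, b, c) computes a + b*c whereas llvm.fma(x, y, z) computes
- // x*y + z, hence the {Ops[1], Ops[2], Ops[0]} operand order below.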
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
- {Ops[1], Ops[2], Ops[0]});
- }
- case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v: {
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Ops.push_back(getAlignmentValue32(PtrOp0));
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
- }
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld2_v:
- case NEON::BI__builtin_neon_vld2q_v:
- case NEON::BI__builtin_neon_vld3_v:
- case NEON::BI__builtin_neon_vld3q_v:
- case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v:
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v: {
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
- Value *Align = getAlignmentValue32(PtrOp1);
- Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld1_dup_v:
- case NEON::BI__builtin_neon_vld1q_dup_v: {
- Value *V = UndefValue::get(Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
- LoadInst *Ld = Builder.CreateLoad(PtrOp0);
- llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
- Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
- return EmitNeonSplat(Ops[0], CI);
- }
- case NEON::BI__builtin_neon_vld2_lane_v:
- case NEON::BI__builtin_neon_vld2q_lane_v:
- case NEON::BI__builtin_neon_vld3_lane_v:
- case NEON::BI__builtin_neon_vld3q_lane_v:
- case NEON::BI__builtin_neon_vld4_lane_v:
- case NEON::BI__builtin_neon_vld4q_lane_v: {
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
- for (unsigned I = 2; I < Ops.size() - 1; ++I)
- Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
- Ops.push_back(getAlignmentValue32(PtrOp1));
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vmovl_v: {
- llvm::FixedVectorType *DTy =
- llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
- if (Usgn)
- return Builder.CreateZExt(Ops[0], Ty, "vmovl");
- return Builder.CreateSExt(Ops[0], Ty, "vmovl");
- }
- case NEON::BI__builtin_neon_vmovn_v: {
- llvm::FixedVectorType *QTy =
- llvm::FixedVectorType::getExtendedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
- return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
- }
- case NEON::BI__builtin_neon_vmull_v:
- // FIXME: the integer vmull operations could be emitted in terms of pure
- // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
- // hoisting the exts outside loops. Until global ISel comes along that can
- // see through such movement this leads to bad CodeGen. So we need an
- // intrinsic for now.
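- // The pure-IR form would be roughly (illustrative only, not what we emit):
- //   %l = sext <4 x i16> %a to <4 x i32>
- //   %r = sext <4 x i16> %b to <4 x i32>
- //   %p = mul <4 x i32> %l, %r   ; zext instead of sext for unsigned variants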
- Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
- Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
- case NEON::BI__builtin_neon_vpadal_v:
- case NEON::BI__builtin_neon_vpadalq_v: {
- // The source operand type has twice as many elements of half the size.
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- llvm::Type *EltTy =
- llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- auto *NarrowTy =
- llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
- llvm::Type *Tys[2] = { Ty, NarrowTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vpaddl_v:
- case NEON::BI__builtin_neon_vpaddlq_v: {
- // The source operand type has twice as many elements of half the size.
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
- auto *NarrowTy =
- llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
- llvm::Type *Tys[2] = { Ty, NarrowTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
- }
- case NEON::BI__builtin_neon_vqdmlal_v:
- case NEON::BI__builtin_neon_vqdmlsl_v: {
- SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
- Ops[1] =
- EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
- Ops.resize(2);
- return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vqdmulhq_lane_v:
- case NEON::BI__builtin_neon_vqdmulh_lane_v:
- case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
- case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
- auto *RTy = cast<llvm::FixedVectorType>(Ty);
- if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
- BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
- RTy = llvm::FixedVectorType::get(RTy->getElementType(),
- RTy->getNumElements() * 2);
- llvm::Type *Tys[2] = {
- RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
- /*isQuad*/ false))};
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
- case NEON::BI__builtin_neon_vqdmulh_laneq_v:
- case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
- case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
- llvm::Type *Tys[2] = {
- Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
- /*isQuad*/ true))};
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
- }
- case NEON::BI__builtin_neon_vqshl_n_v:
- case NEON::BI__builtin_neon_vqshlq_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
- 1, false);
- case NEON::BI__builtin_neon_vqshlu_n_v:
- case NEON::BI__builtin_neon_vqshluq_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
- 1, false);
- case NEON::BI__builtin_neon_vrecpe_v:
- case NEON::BI__builtin_neon_vrecpeq_v:
- case NEON::BI__builtin_neon_vrsqrte_v:
- case NEON::BI__builtin_neon_vrsqrteq_v:
- Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
- case NEON::BI__builtin_neon_vrndi_v:
- case NEON::BI__builtin_neon_vrndiq_v:
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_nearbyint
- : Intrinsic::nearbyint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
- case NEON::BI__builtin_neon_vrshr_n_v:
- case NEON::BI__builtin_neon_vrshrq_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
- 1, true);
- case NEON::BI__builtin_neon_vsha512hq_v:
- case NEON::BI__builtin_neon_vsha512h2q_v:
- case NEON::BI__builtin_neon_vsha512su0q_v:
- case NEON::BI__builtin_neon_vsha512su1q_v: {
- Function *F = CGM.getIntrinsic(Int);
- return EmitNeonCall(F, Ops, "");
- }
- case NEON::BI__builtin_neon_vshl_n_v:
- case NEON::BI__builtin_neon_vshlq_n_v:
- Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
- return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
- "vshl_n");
- case NEON::BI__builtin_neon_vshll_n_v: {
- llvm::FixedVectorType *SrcTy =
- llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- if (Usgn)
- Ops[0] = Builder.CreateZExt(Ops[0], VTy);
- else
- Ops[0] = Builder.CreateSExt(Ops[0], VTy);
- Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
- return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
- }
- case NEON::BI__builtin_neon_vshrn_n_v: {
- llvm::FixedVectorType *SrcTy =
- llvm::FixedVectorType::getExtendedElementVectorType(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
- if (Usgn)
- Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
- else
- Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
- return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
- }
- case NEON::BI__builtin_neon_vshr_n_v:
- case NEON::BI__builtin_neon_vshrq_n_v:
- return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
- case NEON::BI__builtin_neon_vst1_v:
- case NEON::BI__builtin_neon_vst1q_v:
- case NEON::BI__builtin_neon_vst2_v:
- case NEON::BI__builtin_neon_vst2q_v:
- case NEON::BI__builtin_neon_vst3_v:
- case NEON::BI__builtin_neon_vst3q_v:
- case NEON::BI__builtin_neon_vst4_v:
- case NEON::BI__builtin_neon_vst4q_v:
- case NEON::BI__builtin_neon_vst2_lane_v:
- case NEON::BI__builtin_neon_vst2q_lane_v:
- case NEON::BI__builtin_neon_vst3_lane_v:
- case NEON::BI__builtin_neon_vst3q_lane_v:
- case NEON::BI__builtin_neon_vst4_lane_v:
- case NEON::BI__builtin_neon_vst4q_lane_v: {
- llvm::Type *Tys[] = {Int8PtrTy, Ty};
- Ops.push_back(getAlignmentValue32(PtrOp0));
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
- }
- case NEON::BI__builtin_neon_vsm3partw1q_v:
- case NEON::BI__builtin_neon_vsm3partw2q_v:
- case NEON::BI__builtin_neon_vsm3ss1q_v:
- case NEON::BI__builtin_neon_vsm4ekeyq_v:
- case NEON::BI__builtin_neon_vsm4eq_v: {
- Function *F = CGM.getIntrinsic(Int);
- return EmitNeonCall(F, Ops, "");
- }
- case NEON::BI__builtin_neon_vsm3tt1aq_v:
- case NEON::BI__builtin_neon_vsm3tt1bq_v:
- case NEON::BI__builtin_neon_vsm3tt2aq_v:
- case NEON::BI__builtin_neon_vsm3tt2bq_v: {
- Function *F = CGM.getIntrinsic(Int);
- Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
- return EmitNeonCall(F, Ops, "");
- }
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
- // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
- // in AArch64 it comes last. We may want to standardize on one or the other.
- if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
- Arch == llvm::Triple::aarch64_32) {
- llvm::Type *Tys[2] = { VTy, PTy };
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
- }
- llvm::Type *Tys[2] = { PTy, VTy };
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
- }
- case NEON::BI__builtin_neon_vsubhn_v: {
- llvm::FixedVectorType *SrcTy =
- llvm::FixedVectorType::getExtendedElementVectorType(VTy);
- // %diff = sub <4 x i32> %lhs, %rhs
- Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
- Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
- // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
- Constant *ShiftAmt =
- ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
- Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
- // %res = trunc <4 x i32> %high to <4 x i16>
- return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
- }
- case NEON::BI__builtin_neon_vtrn_v:
- case NEON::BI__builtin_neon_vtrnq_v: {
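- // These builtins return a pair of vectors: Ops[0] holds the address of the
- // result, and each shuffled half is stored through it in turn (the same
- // pattern is used for vuzp and vzip below).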
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(i+vi);
- Indices.push_back(i+e+vi);
- }
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vtst_v:
- case NEON::BI__builtin_neon_vtstq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
- Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
- ConstantAggregateZero::get(Ty));
- return Builder.CreateSExt(Ops[0], Ty, "vtst");
- }
- case NEON::BI__builtin_neon_vuzp_v:
- case NEON::BI__builtin_neon_vuzpq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(2*i+vi);
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vxarq_v: {
- Function *F = CGM.getIntrinsic(Int);
- Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
- return EmitNeonCall(F, Ops, "");
- }
- case NEON::BI__builtin_neon_vzip_v:
- case NEON::BI__builtin_neon_vzipq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back((i + vi*e) >> 1);
- Indices.push_back(((i + vi*e) >> 1)+e);
- }
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vdot_v:
- case NEON::BI__builtin_neon_vdotq_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
- }
- case NEON::BI__builtin_neon_vfmlal_low_v:
- case NEON::BI__builtin_neon_vfmlalq_low_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
- }
- case NEON::BI__builtin_neon_vfmlsl_low_v:
- case NEON::BI__builtin_neon_vfmlslq_low_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
- }
- case NEON::BI__builtin_neon_vfmlal_high_v:
- case NEON::BI__builtin_neon_vfmlalq_high_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
- }
- case NEON::BI__builtin_neon_vfmlsl_high_v:
- case NEON::BI__builtin_neon_vfmlslq_high_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
- }
- case NEON::BI__builtin_neon_vmmlaq_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
- }
- case NEON::BI__builtin_neon_vusmmlaq_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
- }
- case NEON::BI__builtin_neon_vusdot_v:
- case NEON::BI__builtin_neon_vusdotq_v: {
- auto *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
- }
- case NEON::BI__builtin_neon_vbfdot_v:
- case NEON::BI__builtin_neon_vbfdotq_v: {
- llvm::Type *InputTy =
- llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
- }
- case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
- llvm::Type *Tys[1] = { Ty };
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvtfp2bf");
- }
- }
- assert(Int && "Expected valid intrinsic number");
- // Determine the type(s) of this overloaded AArch64 intrinsic.
- Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
- Value *Result = EmitNeonCall(F, Ops, NameHint);
- llvm::Type *ResultType = ConvertType(E->getType());
- // Cast the intrinsic's one-element vector result to the scalar type
- // expected by the builtin.
- return Builder.CreateBitCast(Result, ResultType, NameHint);
- }
- Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
- Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
- const CmpInst::Predicate Ip, const Twine &Name) {
- llvm::Type *OTy = Op->getType();
- // FIXME: this is utterly horrific. We should not be looking at previous
- // codegen context to find out what needs doing. Unfortunately TableGen
- // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
- // (etc).
- if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
- OTy = BI->getOperand(0)->getType();
- Op = Builder.CreateBitCast(Op, OTy);
- if (OTy->getScalarType()->isFloatingPointTy()) {
- Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
- } else {
- Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
- }
- return Builder.CreateSExt(Op, Ty, Name);
- }
- static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
- Value *ExtOp, Value *IndexOp,
- llvm::Type *ResTy, unsigned IntID,
- const char *Name) {
- SmallVector<Value *, 2> TblOps;
- if (ExtOp)
- TblOps.push_back(ExtOp);
- // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
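- // E.g. for two <8 x i8> table halves the indices are <0, 1, ..., 15>; the
- // shuffles below use them to concatenate each pair into a single table.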
- SmallVector<int, 16> Indices;
- auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
- for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
- Indices.push_back(2*i);
- Indices.push_back(2*i+1);
- }
- int PairPos = 0, End = Ops.size() - 1;
- while (PairPos < End) {
- TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- Ops[PairPos+1], Indices,
- Name));
- PairPos += 2;
- }
- // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
- // of the 128-bit lookup table with zero.
- if (PairPos == End) {
- Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
- TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- ZeroTbl, Indices, Name));
- }
- Function *TblF;
- TblOps.push_back(IndexOp);
- TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
- return CGF.EmitNeonCall(TblF, TblOps, Name);
- }
- Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
- unsigned Value;
- switch (BuiltinID) {
- default:
- return nullptr;
- case ARM::BI__builtin_arm_nop:
- Value = 0;
- break;
- case ARM::BI__builtin_arm_yield:
- case ARM::BI__yield:
- Value = 1;
- break;
- case ARM::BI__builtin_arm_wfe:
- case ARM::BI__wfe:
- Value = 2;
- break;
- case ARM::BI__builtin_arm_wfi:
- case ARM::BI__wfi:
- Value = 3;
- break;
- case ARM::BI__builtin_arm_sev:
- case ARM::BI__sev:
- Value = 4;
- break;
- case ARM::BI__builtin_arm_sevl:
- case ARM::BI__sevl:
- Value = 5;
- break;
- }
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
- llvm::ConstantInt::get(Int32Ty, Value));
- }
- enum SpecialRegisterAccessKind {
- NormalRead,
- VolatileRead,
- Write,
- };
- // Generates the IR for the read/write special register builtin.
- // ValueType is the type of the value that is to be written or read;
- // RegisterType is the type of the register being written to or read from.
- static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- llvm::Type *RegisterType,
- llvm::Type *ValueType,
- SpecialRegisterAccessKind AccessKind,
- StringRef SysReg = "") {
- // Read and write register intrinsics only support 32-bit and 64-bit operations.
- assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
- && "Unsupported size for register.");
- CodeGen::CGBuilderTy &Builder = CGF.Builder;
- CodeGen::CodeGenModule &CGM = CGF.CGM;
- LLVMContext &Context = CGM.getLLVMContext();
- if (SysReg.empty()) {
- const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
- SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
- }
- llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
- llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
- llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
- llvm::Type *Types[] = { RegisterType };
- bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
- assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
- && "Can't fit 64-bit value in 32-bit register");
- if (AccessKind != Write) {
- assert(AccessKind == NormalRead || AccessKind == VolatileRead);
- llvm::Function *F = CGM.getIntrinsic(
- AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
- : llvm::Intrinsic::read_register,
- Types);
- llvm::Value *Call = Builder.CreateCall(F, Metadata);
- if (MixedTypes)
- // Read into 64 bit register and then truncate result to 32 bit.
- return Builder.CreateTrunc(Call, ValueType);
- if (ValueType->isPointerTy())
- // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
- return Builder.CreateIntToPtr(Call, ValueType);
- return Call;
- }
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
- llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
- if (MixedTypes) {
- // Extend 32 bit write value to 64 bit to pass to write.
- ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
- return Builder.CreateCall(F, { Metadata, ArgValue });
- }
- if (ValueType->isPointerTy()) {
- // Have VoidPtrTy ArgValue but want to return an i32/i64.
- ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
- return Builder.CreateCall(F, { Metadata, ArgValue });
- }
- return Builder.CreateCall(F, { Metadata, ArgValue });
- }
- /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
- /// argument that specifies the vector type.
- static bool HasExtraNeonArgument(unsigned BuiltinID) {
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vget_lane_bf16:
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vget_lane_f32:
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vgetq_lane_bf16:
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vduph_lane_bf16:
- case NEON::BI__builtin_neon_vduph_laneq_bf16:
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_bf16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_bf16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- case NEON::BI__builtin_neon_vsha1h_u32:
- case NEON::BI__builtin_neon_vsha1cq_u32:
- case NEON::BI__builtin_neon_vsha1pq_u32:
- case NEON::BI__builtin_neon_vsha1mq_u32:
- case NEON::BI__builtin_neon_vcvth_bf16_f32:
- case clang::ARM::BI_MoveToCoprocessor:
- case clang::ARM::BI_MoveToCoprocessor2:
- return false;
- }
- return true;
- }
- Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue,
- llvm::Triple::ArchType Arch) {
- if (auto Hint = GetValueForARMHint(BuiltinID))
- return Hint;
- if (BuiltinID == ARM::BI__emit) {
- bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
- llvm::FunctionType *FTy =
- llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
- Expr::EvalResult Result;
- if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
- llvm_unreachable("Sema will ensure that the parameter is constant");
- llvm::APSInt Value = Result.Val.getInt();
- uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
- llvm::InlineAsm *Emit =
- IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
- /*hasSideEffects=*/true)
- : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
- /*hasSideEffects=*/true);
- return Builder.CreateCall(Emit);
- }
- if (BuiltinID == ARM::BI__builtin_arm_dbg) {
- Value *Option = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
- }
- if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *RW = EmitScalarExpr(E->getArg(1));
- Value *IsData = EmitScalarExpr(E->getArg(2));
- // Locality is not supported on the ARM target.
- Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return Builder.CreateCall(F, {Address, RW, Locality, IsData});
- }
- if (BuiltinID == ARM::BI__builtin_arm_rbit) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
- }
- if (BuiltinID == ARM::BI__builtin_arm_cls) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
- }
- if (BuiltinID == ARM::BI__builtin_arm_cls64) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
- "cls");
- }
- if (BuiltinID == ARM::BI__clear_cache) {
- assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
- const FunctionDecl *FD = E->getDirectCallee();
- Value *Ops[2];
- for (unsigned i = 0; i < 2; i++)
- Ops[i] = EmitScalarExpr(E->getArg(i));
- llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
- llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
- StringRef Name = FD->getName();
- return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
- }
- if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
- BuiltinID == ARM::BI__builtin_arm_mcrr2) {
- Function *F;
- switch (BuiltinID) {
- default: llvm_unreachable("unexpected builtin");
- case ARM::BI__builtin_arm_mcrr:
- F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
- break;
- case ARM::BI__builtin_arm_mcrr2:
- F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
- break;
- }
- // The MCRR{2} instruction has 5 operands, but the intrinsic has 4 because
- // Rt and Rt2 are represented as a single unsigned 64-bit integer in the
- // intrinsic definition, while internally they are represented as two
- // 32-bit integers.
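- // i.e. Rt = (i32)RtAndRt2 and Rt2 = (i32)(RtAndRt2 >> 32), as computed below.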
- Value *Coproc = EmitScalarExpr(E->getArg(0));
- Value *Opc1 = EmitScalarExpr(E->getArg(1));
- Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
- Value *CRm = EmitScalarExpr(E->getArg(3));
- Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
- Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
- Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
- Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
- return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
- }
- if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
- BuiltinID == ARM::BI__builtin_arm_mrrc2) {
- Function *F;
- switch (BuiltinID) {
- default: llvm_unreachable("unexpected builtin");
- case ARM::BI__builtin_arm_mrrc:
- F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
- break;
- case ARM::BI__builtin_arm_mrrc2:
- F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
- break;
- }
- Value *Coproc = EmitScalarExpr(E->getArg(0));
- Value *Opc1 = EmitScalarExpr(E->getArg(1));
- Value *CRm = EmitScalarExpr(E->getArg(2));
- Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
- // Returns an unsigned 64-bit integer, represented as two 32-bit integers.
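- // The two halves are zero-extended and recombined below as (Rt << 32) | Rt1.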
- Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
- Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
- Rt = Builder.CreateZExt(Rt, Int64Ty);
- Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
- Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
- RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
- RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
- return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
- }
- if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
- ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
- BuiltinID == ARM::BI__builtin_arm_ldaex) &&
- getContext().getTypeSize(E->getType()) == 64) ||
- BuiltinID == ARM::BI__ldrexd) {
- Function *F;
- switch (BuiltinID) {
- default: llvm_unreachable("unexpected builtin");
- case ARM::BI__builtin_arm_ldaex:
- F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
- break;
- case ARM::BI__builtin_arm_ldrexd:
- case ARM::BI__builtin_arm_ldrex:
- case ARM::BI__ldrexd:
- F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
- break;
- }
- Value *LdPtr = EmitScalarExpr(E->getArg(0));
- Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
- "ldrexd");
- Value *Val0 = Builder.CreateExtractValue(Val, 1);
- Value *Val1 = Builder.CreateExtractValue(Val, 0);
- Val0 = Builder.CreateZExt(Val0, Int64Ty);
- Val1 = Builder.CreateZExt(Val1, Int64Ty);
- Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
- Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
- Val = Builder.CreateOr(Val, Val1);
- return Builder.CreateBitCast(Val, ConvertType(E->getType()));
- }
- if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
- BuiltinID == ARM::BI__builtin_arm_ldaex) {
- Value *LoadAddr = EmitScalarExpr(E->getArg(0));
- QualType Ty = E->getType();
- llvm::Type *RealResTy = ConvertType(Ty);
- llvm::Type *PtrTy = llvm::IntegerType::get(
- getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
- LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
- ? Intrinsic::arm_ldaex
- : Intrinsic::arm_ldrex,
- PtrTy);
- Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
- if (RealResTy->isPointerTy())
- return Builder.CreateIntToPtr(Val, RealResTy);
- else {
- llvm::Type *IntResTy = llvm::IntegerType::get(
- getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
- Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
- return Builder.CreateBitCast(Val, RealResTy);
- }
- }
- if (BuiltinID == ARM::BI__builtin_arm_strexd ||
- ((BuiltinID == ARM::BI__builtin_arm_stlex ||
- BuiltinID == ARM::BI__builtin_arm_strex) &&
- getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
- ? Intrinsic::arm_stlexd
- : Intrinsic::arm_strexd);
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
- Address Tmp = CreateMemTemp(E->getArg(0)->getType());
- Value *Val = EmitScalarExpr(E->getArg(0));
- Builder.CreateStore(Val, Tmp);
- Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
- Val = Builder.CreateLoad(LdPtr);
- Value *Arg0 = Builder.CreateExtractValue(Val, 0);
- Value *Arg1 = Builder.CreateExtractValue(Val, 1);
- Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
- return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
- }
- if (BuiltinID == ARM::BI__builtin_arm_strex ||
- BuiltinID == ARM::BI__builtin_arm_stlex) {
- Value *StoreVal = EmitScalarExpr(E->getArg(0));
- Value *StoreAddr = EmitScalarExpr(E->getArg(1));
- QualType Ty = E->getArg(0)->getType();
- llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
- getContext().getTypeSize(Ty));
- StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
- if (StoreVal->getType()->isPointerTy())
- StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
- else {
- llvm::Type *IntTy = llvm::IntegerType::get(
- getLLVMContext(),
- CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
- StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
- StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
- }
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
- ? Intrinsic::arm_stlex
- : Intrinsic::arm_strex,
- StoreAddr->getType());
- return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
- }
- if (BuiltinID == ARM::BI__builtin_arm_clrex) {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
- return Builder.CreateCall(F);
- }
- // CRC32
- Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
- switch (BuiltinID) {
- case ARM::BI__builtin_arm_crc32b:
- CRCIntrinsicID = Intrinsic::arm_crc32b; break;
- case ARM::BI__builtin_arm_crc32cb:
- CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
- case ARM::BI__builtin_arm_crc32h:
- CRCIntrinsicID = Intrinsic::arm_crc32h; break;
- case ARM::BI__builtin_arm_crc32ch:
- CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
- case ARM::BI__builtin_arm_crc32w:
- case ARM::BI__builtin_arm_crc32d:
- CRCIntrinsicID = Intrinsic::arm_crc32w; break;
- case ARM::BI__builtin_arm_crc32cw:
- case ARM::BI__builtin_arm_crc32cd:
- CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
- }
- if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
- // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
- // intrinsics, hence we need different codegen for these cases.
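- // i.e. crc32{c,}d(acc, x) == crc32{c,}w(crc32{c,}w(acc, lo32(x)), hi32(x)).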
- if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
- BuiltinID == ARM::BI__builtin_arm_crc32cd) {
- Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
- Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
- Value *Arg1b = Builder.CreateLShr(Arg1, C1);
- Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
- Function *F = CGM.getIntrinsic(CRCIntrinsicID);
- Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
- return Builder.CreateCall(F, {Res, Arg1b});
- } else {
- Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
- Function *F = CGM.getIntrinsic(CRCIntrinsicID);
- return Builder.CreateCall(F, {Arg0, Arg1});
- }
- }
- if (BuiltinID == ARM::BI__builtin_arm_rsr ||
- BuiltinID == ARM::BI__builtin_arm_rsr64 ||
- BuiltinID == ARM::BI__builtin_arm_rsrp ||
- BuiltinID == ARM::BI__builtin_arm_wsr ||
- BuiltinID == ARM::BI__builtin_arm_wsr64 ||
- BuiltinID == ARM::BI__builtin_arm_wsrp) {
- SpecialRegisterAccessKind AccessKind = Write;
- if (BuiltinID == ARM::BI__builtin_arm_rsr ||
- BuiltinID == ARM::BI__builtin_arm_rsr64 ||
- BuiltinID == ARM::BI__builtin_arm_rsrp)
- AccessKind = VolatileRead;
- bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
- BuiltinID == ARM::BI__builtin_arm_wsrp;
- bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
- BuiltinID == ARM::BI__builtin_arm_wsr64;
- llvm::Type *ValueType;
- llvm::Type *RegisterType;
- if (IsPointerBuiltin) {
- ValueType = VoidPtrTy;
- RegisterType = Int32Ty;
- } else if (Is64Bit) {
- ValueType = RegisterType = Int64Ty;
- } else {
- ValueType = RegisterType = Int32Ty;
- }
- return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
- AccessKind);
- }
- // Handle MSVC intrinsics before argument evaluation to prevent double
- // evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
- return EmitMSVCBuiltinExpr(*MsvcIntId, E);
- // Deal with MVE builtins
- if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
- return Result;
- // Handle CDE builtins
- if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
- return Result;
- // Find out if any arguments are required to be integer constant
- // expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- auto getAlignmentValue32 = [&](Address addr) -> Value* {
- return Builder.getInt32(addr.getAlignment().getQuantity());
- };
- Address PtrOp0 = Address::invalid();
- Address PtrOp1 = Address::invalid();
- SmallVector<Value*, 4> Ops;
- bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
- unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
- for (unsigned i = 0, e = NumArgs; i != e; i++) {
- if (i == 0) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v:
- case NEON::BI__builtin_neon_vld1q_lane_v:
- case NEON::BI__builtin_neon_vld1_lane_v:
- case NEON::BI__builtin_neon_vld1_dup_v:
- case NEON::BI__builtin_neon_vld1q_dup_v:
- case NEON::BI__builtin_neon_vst1_v:
- case NEON::BI__builtin_neon_vst1q_v:
- case NEON::BI__builtin_neon_vst1q_lane_v:
- case NEON::BI__builtin_neon_vst1_lane_v:
- case NEON::BI__builtin_neon_vst2_v:
- case NEON::BI__builtin_neon_vst2q_v:
- case NEON::BI__builtin_neon_vst2_lane_v:
- case NEON::BI__builtin_neon_vst2q_lane_v:
- case NEON::BI__builtin_neon_vst3_v:
- case NEON::BI__builtin_neon_vst3q_v:
- case NEON::BI__builtin_neon_vst3_lane_v:
- case NEON::BI__builtin_neon_vst3q_lane_v:
- case NEON::BI__builtin_neon_vst4_v:
- case NEON::BI__builtin_neon_vst4q_v:
- case NEON::BI__builtin_neon_vst4_lane_v:
- case NEON::BI__builtin_neon_vst4q_lane_v:
- // Get the alignment for the argument in addition to the value;
- // we'll use it later.
- PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
- Ops.push_back(PtrOp0.getPointer());
- continue;
- }
- }
- if (i == 1) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_v:
- case NEON::BI__builtin_neon_vld2q_v:
- case NEON::BI__builtin_neon_vld3_v:
- case NEON::BI__builtin_neon_vld3q_v:
- case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v:
- case NEON::BI__builtin_neon_vld2_lane_v:
- case NEON::BI__builtin_neon_vld2q_lane_v:
- case NEON::BI__builtin_neon_vld3_lane_v:
- case NEON::BI__builtin_neon_vld3q_lane_v:
- case NEON::BI__builtin_neon_vld4_lane_v:
- case NEON::BI__builtin_neon_vld4q_lane_v:
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v:
- // Get the alignment for the argument in addition to the value;
- // we'll use it later.
- PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
- Ops.push_back(PtrOp1.getPointer());
- continue;
- }
- }
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
- }
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vget_lane_bf16:
- case NEON::BI__builtin_neon_vget_lane_f32:
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vgetq_lane_bf16:
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vduph_lane_bf16:
- case NEON::BI__builtin_neon_vduph_laneq_bf16:
- return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
- case NEON::BI__builtin_neon_vrndns_f32: {
- Value *Arg = EmitScalarExpr(E->getArg(0));
- llvm::Type *Tys[] = {Arg->getType()};
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
- return Builder.CreateCall(F, {Arg}, "vrndn"); }
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_bf16:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_bf16:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
- case NEON::BI__builtin_neon_vsha1h_u32:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
- "vsha1h");
- case NEON::BI__builtin_neon_vsha1cq_u32:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
- "vsha1c");
- case NEON::BI__builtin_neon_vsha1pq_u32:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
- "vsha1p");
- case NEON::BI__builtin_neon_vsha1mq_u32:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
- "vsha1m");
- case NEON::BI__builtin_neon_vcvth_bf16_f32: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
- "vcvtbfp2bf");
- }
- // The ARM _MoveToCoprocessor builtins put the input register value as
- // the first argument, but the LLVM intrinsic expects it as the third one.
- case ARM::BI_MoveToCoprocessor:
- case ARM::BI_MoveToCoprocessor2: {
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
- Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
- return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
- Ops[3], Ops[4], Ops[5]});
- }
- }
- // Get the last argument, which specifies the vector type.
- assert(HasExtraArg);
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext());
- if (!Result)
- return nullptr;
- if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
- BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
- // Determine the overloaded type of this builtin.
- llvm::Type *Ty;
- if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
- Ty = FloatTy;
- else
- Ty = DoubleTy;
- // Determine whether this is an unsigned conversion or not.
- bool usgn = Result->getZExtValue() == 1;
- unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
- // Call the appropriate intrinsic.
- Function *F = CGM.getIntrinsic(Int, Ty);
- return Builder.CreateCall(F, Ops, "vcvtr");
- }
- // Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type = Result->getZExtValue();
- bool usgn = Type.isUnsigned();
- bool rightShift = false;
- llvm::FixedVectorType *VTy =
- GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
- getTarget().hasBFloat16Type());
- llvm::Type *Ty = VTy;
- if (!Ty)
- return nullptr;
- // Many NEON builtins have identical semantics and uses in ARM and
- // AArch64. Emit these in a single function.
- auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
- const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
- IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
- if (Builtin)
- return EmitCommonNeonBuiltinExpr(
- Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
- Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
- unsigned Int;
- switch (BuiltinID) {
- default: return nullptr;
- case NEON::BI__builtin_neon_vld1q_lane_v:
- // Handle 64-bit integer elements as a special case. Use shuffles of
- // one-element vectors to avoid poor code for i64 in the backend.
- if (VTy->getElementType()->isIntegerTy(64)) {
- // Extract the other lane.
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
- Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
- Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
- // Load the value as a one-element vector.
- Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
- llvm::Type *Tys[] = {Ty, Int8PtrTy};
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
- Value *Align = getAlignmentValue32(PtrOp0);
- Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
- // Combine them.
- int Indices[] = {1 - Lane, Lane};
- return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
- }
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vld1_lane_v: {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
- Value *Ld = Builder.CreateLoad(PtrOp0);
- return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
- }
- case NEON::BI__builtin_neon_vqrshrn_n_v:
- Int =
- usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
- 1, true);
- case NEON::BI__builtin_neon_vqrshrun_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
- Ops, "vqrshrun_n", 1, true);
- case NEON::BI__builtin_neon_vqshrn_n_v:
- Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
- 1, true);
- case NEON::BI__builtin_neon_vqshrun_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
- Ops, "vqshrun_n", 1, true);
- case NEON::BI__builtin_neon_vrecpe_v:
- case NEON::BI__builtin_neon_vrecpeq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
- Ops, "vrecpe");
- case NEON::BI__builtin_neon_vrshrn_n_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
- Ops, "vrshrn_n", 1, true);
- case NEON::BI__builtin_neon_vrsra_n_v:
- case NEON::BI__builtin_neon_vrsraq_n_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
- Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
- Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
- return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
- case NEON::BI__builtin_neon_vsri_n_v:
- case NEON::BI__builtin_neon_vsriq_n_v:
- rightShift = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vsli_n_v:
- case NEON::BI__builtin_neon_vsliq_n_v:
- Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
- Ops, "vsli_n");
- case NEON::BI__builtin_neon_vsra_n_v:
- case NEON::BI__builtin_neon_vsraq_n_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
- return Builder.CreateAdd(Ops[0], Ops[1]);
- case NEON::BI__builtin_neon_vst1q_lane_v:
- // Handle 64-bit integer elements as a special case. Use a shuffle to get
- // a one-element vector and avoid poor code for i64 in the backend.
- if (VTy->getElementType()->isIntegerTy(64)) {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
- Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
- Ops[2] = getAlignmentValue32(PtrOp0);
- llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
- Tys), Ops);
- }
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vst1_lane_v: {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
- return St;
- }
- case NEON::BI__builtin_neon_vtbl1_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
- Ops, "vtbl1");
- case NEON::BI__builtin_neon_vtbl2_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
- Ops, "vtbl2");
- case NEON::BI__builtin_neon_vtbl3_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
- Ops, "vtbl3");
- case NEON::BI__builtin_neon_vtbl4_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
- Ops, "vtbl4");
- case NEON::BI__builtin_neon_vtbx1_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
- Ops, "vtbx1");
- case NEON::BI__builtin_neon_vtbx2_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
- Ops, "vtbx2");
- case NEON::BI__builtin_neon_vtbx3_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
- Ops, "vtbx3");
- case NEON::BI__builtin_neon_vtbx4_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
- Ops, "vtbx4");
- }
- }
- template<typename Integer>
- static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
- return E->getIntegerConstantExpr(Context)->getExtValue();
- }
- static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
- llvm::Type *T, bool Unsigned) {
- // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
- // which finds it convenient to specify signed/unsigned as a boolean flag.
- return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
- }
- static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
- uint32_t Shift, bool Unsigned) {
- // MVE helper function for integer shift right. This must handle signed vs
- // unsigned, and also deal specially with the case where the shift count is
- // equal to the lane size. In LLVM IR, an LShr with that parameter would be
- // undefined behavior, but in MVE it's legal, so we must convert it to code
- // that is not undefined in IR.
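- // For example, with 8-bit lanes an unsigned shift by 8 is emitted as an
- // all-zero vector, while a signed shift by 8 becomes an arithmetic shift by 7.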
- unsigned LaneBits = cast<llvm::VectorType>(V->getType())
- ->getElementType()
- ->getPrimitiveSizeInBits();
- if (Shift == LaneBits) {
- // An unsigned shift of the full lane size always generates zero, so we can
- // simply emit a zero vector. A signed shift of the full lane size does the
- // same thing as shifting by one bit fewer.
- if (Unsigned)
- return llvm::Constant::getNullValue(V->getType());
- else
- --Shift;
- }
- return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
- }
- static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
- // MVE-specific helper function for a vector splat, which infers the element
- // count of the output vector by knowing that MVE vectors are all 128 bits
- // wide.
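- // For example, an i32 scalar is splatted to a <4 x i32> vector and an i16
- // scalar to a <8 x i16> vector.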
- unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
- return Builder.CreateVectorSplat(Elements, V);
- }
- static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
- CodeGenFunction *CGF,
- llvm::Value *V,
- llvm::Type *DestType) {
- // Convert one MVE vector type into another by reinterpreting its in-register
- // format.
- //
- // Little-endian, this is identical to a bitcast (which reinterprets the
- // memory format). But big-endian, they're not necessarily the same, because
- // the register and memory formats map to each other differently depending on
- // the lane size.
- //
- // We generate a bitcast whenever we can (if we're little-endian, or if the
- // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
- // that performs the different kind of reinterpretation.
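- // For example, on a big-endian target a <8 x i16> to <4 x i32> conversion
- // uses the arm.mve.vreinterpretq intrinsic, whereas any conversion between
- // types with equal lane sizes remains a plain bitcast.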
- if (CGF->getTarget().isBigEndian() &&
- V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
- return Builder.CreateCall(
- CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
- {DestType, V->getType()}),
- V);
- } else {
- return Builder.CreateBitCast(V, DestType);
- }
- }
- static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
- // Make a shufflevector that extracts every other element of a vector (evens
- // or odds, as desired).
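- // For example, for a 4-element input the mask is {0, 2} for the even
- // elements and {1, 3} for the odd ones.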
- SmallVector<int, 16> Indices;
- unsigned InputElements =
- cast<llvm::FixedVectorType>(V->getType())->getNumElements();
- for (unsigned i = 0; i < InputElements; i += 2)
- Indices.push_back(i + Odd);
- return Builder.CreateShuffleVector(V, Indices);
- }
- static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
- llvm::Value *V1) {
- // Make a shufflevector that interleaves two vectors element by element.
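- // For example, zipping two 4-element vectors uses the mask
- // {0, 4, 1, 5, 2, 6, 3, 7}.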
- assert(V0->getType() == V1->getType() && "Can't zip different vector types");
- SmallVector<int, 16> Indices;
- unsigned InputElements =
- cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
- for (unsigned i = 0; i < InputElements; i++) {
- Indices.push_back(i);
- Indices.push_back(i + InputElements);
- }
- return Builder.CreateShuffleVector(V0, V1, Indices);
- }
- template<unsigned HighBit, unsigned OtherBits>
- static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
- // MVE-specific helper function to make a vector splat of a constant such as
- // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
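- // For example, HighBit=1/OtherBits=0 yields INT_MIN for the lane type,
- // HighBit=0/OtherBits=1 yields INT_MAX, and HighBit=1/OtherBits=1 yields
- // UINT_MAX.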
- llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
- unsigned LaneBits = T->getPrimitiveSizeInBits();
- uint32_t Value = HighBit << (LaneBits - 1);
- if (OtherBits)
- Value |= (1UL << (LaneBits - 1)) - 1;
- llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
- return ARMMVEVectorSplat(Builder, Lane);
- }
- static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
- llvm::Value *V,
- unsigned ReverseWidth) {
- // MVE-specific helper function which reverses the elements of a
- // vector within every (ReverseWidth)-bit collection of lanes.
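- // For example, reversing 16-bit lanes within 32-bit groups gives Mask = 1,
- // so each adjacent pair of lanes is swapped (indices 1, 0, 3, 2, ...).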
- SmallVector<int, 16> Indices;
- unsigned LaneSize = V->getType()->getScalarSizeInBits();
- unsigned Elements = 128 / LaneSize;
- unsigned Mask = ReverseWidth / LaneSize - 1;
- for (unsigned i = 0; i < Elements; i++)
- Indices.push_back(i ^ Mask);
- return Builder.CreateShuffleVector(V, Indices);
- }
- Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue,
- llvm::Triple::ArchType Arch) {
- enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
- Intrinsic::ID IRIntr;
- unsigned NumVectors;
- // Code autogenerated by Tablegen will handle all the simple builtins.
- switch (BuiltinID) {
- #include "clang/Basic/arm_mve_builtin_cg.inc"
- // If we didn't match an MVE builtin id at all, go back to the
- // main EmitARMBuiltinExpr.
- default:
- return nullptr;
- }
- // Anything that breaks from that switch is an MVE builtin that
- // needs handwritten code to generate.
- switch (CustomCodeGenType) {
- case CustomCodeGen::VLD24: {
- llvm::SmallVector<Value *, 4> Ops;
- llvm::SmallVector<llvm::Type *, 4> Tys;
- auto MvecCType = E->getType();
- auto MvecLType = ConvertType(MvecCType);
- assert(MvecLType->isStructTy() &&
- "Return type for vld[24]q should be a struct");
- assert(MvecLType->getStructNumElements() == 1 &&
- "Return-type struct for vld[24]q should have one element");
- auto MvecLTypeInner = MvecLType->getStructElementType(0);
- assert(MvecLTypeInner->isArrayTy() &&
- "Return-type struct for vld[24]q should contain an array");
- assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
- "Array member of return-type struct vld[24]q has wrong length");
- auto VecLType = MvecLTypeInner->getArrayElementType();
- Tys.push_back(VecLType);
- auto Addr = E->getArg(0);
- Ops.push_back(EmitScalarExpr(Addr));
- Tys.push_back(ConvertType(Addr->getType()));
- Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
- Value *LoadResult = Builder.CreateCall(F, Ops);
- Value *MvecOut = UndefValue::get(MvecLType);
- for (unsigned i = 0; i < NumVectors; ++i) {
- Value *Vec = Builder.CreateExtractValue(LoadResult, i);
- MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
- }
- if (ReturnValue.isNull())
- return MvecOut;
- else
- return Builder.CreateStore(MvecOut, ReturnValue.getValue());
- }
- case CustomCodeGen::VST24: {
- llvm::SmallVector<Value *, 4> Ops;
- llvm::SmallVector<llvm::Type *, 4> Tys;
- auto Addr = E->getArg(0);
- Ops.push_back(EmitScalarExpr(Addr));
- Tys.push_back(ConvertType(Addr->getType()));
- auto MvecCType = E->getArg(1)->getType();
- auto MvecLType = ConvertType(MvecCType);
- assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
- assert(MvecLType->getStructNumElements() == 1 &&
- "Data-type struct for vst2q should have one element");
- auto MvecLTypeInner = MvecLType->getStructElementType(0);
- assert(MvecLTypeInner->isArrayTy() &&
- "Data-type struct for vst2q should contain an array");
- assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
- "Array member of return-type struct vld[24]q has wrong length");
- auto VecLType = MvecLTypeInner->getArrayElementType();
- Tys.push_back(VecLType);
- AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
- EmitAggExpr(E->getArg(1), MvecSlot);
- auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
- for (unsigned i = 0; i < NumVectors; i++)
- Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
- Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
- Value *ToReturn = nullptr;
- for (unsigned i = 0; i < NumVectors; i++) {
- Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
- ToReturn = Builder.CreateCall(F, Ops);
- Ops.pop_back();
- }
- return ToReturn;
- }
- }
- llvm_unreachable("unknown custom codegen type.");
- }
- Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue,
- llvm::Triple::ArchType Arch) {
- switch (BuiltinID) {
- default:
- return nullptr;
- #include "clang/Basic/arm_cde_builtin_cg.inc"
- }
- }
- static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
- const CallExpr *E,
- SmallVectorImpl<Value *> &Ops,
- llvm::Triple::ArchType Arch) {
- unsigned int Int = 0;
- const char *s = nullptr;
- switch (BuiltinID) {
- default:
- return nullptr;
- case NEON::BI__builtin_neon_vtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1q_v:
- case NEON::BI__builtin_neon_vtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2q_v:
- case NEON::BI__builtin_neon_vtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3q_v:
- case NEON::BI__builtin_neon_vtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4q_v:
- break;
- case NEON::BI__builtin_neon_vtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1q_v:
- case NEON::BI__builtin_neon_vtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2q_v:
- case NEON::BI__builtin_neon_vtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3q_v:
- case NEON::BI__builtin_neon_vtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4q_v:
- break;
- }
- assert(E->getNumArgs() >= 3);
- // Get the last argument, which specifies the vector type.
- const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext());
- if (!Result)
- return nullptr;
- // Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type = Result->getZExtValue();
- llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
- if (!Ty)
- return nullptr;
- CodeGen::CGBuilderTy &Builder = CGF.Builder;
- // AArch64 scalar builtins are not overloaded; they do not have an extra
- // argument that specifies the vector type, so each case must be handled
- // separately.
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vtbl1_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
- Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
- "vtbl1");
- }
- case NEON::BI__builtin_neon_vtbl2_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
- Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
- "vtbl1");
- }
- case NEON::BI__builtin_neon_vtbl3_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
- Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
- "vtbl2");
- }
- case NEON::BI__builtin_neon_vtbl4_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
- Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
- "vtbl2");
- }
- case NEON::BI__builtin_neon_vtbx1_v: {
- Value *TblRes =
- packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
- Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
- llvm::Constant *EightV = ConstantInt::get(Ty, 8);
- Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
- CmpRes = Builder.CreateSExt(CmpRes, Ty);
- Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
- Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
- return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
- }
- case NEON::BI__builtin_neon_vtbx2_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
- Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
- "vtbx1");
- }
- case NEON::BI__builtin_neon_vtbx3_v: {
- Value *TblRes =
- packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
- Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
- llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
- Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
- TwentyFourV);
- CmpRes = Builder.CreateSExt(CmpRes, Ty);
- Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
- Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
- return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
- }
- case NEON::BI__builtin_neon_vtbx4_v: {
- return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
- Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
- "vtbx2");
- }
- case NEON::BI__builtin_neon_vqtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1q_v:
- Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
- case NEON::BI__builtin_neon_vqtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2q_v: {
- Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
- case NEON::BI__builtin_neon_vqtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3q_v:
- Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
- case NEON::BI__builtin_neon_vqtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4q_v:
- Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
- case NEON::BI__builtin_neon_vqtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1q_v:
- Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
- case NEON::BI__builtin_neon_vqtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2q_v:
- Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
- case NEON::BI__builtin_neon_vqtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3q_v:
- Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
- case NEON::BI__builtin_neon_vqtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4q_v:
- Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
- }
- }
- if (!Int)
- return nullptr;
- Function *F = CGF.CGM.getIntrinsic(Int, Ty);
- return CGF.EmitNeonCall(F, Ops, s);
- }
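- // Wrap a scalar i16 operand in lane 0 of a <4 x i16> vector so that scalar
- // builtins can be lowered through the corresponding vector intrinsics.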
- Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
- auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- Op = Builder.CreateBitCast(Op, Int16Ty);
- Value *V = UndefValue::get(VTy);
- llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
- Op = Builder.CreateInsertElement(V, Op, CI);
- return Op;
- }
- /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
- /// access builtin. Only required if it can't be inferred from the base pointer
- /// operand.
- llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
- switch (TypeFlags.getMemEltType()) {
- case SVETypeFlags::MemEltTyDefault:
- return getEltType(TypeFlags);
- case SVETypeFlags::MemEltTyInt8:
- return Builder.getInt8Ty();
- case SVETypeFlags::MemEltTyInt16:
- return Builder.getInt16Ty();
- case SVETypeFlags::MemEltTyInt32:
- return Builder.getInt32Ty();
- case SVETypeFlags::MemEltTyInt64:
- return Builder.getInt64Ty();
- }
- llvm_unreachable("Unknown MemEltType");
- }
- llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
- switch (TypeFlags.getEltType()) {
- default:
- llvm_unreachable("Invalid SVETypeFlag!");
- case SVETypeFlags::EltTyInt8:
- return Builder.getInt8Ty();
- case SVETypeFlags::EltTyInt16:
- return Builder.getInt16Ty();
- case SVETypeFlags::EltTyInt32:
- return Builder.getInt32Ty();
- case SVETypeFlags::EltTyInt64:
- return Builder.getInt64Ty();
- case SVETypeFlags::EltTyFloat16:
- return Builder.getHalfTy();
- case SVETypeFlags::EltTyFloat32:
- return Builder.getFloatTy();
- case SVETypeFlags::EltTyFloat64:
- return Builder.getDoubleTy();
- case SVETypeFlags::EltTyBFloat16:
- return Builder.getBFloatTy();
- case SVETypeFlags::EltTyBool8:
- case SVETypeFlags::EltTyBool16:
- case SVETypeFlags::EltTyBool32:
- case SVETypeFlags::EltTyBool64:
- return Builder.getInt1Ty();
- }
- }
- // Return the llvm predicate vector type corresponding to the specified element
- // TypeFlags.
- llvm::ScalableVectorType *
- CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
- switch (TypeFlags.getEltType()) {
- default: llvm_unreachable("Unhandled SVETypeFlag!");
- case SVETypeFlags::EltTyInt8:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- case SVETypeFlags::EltTyInt16:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
- case SVETypeFlags::EltTyInt32:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
- case SVETypeFlags::EltTyInt64:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
- case SVETypeFlags::EltTyBFloat16:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
- case SVETypeFlags::EltTyFloat16:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
- case SVETypeFlags::EltTyFloat32:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
- case SVETypeFlags::EltTyFloat64:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
- case SVETypeFlags::EltTyBool8:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- case SVETypeFlags::EltTyBool16:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
- case SVETypeFlags::EltTyBool32:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
- case SVETypeFlags::EltTyBool64:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
- }
- }
- // Return the llvm vector type corresponding to the specified element TypeFlags.
- llvm::ScalableVectorType *
- CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
- switch (TypeFlags.getEltType()) {
- default:
- llvm_unreachable("Invalid SVETypeFlag!");
- case SVETypeFlags::EltTyInt8:
- return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
- case SVETypeFlags::EltTyInt16:
- return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
- case SVETypeFlags::EltTyInt32:
- return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
- case SVETypeFlags::EltTyInt64:
- return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
- case SVETypeFlags::EltTyFloat16:
- return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
- case SVETypeFlags::EltTyBFloat16:
- return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
- case SVETypeFlags::EltTyFloat32:
- return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
- case SVETypeFlags::EltTyFloat64:
- return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
- case SVETypeFlags::EltTyBool8:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- case SVETypeFlags::EltTyBool16:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
- case SVETypeFlags::EltTyBool32:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
- case SVETypeFlags::EltTyBool64:
- return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
- }
- }
- llvm::Value *
- CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
- Function *Ptrue =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
- return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
- }
- constexpr unsigned SVEBitsPerBlock = 128;
- static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
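- // For example, an i32 element type yields <vscale x 4 x i32> and an i8
- // element type yields <vscale x 16 x i8>.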
- unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
- return llvm::ScalableVectorType::get(EltTy, NumElts);
- }
- // Reinterpret the input predicate so that it can be used to correctly isolate
- // the elements of the specified datatype.
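- // For example, a <vscale x 16 x i1> svbool_t is narrowed to
- // <vscale x 2 x i1> before predicating a vector of 64-bit elements.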
- Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
- llvm::ScalableVectorType *VTy) {
- auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
- if (Pred->getType() == RTy)
- return Pred;
- unsigned IntID;
- llvm::Type *IntrinsicTy;
- switch (VTy->getMinNumElements()) {
- default:
- llvm_unreachable("unsupported element count!");
- case 2:
- case 4:
- case 8:
- IntID = Intrinsic::aarch64_sve_convert_from_svbool;
- IntrinsicTy = RTy;
- break;
- case 16:
- IntID = Intrinsic::aarch64_sve_convert_to_svbool;
- IntrinsicTy = Pred->getType();
- break;
- }
- Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
- Value *C = Builder.CreateCall(F, Pred);
- assert(C->getType() == RTy && "Unexpected return type!");
- return C;
- }
- Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value *> &Ops,
- unsigned IntID) {
- auto *ResultTy = getSVEType(TypeFlags);
- auto *OverloadedTy =
- llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
- // At the ACLE level there's only one predicate type, svbool_t, which is
- // mapped to <n x 16 x i1>. However, this might be incompatible with the
- // actual type being loaded. For example, when loading doubles (i64) the
- // predicate should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being loaded must match. Cast accordingly.
- Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
- Function *F = nullptr;
- if (Ops[1]->getType()->isVectorTy())
- // This is the "vector base, scalar offset" case. In order to uniquely
- // map this built-in to an LLVM IR intrinsic, we need both the return type
- // and the type of the vector base.
- F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
- else
- // This is the "scalar base, vector offset case". The type of the offset
- // is encoded in the name of the intrinsic. We only need to specify the
- // return type in order to uniquely map this built-in to an LLVM IR
- // intrinsic.
- F = CGM.getIntrinsic(IntID, OverloadedTy);
- // Pass 0 when the offset is missing. This can only be applied when using
- // the "vector base" addressing mode for which ACLE allows no offset. The
- // corresponding LLVM IR always requires an offset.
- if (Ops.size() == 2) {
- assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
- Ops.push_back(ConstantInt::get(Int64Ty, 0));
- }
- // For "vector base, scalar index" scale the index so that it becomes a
- // scalar offset.
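- // For example, an index of 3 into a vector of 32-bit elements becomes a
- // byte offset of 12.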
- if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
- unsigned BytesPerElt =
- OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[2] = Builder.CreateMul(Ops[2], Scale);
- }
- Value *Call = Builder.CreateCall(F, Ops);
- // The following sext/zext is only needed when ResultTy != OverloadedTy. In
- // other cases it's folded into a nop.
- return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
- : Builder.CreateSExt(Call, ResultTy);
- }
- Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value *> &Ops,
- unsigned IntID) {
- auto *SrcDataTy = getSVEType(TypeFlags);
- auto *OverloadedTy =
- llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
- // In ACLE the source data is passed in the last argument, whereas in LLVM IR
- // it's the first argument. Move it accordingly.
- Ops.insert(Ops.begin(), Ops.pop_back_val());
- Function *F = nullptr;
- if (Ops[2]->getType()->isVectorTy())
- // This is the "vector base, scalar offset" case. In order to uniquely
- // map this built-in to an LLVM IR intrinsic, we need both the return type
- // and the type of the vector base.
- F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
- else
- // This is the "scalar base, vector offset case". The type of the offset
- // is encoded in the name of the intrinsic. We only need to specify the
- // return type in order to uniquely map this built-in to an LLVM IR
- // intrinsic.
- F = CGM.getIntrinsic(IntID, OverloadedTy);
- // Pass 0 when the offset is missing. This can only be applied when using
- // the "vector base" addressing mode for which ACLE allows no offset. The
- // corresponding LLVM IR always requires an offset.
- if (Ops.size() == 3) {
- assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
- Ops.push_back(ConstantInt::get(Int64Ty, 0));
- }
- // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
- // folded into a nop.
- Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
- // At the ACLE level there's only one predicate type, svbool_t, which is
- // mapped to <n x 16 x i1>. However, this might be incompatible with the
- // actual type being stored. For example, when storing doubles (i64) the
- // predicate should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being stored must match. Cast accordingly.
- Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
- // For "vector base, scalar index" scale the index so that it becomes a
- // scalar offset.
- if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
- unsigned BytesPerElt =
- OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[3] = Builder.CreateMul(Ops[3], Scale);
- }
- return Builder.CreateCall(F, Ops);
- }
- Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value *> &Ops,
- unsigned IntID) {
- // The gather prefetches are overloaded on the vector input - this can either
- // be the vector of base addresses or vector of offsets.
- auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
- if (!OverloadedTy)
- OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
- // Cast the predicate from svbool_t to the right number of elements.
- Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
- // vector + imm addressing modes
- if (Ops[1]->getType()->isVectorTy()) {
- if (Ops.size() == 3) {
- // Pass 0 for 'vector+imm' when the index is omitted.
- Ops.push_back(ConstantInt::get(Int64Ty, 0));
- // The sv_prfop is the last operand in the builtin and IR intrinsic.
- std::swap(Ops[2], Ops[3]);
- } else {
- // Index needs to be passed as scaled offset.
- llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
- unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
- Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
- Ops[2] = Builder.CreateMul(Ops[2], Scale);
- }
- }
- Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
- return Builder.CreateCall(F, Ops);
- }
- Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value*> &Ops,
- unsigned IntID) {
- llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
- auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
- auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
- unsigned N;
- switch (IntID) {
- case Intrinsic::aarch64_sve_ld2:
- N = 2;
- break;
- case Intrinsic::aarch64_sve_ld3:
- N = 3;
- break;
- case Intrinsic::aarch64_sve_ld4:
- N = 4;
- break;
- default:
- llvm_unreachable("unknown intrinsic!");
- }
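- // For example, an ld2 of <vscale x 4 x i32> parts is lowered to an intrinsic
- // whose return type is a single <vscale x 8 x i32> value.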
- auto RetTy = llvm::VectorType::get(VTy->getElementType(),
- VTy->getElementCount() * N);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
- Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
- Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
- BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
- Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
- return Builder.CreateCall(F, { Predicate, BasePtr });
- }
- Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value*> &Ops,
- unsigned IntID) {
- llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
- auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
- auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
- unsigned N;
- switch (IntID) {
- case Intrinsic::aarch64_sve_st2:
- N = 2;
- break;
- case Intrinsic::aarch64_sve_st3:
- N = 3;
- break;
- case Intrinsic::aarch64_sve_st4:
- N = 4;
- break;
- default:
- llvm_unreachable("unknown intrinsic!");
- }
- auto TupleTy =
- llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
- Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
- Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
- Value *Val = Ops.back();
- BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
- BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
- // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
- // need to break up the tuple vector.
- SmallVector<llvm::Value*, 5> Operands;
- Function *FExtr =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
- for (unsigned I = 0; I < N; ++I)
- Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
- Operands.append({Predicate, BasePtr});
- Function *F = CGM.getIntrinsic(IntID, { VTy });
- return Builder.CreateCall(F, Operands);
- }
- // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
- // svpmullt_pair intrinsics, with the exception that their results are bitcast
- // to a wider type.
- Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID) {
- // Splat scalar operand to vector (intrinsics with _n infix)
- if (TypeFlags.hasSplatOperand()) {
- unsigned OpNo = TypeFlags.getSplatOperand();
- Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
- }
- // The pair-wise function has a narrower overloaded type.
- Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
- Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
- // Now bitcast to the wider result type.
- llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
- return EmitSVEReinterpret(Call, Ty);
- }
- Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
- ArrayRef<Value *> Ops, unsigned BuiltinID) {
- llvm::Type *OverloadedTy = getSVEType(TypeFlags);
- Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
- return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
- }
- Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
- SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID) {
- auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
- auto *VectorTy = getSVEVectorForElementType(MemEltTy);
- auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
- Value *BasePtr = Ops[1];
- // Apply the index operand if it was not omitted.
- if (Ops.size() > 3) {
- BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
- }
- // Prefetch intrinsics always expect an i8*
- BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
- Value *PrfOp = Ops.back();
- Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
- return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
- }
- Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
- llvm::Type *ReturnTy,
- SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID,
- bool IsZExtReturn) {
- QualType LangPTy = E->getArg(1)->getType();
- llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
- LangPTy->castAs<PointerType>()->getPointeeType());
- // The vector type that is returned may be different from the
- // eventual type loaded from memory.
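- // For example, an extending load of bytes into a 32-bit result loads a
- // <vscale x 4 x i8> from memory and then zero- or sign-extends it to
- // <vscale x 4 x i32>.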
- auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
- auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
- Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
- Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
- BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
- Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
- Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
- return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
- : Builder.CreateSExt(Load, VectorTy);
- }
- Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
- SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID) {
- QualType LangPTy = E->getArg(1)->getType();
- llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
- LangPTy->castAs<PointerType>()->getPointeeType());
- // The vector type that is stored may be different from the
- // eventual type stored to memory.
- auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
- auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
- Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
- Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
- // Last value is always the data
- llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
- BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
- Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
- return Builder.CreateCall(F, {Val, Predicate, BasePtr});
- }
- // Limit the usage of scalable llvm IR generated by the ACLE by using the
- // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
- Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
- auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
- return Builder.CreateCall(F, Scalar);
- }
- Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
- return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
- }
- Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
- // FIXME: For big endian this needs an additional REV, or needs a separate
- // intrinsic that is code-generated as a no-op, because the LLVM bitcast
- // instruction is defined as 'bitwise' equivalent from memory point of
- // view (when storing/reloading), whereas the svreinterpret builtin
- // implements bitwise equivalent cast from register point of view.
- // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
- return Builder.CreateBitCast(Val, Ty);
- }
- static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
- SmallVectorImpl<Value *> &Ops) {
- auto *SplatZero = Constant::getNullValue(Ty);
- Ops.insert(Ops.begin(), SplatZero);
- }
- static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
- SmallVectorImpl<Value *> &Ops) {
- auto *SplatUndef = UndefValue::get(Ty);
- Ops.insert(Ops.begin(), SplatUndef);
- }
- SmallVector<llvm::Type *, 2>
- CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
- llvm::Type *ResultType,
- ArrayRef<Value *> Ops) {
- if (TypeFlags.isOverloadNone())
- return {};
- llvm::Type *DefaultType = getSVEType(TypeFlags);
- if (TypeFlags.isOverloadWhile())
- return {DefaultType, Ops[1]->getType()};
- if (TypeFlags.isOverloadWhileRW())
- return {getSVEPredType(TypeFlags), Ops[0]->getType()};
- if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
- return {Ops[0]->getType(), Ops.back()->getType()};
- if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
- return {ResultType, Ops[0]->getType()};
- assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
- return {DefaultType};
- }
- Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- // Find out if any arguments are required to be integer constant expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- llvm::Type *Ty = ConvertType(E->getType());
- if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
- BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
- Value *Val = EmitScalarExpr(E->getArg(0));
- return EmitSVEReinterpret(Val, Ty);
- }
- llvm::SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- if ((ICEArguments & (1 << i)) == 0)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Optional<llvm::APSInt> Result =
- E->getArg(i)->getIntegerConstantExpr(getContext());
- assert(Result && "Expected argument to be a constant");
- // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
- // truncate because the immediate has been range checked and no valid
- // immediate requires more than a handful of bits.
- *Result = Result->extOrTrunc(32);
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
- }
- }
- auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
- AArch64SVEIntrinsicsProvenSorted);
- SVETypeFlags TypeFlags(Builtin->TypeModifier);
- if (TypeFlags.isLoad())
- return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
- TypeFlags.isZExtReturn());
- else if (TypeFlags.isStore())
- return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isGatherLoad())
- return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isScatterStore())
- return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isPrefetch())
- return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isGatherPrefetch())
- return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isStructLoad())
- return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isStructStore())
- return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isUndef())
- return UndefValue::get(Ty);
- else if (Builtin->LLVMIntrinsic != 0) {
- if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
- InsertExplicitZeroOperand(Builder, Ty, Ops);
- if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
- InsertExplicitUndefOperand(Builder, Ty, Ops);
- // Some ACLE builtins leave out the argument to specify the predicate
- // pattern, which is expected to be expanded to an SV_ALL pattern.
- if (TypeFlags.isAppendSVALL())
- Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
- if (TypeFlags.isInsertOp1SVALL())
- Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
- // Predicates must match the main datatype.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
- if (PredTy->getElementType()->isIntegerTy(1))
- Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
- // Splat scalar operand to vector (intrinsics with _n infix)
- if (TypeFlags.hasSplatOperand()) {
- unsigned OpNo = TypeFlags.getSplatOperand();
- Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
- }
- if (TypeFlags.isReverseCompare())
- std::swap(Ops[1], Ops[2]);
- if (TypeFlags.isReverseUSDOT())
- std::swap(Ops[1], Ops[2]);
- // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
- if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
- llvm::Type *OpndTy = Ops[1]->getType();
- auto *SplatZero = Constant::getNullValue(OpndTy);
- Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
- Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
- }
- Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
- getSVEOverloadTypes(TypeFlags, Ty, Ops));
- Value *Call = Builder.CreateCall(F, Ops);
- // Predicate results must be converted to svbool_t.
- if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
- if (PredTy->getScalarType()->isIntegerTy(1))
- Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
- return Call;
- }
- switch (BuiltinID) {
- default:
- return nullptr;
- case SVE::BI__builtin_sve_svmov_b_z: {
- // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
- SVETypeFlags TypeFlags(Builtin->TypeModifier);
- llvm::Type* OverloadedTy = getSVEType(TypeFlags);
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
- return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
- }
- case SVE::BI__builtin_sve_svnot_b_z: {
- // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
- SVETypeFlags TypeFlags(Builtin->TypeModifier);
- llvm::Type* OverloadedTy = getSVEType(TypeFlags);
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
- return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
- }
- case SVE::BI__builtin_sve_svmovlb_u16:
- case SVE::BI__builtin_sve_svmovlb_u32:
- case SVE::BI__builtin_sve_svmovlb_u64:
- return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
- case SVE::BI__builtin_sve_svmovlb_s16:
- case SVE::BI__builtin_sve_svmovlb_s32:
- case SVE::BI__builtin_sve_svmovlb_s64:
- return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
- case SVE::BI__builtin_sve_svmovlt_u16:
- case SVE::BI__builtin_sve_svmovlt_u32:
- case SVE::BI__builtin_sve_svmovlt_u64:
- return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
- case SVE::BI__builtin_sve_svmovlt_s16:
- case SVE::BI__builtin_sve_svmovlt_s32:
- case SVE::BI__builtin_sve_svmovlt_s64:
- return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
- case SVE::BI__builtin_sve_svpmullt_u16:
- case SVE::BI__builtin_sve_svpmullt_u64:
- case SVE::BI__builtin_sve_svpmullt_n_u16:
- case SVE::BI__builtin_sve_svpmullt_n_u64:
- return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
- case SVE::BI__builtin_sve_svpmullb_u16:
- case SVE::BI__builtin_sve_svpmullb_u64:
- case SVE::BI__builtin_sve_svpmullb_n_u16:
- case SVE::BI__builtin_sve_svpmullb_n_u64:
- return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
- case SVE::BI__builtin_sve_svdup_n_b8:
- case SVE::BI__builtin_sve_svdup_n_b16:
- case SVE::BI__builtin_sve_svdup_n_b32:
- case SVE::BI__builtin_sve_svdup_n_b64: {
- Value *CmpNE =
- Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
- llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
- Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
- return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
- }
- case SVE::BI__builtin_sve_svdupq_n_b8:
- case SVE::BI__builtin_sve_svdupq_n_b16:
- case SVE::BI__builtin_sve_svdupq_n_b32:
- case SVE::BI__builtin_sve_svdupq_n_b64:
- case SVE::BI__builtin_sve_svdupq_n_u8:
- case SVE::BI__builtin_sve_svdupq_n_s8:
- case SVE::BI__builtin_sve_svdupq_n_u64:
- case SVE::BI__builtin_sve_svdupq_n_f64:
- case SVE::BI__builtin_sve_svdupq_n_s64:
- case SVE::BI__builtin_sve_svdupq_n_u16:
- case SVE::BI__builtin_sve_svdupq_n_f16:
- case SVE::BI__builtin_sve_svdupq_n_bf16:
- case SVE::BI__builtin_sve_svdupq_n_s16:
- case SVE::BI__builtin_sve_svdupq_n_u32:
- case SVE::BI__builtin_sve_svdupq_n_f32:
- case SVE::BI__builtin_sve_svdupq_n_s32: {
- // These builtins are implemented by building a fixed-length vector from the
- // scalar operands, inserting it into the low 128 bits of a scalable vector,
- // and replicating it across the full vector with the dupq_lane intrinsic.
- unsigned NumOpnds = Ops.size();
- bool IsBoolTy =
- cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
- // For svdupq_n_b* the element type is an integer of width 128/numelts,
- // so that the compare can use a width that is natural for the expected
- // number of predicate lanes.
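- // For example, svdupq_n_b8 takes 16 operands, so EltTy is i8 (128/16),
- // while svdupq_n_b64 takes 2 operands and uses i64.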
- llvm::Type *EltTy = Ops[0]->getType();
- if (IsBoolTy)
- EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
- SmallVector<llvm::Value *, 16> VecOps;
- for (unsigned I = 0; I < NumOpnds; ++I)
- VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
- Value *Vec = BuildVector(VecOps);
- SVETypeFlags TypeFlags(Builtin->TypeModifier);
- Value *Pred = EmitSVEAllTruePred(TypeFlags);
- llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
- Value *InsertSubVec = Builder.CreateInsertVector(
- OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0));
- Function *F =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
- Value *DupQLane =
- Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
- if (!IsBoolTy)
- return DupQLane;
- // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
- F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
- : Intrinsic::aarch64_sve_cmpne_wide,
- OverloadedTy);
- Value *Call = Builder.CreateCall(
- F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
- return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
- }
- case SVE::BI__builtin_sve_svpfalse_b:
- return ConstantInt::getFalse(Ty);
- case SVE::BI__builtin_sve_svlen_bf16:
- case SVE::BI__builtin_sve_svlen_f16:
- case SVE::BI__builtin_sve_svlen_f32:
- case SVE::BI__builtin_sve_svlen_f64:
- case SVE::BI__builtin_sve_svlen_s8:
- case SVE::BI__builtin_sve_svlen_s16:
- case SVE::BI__builtin_sve_svlen_s32:
- case SVE::BI__builtin_sve_svlen_s64:
- case SVE::BI__builtin_sve_svlen_u8:
- case SVE::BI__builtin_sve_svlen_u16:
- case SVE::BI__builtin_sve_svlen_u32:
- case SVE::BI__builtin_sve_svlen_u64: {
- SVETypeFlags TF(Builtin->TypeModifier);
- auto VTy = cast<llvm::VectorType>(getSVEType(TF));
- auto *NumEls =
- llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
- Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
- return Builder.CreateMul(NumEls, Builder.CreateCall(F));
- }
- case SVE::BI__builtin_sve_svtbl2_u8:
- case SVE::BI__builtin_sve_svtbl2_s8:
- case SVE::BI__builtin_sve_svtbl2_u16:
- case SVE::BI__builtin_sve_svtbl2_s16:
- case SVE::BI__builtin_sve_svtbl2_u32:
- case SVE::BI__builtin_sve_svtbl2_s32:
- case SVE::BI__builtin_sve_svtbl2_u64:
- case SVE::BI__builtin_sve_svtbl2_s64:
- case SVE::BI__builtin_sve_svtbl2_f16:
- case SVE::BI__builtin_sve_svtbl2_bf16:
- case SVE::BI__builtin_sve_svtbl2_f32:
- case SVE::BI__builtin_sve_svtbl2_f64: {
- SVETypeFlags TF(Builtin->TypeModifier);
- auto VTy = cast<llvm::VectorType>(getSVEType(TF));
- auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy);
- Function *FExtr =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
- Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
- Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
- return Builder.CreateCall(F, {V0, V1, Ops[1]});
- }
- case SVE::BI__builtin_sve_svset_neonq_s8:
- case SVE::BI__builtin_sve_svset_neonq_s16:
- case SVE::BI__builtin_sve_svset_neonq_s32:
- case SVE::BI__builtin_sve_svset_neonq_s64:
- case SVE::BI__builtin_sve_svset_neonq_u8:
- case SVE::BI__builtin_sve_svset_neonq_u16:
- case SVE::BI__builtin_sve_svset_neonq_u32:
- case SVE::BI__builtin_sve_svset_neonq_u64:
- case SVE::BI__builtin_sve_svset_neonq_f16:
- case SVE::BI__builtin_sve_svset_neonq_f32:
- case SVE::BI__builtin_sve_svset_neonq_f64:
- case SVE::BI__builtin_sve_svset_neonq_bf16: {
- return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
- }
- case SVE::BI__builtin_sve_svget_neonq_s8:
- case SVE::BI__builtin_sve_svget_neonq_s16:
- case SVE::BI__builtin_sve_svget_neonq_s32:
- case SVE::BI__builtin_sve_svget_neonq_s64:
- case SVE::BI__builtin_sve_svget_neonq_u8:
- case SVE::BI__builtin_sve_svget_neonq_u16:
- case SVE::BI__builtin_sve_svget_neonq_u32:
- case SVE::BI__builtin_sve_svget_neonq_u64:
- case SVE::BI__builtin_sve_svget_neonq_f16:
- case SVE::BI__builtin_sve_svget_neonq_f32:
- case SVE::BI__builtin_sve_svget_neonq_f64:
- case SVE::BI__builtin_sve_svget_neonq_bf16: {
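- // svget_neonq extracts the low 128 bits of the scalable vector as a NEON
- // vector.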
- return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
- }
- case SVE::BI__builtin_sve_svdup_neonq_s8:
- case SVE::BI__builtin_sve_svdup_neonq_s16:
- case SVE::BI__builtin_sve_svdup_neonq_s32:
- case SVE::BI__builtin_sve_svdup_neonq_s64:
- case SVE::BI__builtin_sve_svdup_neonq_u8:
- case SVE::BI__builtin_sve_svdup_neonq_u16:
- case SVE::BI__builtin_sve_svdup_neonq_u32:
- case SVE::BI__builtin_sve_svdup_neonq_u64:
- case SVE::BI__builtin_sve_svdup_neonq_f16:
- case SVE::BI__builtin_sve_svdup_neonq_f32:
- case SVE::BI__builtin_sve_svdup_neonq_f64:
- case SVE::BI__builtin_sve_svdup_neonq_bf16: {
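- // svdup_neonq inserts the NEON vector at index 0 and then broadcasts that
- // 128-bit chunk across the whole scalable vector with dupq_lane.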
- Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
- Builder.getInt64(0));
- return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
- {Insert, Builder.getInt64(0)});
- }
- }
- // Should not happen: every SVE builtin should have been handled above.
- return nullptr;
- }
- Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- llvm::Triple::ArchType Arch) {
- if (BuiltinID >= AArch64::FirstSVEBuiltin &&
- BuiltinID <= AArch64::LastSVEBuiltin)
- return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
- unsigned HintID = static_cast<unsigned>(-1);
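- // These builtins map onto the immediate operand of the AArch64 HINT
- // instruction: NOP=0, YIELD=1, WFE=2, WFI=3, SEV=4, SEVL=5.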
- switch (BuiltinID) {
- default: break;
- case AArch64::BI__builtin_arm_nop:
- HintID = 0;
- break;
- case AArch64::BI__builtin_arm_yield:
- case AArch64::BI__yield:
- HintID = 1;
- break;
- case AArch64::BI__builtin_arm_wfe:
- case AArch64::BI__wfe:
- HintID = 2;
- break;
- case AArch64::BI__builtin_arm_wfi:
- case AArch64::BI__wfi:
- HintID = 3;
- break;
- case AArch64::BI__builtin_arm_sev:
- case AArch64::BI__sev:
- HintID = 4;
- break;
- case AArch64::BI__builtin_arm_sevl:
- case AArch64::BI__sevl:
- HintID = 5;
- break;
- }
- if (HintID != static_cast<unsigned>(-1)) {
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
- return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
- }
- if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *RW = EmitScalarExpr(E->getArg(1));
- Value *CacheLevel = EmitScalarExpr(E->getArg(2));
- Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
- Value *IsData = EmitScalarExpr(E->getArg(4));
- Value *Locality = nullptr;
- if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
- // Temporal fetch: convert the target cache level to an LLVM prefetch locality.
- Locality = llvm::ConstantInt::get(Int32Ty,
- -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
- } else {
- // Streaming fetch.
- Locality = llvm::ConstantInt::get(Int32Ty, 0);
- }
- // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
- // PLDL3STRM or PLDL2STRM.
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return Builder.CreateCall(F, {Address, RW, Locality, IsData});
- }
- if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
- assert((getContext().getTypeSize(E->getType()) == 32) &&
- "rbit of unusual size!");
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
- assert((getContext().getTypeSize(E->getType()) == 64) &&
- "rbit of unusual size!");
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_cls) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
- "cls");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_cls64) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
- "cls");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_frint32zf ||
- BuiltinID == AArch64::BI__builtin_arm_frint32z) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = Arg->getType();
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
- Arg, "frint32z");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_frint64zf ||
- BuiltinID == AArch64::BI__builtin_arm_frint64z) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = Arg->getType();
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
- Arg, "frint64z");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_frint32xf ||
- BuiltinID == AArch64::BI__builtin_arm_frint32x) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = Arg->getType();
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
- Arg, "frint32x");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_frint64xf ||
- BuiltinID == AArch64::BI__builtin_arm_frint64x) {
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- llvm::Type *Ty = Arg->getType();
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
- Arg, "frint64x");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
- assert((getContext().getTypeSize(E->getType()) == 32) &&
- "__jcvt of unusual size!");
- llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
- }
- if (BuiltinID == AArch64::BI__builtin_arm_ld64b ||
- BuiltinID == AArch64::BI__builtin_arm_st64b ||
- BuiltinID == AArch64::BI__builtin_arm_st64bv ||
- BuiltinID == AArch64::BI__builtin_arm_st64bv0) {
- llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
- llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
- if (BuiltinID == AArch64::BI__builtin_arm_ld64b) {
- // Load from the address via an LLVM intrinsic, receiving a
- // tuple of 8 i64 words, and store each one to ValPtr.
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
- llvm::Value *Val = Builder.CreateCall(F, MemAddr);
- llvm::Value *ToRet;
- for (size_t i = 0; i < 8; i++) {
- llvm::Value *ValOffsetPtr =
- Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
- Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
- ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
- }
- return ToRet;
- } else {
- // Load 8 i64 words from ValPtr, and store them to the address
- // via an LLVM intrinsic.
- SmallVector<llvm::Value *, 9> Args;
- Args.push_back(MemAddr);
- for (size_t i = 0; i < 8; i++) {
- llvm::Value *ValOffsetPtr =
- Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
- Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
- Args.push_back(Builder.CreateLoad(Addr));
- }
- auto Intr = (BuiltinID == AArch64::BI__builtin_arm_st64b
- ? Intrinsic::aarch64_st64b
- : BuiltinID == AArch64::BI__builtin_arm_st64bv
- ? Intrinsic::aarch64_st64bv
- : Intrinsic::aarch64_st64bv0);
- Function *F = CGM.getIntrinsic(Intr);
- return Builder.CreateCall(F, Args);
- }
- }
- if (BuiltinID == AArch64::BI__builtin_arm_rndr ||
- BuiltinID == AArch64::BI__builtin_arm_rndrrs) {
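- // Both random-number builtins return a {value, status} pair: store the
- // random value through the pointer argument and return the status,
- // zero-extended to i32.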
- auto Intr = (BuiltinID == AArch64::BI__builtin_arm_rndr
- ? Intrinsic::aarch64_rndr
- : Intrinsic::aarch64_rndrrs);
- Function *F = CGM.getIntrinsic(Intr);
- llvm::Value *Val = Builder.CreateCall(F);
- Value *RandomValue = Builder.CreateExtractValue(Val, 0);
- Value *Status = Builder.CreateExtractValue(Val, 1);
- Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
- Builder.CreateStore(RandomValue, MemAddress);
- Status = Builder.CreateZExt(Status, Int32Ty);
- return Status;
- }
- if (BuiltinID == AArch64::BI__clear_cache) {
- assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
- const FunctionDecl *FD = E->getDirectCallee();
- Value *Ops[2];
- for (unsigned i = 0; i < 2; i++)
- Ops[i] = EmitScalarExpr(E->getArg(i));
- llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
- llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
- StringRef Name = FD->getName();
- return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
- }
- if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
- BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
- getContext().getTypeSize(E->getType()) == 128) {
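- // 128-bit exclusive loads use LDXP/LDAXP, which return two 64-bit halves
- // that are reassembled here into a single i128 value.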
- Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
- ? Intrinsic::aarch64_ldaxp
- : Intrinsic::aarch64_ldxp);
- Value *LdPtr = EmitScalarExpr(E->getArg(0));
- Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
- "ldxp");
- Value *Val0 = Builder.CreateExtractValue(Val, 1);
- Value *Val1 = Builder.CreateExtractValue(Val, 0);
- llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
- Val0 = Builder.CreateZExt(Val0, Int128Ty);
- Val1 = Builder.CreateZExt(Val1, Int128Ty);
- Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
- Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
- Val = Builder.CreateOr(Val, Val1);
- return Builder.CreateBitCast(Val, ConvertType(E->getType()));
- } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
- BuiltinID == AArch64::BI__builtin_arm_ldaex) {
- Value *LoadAddr = EmitScalarExpr(E->getArg(0));
- QualType Ty = E->getType();
- llvm::Type *RealResTy = ConvertType(Ty);
- llvm::Type *PtrTy = llvm::IntegerType::get(
- getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
- LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
- Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
- ? Intrinsic::aarch64_ldaxr
- : Intrinsic::aarch64_ldxr,
- PtrTy);
- Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
- if (RealResTy->isPointerTy())
- return Builder.CreateIntToPtr(Val, RealResTy);
- llvm::Type *IntResTy = llvm::IntegerType::get(
- getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
- Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
- return Builder.CreateBitCast(Val, RealResTy);
- }
- if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
- BuiltinID == AArch64::BI__builtin_arm_stlex) &&
- getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
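- // 128-bit exclusive stores split the value into two 64-bit halves and pass
- // them to STXP/STLXP.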
- Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
- ? Intrinsic::aarch64_stlxp
- : Intrinsic::aarch64_stxp);
- llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
- Address Tmp = CreateMemTemp(E->getArg(0)->getType());
- EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
- Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
- llvm::Value *Val = Builder.CreateLoad(Tmp);
- Value *Arg0 = Builder.CreateExtractValue(Val, 0);
- Value *Arg1 = Builder.CreateExtractValue(Val, 1);
- Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
- Int8PtrTy);
- return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
- }
- if (BuiltinID == AArch64::BI__builtin_arm_strex ||
- BuiltinID == AArch64::BI__builtin_arm_stlex) {
- Value *StoreVal = EmitScalarExpr(E->getArg(0));
- Value *StoreAddr = EmitScalarExpr(E->getArg(1));
- QualType Ty = E->getArg(0)->getType();
- llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
- getContext().getTypeSize(Ty));
- StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
- if (StoreVal->getType()->isPointerTy())
- StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
- else {
- llvm::Type *IntTy = llvm::IntegerType::get(
- getLLVMContext(),
- CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
- StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
- StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
- }
- Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
- ? Intrinsic::aarch64_stlxr
- : Intrinsic::aarch64_stxr,
- StoreAddr->getType());
- return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
- }
- if (BuiltinID == AArch64::BI__getReg) {
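- // __getReg reads a general-purpose register by number through
- // llvm.read_register, naming it "x<N>" (or "sp" for register 31) via
- // metadata.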
- Expr::EvalResult Result;
- if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
- llvm_unreachable("Sema will ensure that the parameter is constant");
- llvm::APSInt Value = Result.Val.getInt();
- LLVMContext &Context = CGM.getLLVMContext();
- std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
- llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
- llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
- llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
- llvm::Function *F =
- CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
- return Builder.CreateCall(F, Metadata);
- }
- if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
- return Builder.CreateCall(F);
- }
- if (BuiltinID == AArch64::BI_ReadWriteBarrier)
- return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::SyncScope::SingleThread);
- // CRC32
- Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
- switch (BuiltinID) {
- case AArch64::BI__builtin_arm_crc32b:
- CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
- case AArch64::BI__builtin_arm_crc32cb:
- CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
- case AArch64::BI__builtin_arm_crc32h:
- CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
- case AArch64::BI__builtin_arm_crc32ch:
- CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
- case AArch64::BI__builtin_arm_crc32w:
- CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
- case AArch64::BI__builtin_arm_crc32cw:
- CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
- case AArch64::BI__builtin_arm_crc32d:
- CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
- case AArch64::BI__builtin_arm_crc32cd:
- CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
- }
- if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
- Function *F = CGM.getIntrinsic(CRCIntrinsicID);
- llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
- Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
- return Builder.CreateCall(F, {Arg0, Arg1});
- }
- // Memory Operations (MOPS)
- if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
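- // Lower to the MOPS memset-with-tag intrinsic, normalizing the arguments
- // to (i8*, i8, i64).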
- Value *Dst = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Value *Size = EmitScalarExpr(E->getArg(2));
- Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
- Val = Builder.CreateTrunc(Val, Int8Ty);
- Size = Builder.CreateIntCast(Size, Int64Ty, false);
- return Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
- }
- // Memory Tagging Extensions (MTE) Intrinsics
- Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
- switch (BuiltinID) {
- case AArch64::BI__builtin_arm_irg:
- MTEIntrinsicID = Intrinsic::aarch64_irg; break;
- case AArch64::BI__builtin_arm_addg:
- MTEIntrinsicID = Intrinsic::aarch64_addg; break;
- case AArch64::BI__builtin_arm_gmi:
- MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
- case AArch64::BI__builtin_arm_ldg:
- MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
- case AArch64::BI__builtin_arm_stg:
- MTEIntrinsicID = Intrinsic::aarch64_stg; break;
- case AArch64::BI__builtin_arm_subp:
- MTEIntrinsicID = Intrinsic::aarch64_subp; break;
- }
- if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
- llvm::Type *T = ConvertType(E->getType());
- if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
- Value *Pointer = EmitScalarExpr(E->getArg(0));
- Value *Mask = EmitScalarExpr(E->getArg(1));
- Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
- Mask = Builder.CreateZExt(Mask, Int64Ty);
- Value *RV = Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
- return Builder.CreatePointerCast(RV, T);
- }
- if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
- Value *Pointer = EmitScalarExpr(E->getArg(0));
- Value *TagOffset = EmitScalarExpr(E->getArg(1));
- Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
- TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
- Value *RV = Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
- return Builder.CreatePointerCast(RV, T);
- }
- if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
- Value *Pointer = EmitScalarExpr(E->getArg(0));
- Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
- ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
- Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
- return Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
- }
- // Although it is possible to supply a different return address (the first
- // argument) to this intrinsic, for now we set the return address to be the
- // same as the input address.
- if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
- Value *TagAddress = EmitScalarExpr(E->getArg(0));
- TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
- Value *RV = Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
- return Builder.CreatePointerCast(RV, T);
- }
- // Although it is possible to supply a different tag to set (as the first
- // argument) to this intrinsic, for now we use the tag that is already in the
- // input address argument (the common use case).
- if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
- Value *TagAddress = EmitScalarExpr(E->getArg(0));
- TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
- return Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
- }
- if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
- Value *PointerA = EmitScalarExpr(E->getArg(0));
- Value *PointerB = EmitScalarExpr(E->getArg(1));
- PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
- PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
- return Builder.CreateCall(
- CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
- }
- }
- if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
- BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
- BuiltinID == AArch64::BI__builtin_arm_rsrp ||
- BuiltinID == AArch64::BI__builtin_arm_wsr ||
- BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
- BuiltinID == AArch64::BI__builtin_arm_wsrp) {
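- // The rsr/wsr family reads or writes a system register. The register is
- // always accessed as 64 bits; the value type depends on the variant.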
- SpecialRegisterAccessKind AccessKind = Write;
- if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
- BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
- BuiltinID == AArch64::BI__builtin_arm_rsrp)
- AccessKind = VolatileRead;
- bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
- BuiltinID == AArch64::BI__builtin_arm_wsrp;
- bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
- BuiltinID != AArch64::BI__builtin_arm_wsr;
- llvm::Type *ValueType;
- llvm::Type *RegisterType = Int64Ty;
- if (IsPointerBuiltin) {
- ValueType = VoidPtrTy;
- } else if (Is64Bit) {
- ValueType = Int64Ty;
- } else {
- ValueType = Int32Ty;
- }
- return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
- AccessKind);
- }
- if (BuiltinID == AArch64::BI_ReadStatusReg ||
- BuiltinID == AArch64::BI_WriteStatusReg) {
- LLVMContext &Context = CGM.getLLVMContext();
- unsigned SysReg =
- E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
- std::string SysRegStr;
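- // Encode the packed 16-bit immediate as the "op0:op1:CRn:CRm:op2" string
- // form expected by llvm.read_register / llvm.write_register.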
- llvm::raw_string_ostream(SysRegStr) <<
- ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
- ((SysReg >> 11) & 7) << ":" <<
- ((SysReg >> 7) & 15) << ":" <<
- ((SysReg >> 3) & 15) << ":" <<
- ( SysReg & 7);
- llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
- llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
- llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
- llvm::Type *RegisterType = Int64Ty;
- llvm::Type *Types[] = { RegisterType };
- if (BuiltinID == AArch64::BI_ReadStatusReg) {
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
- return Builder.CreateCall(F, Metadata);
- }
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
- llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
- return Builder.CreateCall(F, { Metadata, ArgValue });
- }
- if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
- llvm::Function *F =
- CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
- return Builder.CreateCall(F);
- }
- if (BuiltinID == AArch64::BI__builtin_sponentry) {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
- return Builder.CreateCall(F);
- }
- if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
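- // Widen both operands to 128 bits, multiply, and return the high 64 bits
- // of the product.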
- llvm::Type *ResType = ConvertType(E->getType());
- llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
- bool IsSigned = BuiltinID == AArch64::BI__mulh;
- Value *LHS =
- Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
- Value *RHS =
- Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
- Value *MulResult, *HigherBits;
- if (IsSigned) {
- MulResult = Builder.CreateNSWMul(LHS, RHS);
- HigherBits = Builder.CreateAShr(MulResult, 64);
- } else {
- MulResult = Builder.CreateNUWMul(LHS, RHS);
- HigherBits = Builder.CreateLShr(MulResult, 64);
- }
- HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
- return HigherBits;
- }
- // Handle MSVC intrinsics before argument evaluation to prevent double
- // evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
- return EmitMSVCBuiltinExpr(*MsvcIntId, E);
- // Find out if any arguments are required to be integer constant
- // expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- llvm::SmallVector<Value*, 4> Ops;
- Address PtrOp0 = Address::invalid();
- for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
- if (i == 0) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v:
- case NEON::BI__builtin_neon_vld1_dup_v:
- case NEON::BI__builtin_neon_vld1q_dup_v:
- case NEON::BI__builtin_neon_vld1_lane_v:
- case NEON::BI__builtin_neon_vld1q_lane_v:
- case NEON::BI__builtin_neon_vst1_v:
- case NEON::BI__builtin_neon_vst1q_v:
- case NEON::BI__builtin_neon_vst1_lane_v:
- case NEON::BI__builtin_neon_vst1q_lane_v:
- // Get the alignment for the argument in addition to the value;
- // we'll use it later.
- PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
- Ops.push_back(PtrOp0.getPointer());
- continue;
- }
- }
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
- }
- auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
- const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
- SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
- if (Builtin) {
- Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
- Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
- assert(Result && "SISD intrinsic should have been handled");
- return Result;
- }
- const Expr *Arg = E->getArg(E->getNumArgs()-1);
- NeonTypeFlags Type(0);
- if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()))
- // Determine the type of this overloaded NEON intrinsic.
- Type = NeonTypeFlags(Result->getZExtValue());
- bool usgn = Type.isUnsigned();
- bool quad = Type.isQuad();
- // Handle non-overloaded intrinsics first.
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_vabsh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
- case NEON::BI__builtin_neon_vaddq_p128: {
- llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
- llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
- return Builder.CreateBitCast(Ops[0], Int128Ty);
- }
- case NEON::BI__builtin_neon_vldrq_p128: {
- llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
- llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
- Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
- return Builder.CreateAlignedLoad(Int128Ty, Ptr,
- CharUnits::fromQuantity(16));
- }
- case NEON::BI__builtin_neon_vstrq_p128: {
- llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
- Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
- return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
- }
- case NEON::BI__builtin_neon_vcvts_f32_u32:
- case NEON::BI__builtin_neon_vcvtd_f64_u64:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvts_f32_s32:
- case NEON::BI__builtin_neon_vcvtd_f64_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
- llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
- llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
- Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
- if (usgn)
- return Builder.CreateUIToFP(Ops[0], FTy);
- return Builder.CreateSIToFP(Ops[0], FTy);
- }
- case NEON::BI__builtin_neon_vcvth_f16_u16:
- case NEON::BI__builtin_neon_vcvth_f16_u32:
- case NEON::BI__builtin_neon_vcvth_f16_u64:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_f16_s16:
- case NEON::BI__builtin_neon_vcvth_f16_s32:
- case NEON::BI__builtin_neon_vcvth_f16_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- llvm::Type *FTy = HalfTy;
- llvm::Type *InTy;
- if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
- InTy = Int64Ty;
- else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
- InTy = Int32Ty;
- else
- InTy = Int16Ty;
- Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
- if (usgn)
- return Builder.CreateUIToFP(Ops[0], FTy);
- return Builder.CreateSIToFP(Ops[0], FTy);
- }
- case NEON::BI__builtin_neon_vcvtah_u16_f16:
- case NEON::BI__builtin_neon_vcvtmh_u16_f16:
- case NEON::BI__builtin_neon_vcvtnh_u16_f16:
- case NEON::BI__builtin_neon_vcvtph_u16_f16:
- case NEON::BI__builtin_neon_vcvth_u16_f16:
- case NEON::BI__builtin_neon_vcvtah_s16_f16:
- case NEON::BI__builtin_neon_vcvtmh_s16_f16:
- case NEON::BI__builtin_neon_vcvtnh_s16_f16:
- case NEON::BI__builtin_neon_vcvtph_s16_f16:
- case NEON::BI__builtin_neon_vcvth_s16_f16: {
- unsigned Int;
- llvm::Type* InTy = Int32Ty;
- llvm::Type* FTy = HalfTy;
- llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vcvtah_u16_f16:
- Int = Intrinsic::aarch64_neon_fcvtau; break;
- case NEON::BI__builtin_neon_vcvtmh_u16_f16:
- Int = Intrinsic::aarch64_neon_fcvtmu; break;
- case NEON::BI__builtin_neon_vcvtnh_u16_f16:
- Int = Intrinsic::aarch64_neon_fcvtnu; break;
- case NEON::BI__builtin_neon_vcvtph_u16_f16:
- Int = Intrinsic::aarch64_neon_fcvtpu; break;
- case NEON::BI__builtin_neon_vcvth_u16_f16:
- Int = Intrinsic::aarch64_neon_fcvtzu; break;
- case NEON::BI__builtin_neon_vcvtah_s16_f16:
- Int = Intrinsic::aarch64_neon_fcvtas; break;
- case NEON::BI__builtin_neon_vcvtmh_s16_f16:
- Int = Intrinsic::aarch64_neon_fcvtms; break;
- case NEON::BI__builtin_neon_vcvtnh_s16_f16:
- Int = Intrinsic::aarch64_neon_fcvtns; break;
- case NEON::BI__builtin_neon_vcvtph_s16_f16:
- Int = Intrinsic::aarch64_neon_fcvtps; break;
- case NEON::BI__builtin_neon_vcvth_s16_f16:
- Int = Intrinsic::aarch64_neon_fcvtzs; break;
- }
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcaleh_f16:
- case NEON::BI__builtin_neon_vcalth_f16:
- case NEON::BI__builtin_neon_vcageh_f16:
- case NEON::BI__builtin_neon_vcagth_f16: {
- unsigned Int;
- llvm::Type* InTy = Int32Ty;
- llvm::Type* FTy = HalfTy;
- llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vcageh_f16:
- Int = Intrinsic::aarch64_neon_facge; break;
- case NEON::BI__builtin_neon_vcagth_f16:
- Int = Intrinsic::aarch64_neon_facgt; break;
- case NEON::BI__builtin_neon_vcaleh_f16:
- Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
- case NEON::BI__builtin_neon_vcalth_f16:
- Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
- }
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcvth_n_s16_f16:
- case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
- unsigned Int;
- llvm::Type* InTy = Int32Ty;
- llvm::Type* FTy = HalfTy;
- llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vcvth_n_s16_f16:
- Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
- case NEON::BI__builtin_neon_vcvth_n_u16_f16:
- Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
- }
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcvth_n_f16_s16:
- case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
- unsigned Int;
- llvm::Type* FTy = HalfTy;
- llvm::Type* InTy = Int32Ty;
- llvm::Type *Tys[2] = {FTy, InTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vcvth_n_f16_s16:
- Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
- Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
- break;
- case NEON::BI__builtin_neon_vcvth_n_f16_u16:
- Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
- Ops[0] = Builder.CreateZExt(Ops[0], InTy);
- break;
- }
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
- }
- case NEON::BI__builtin_neon_vpaddd_s64: {
- auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
- // The vector is v2i64, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
- llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
- llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
- // Pairwise addition of a v2i64 into a scalar i64.
- return Builder.CreateAdd(Op0, Op1, "vpaddd");
- }
- case NEON::BI__builtin_neon_vpaddd_f64: {
- auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
- // The vector is v2f64, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
- llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
- llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
- // Pairwise addition of a v2f64 into a scalar f64.
- return Builder.CreateFAdd(Op0, Op1, "vpaddd");
- }
- case NEON::BI__builtin_neon_vpadds_f32: {
- auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
- // The vector is v2f32, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
- llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
- llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
- // Pairwise addition of a v2f32 into a scalar f32.
- return Builder.CreateFAdd(Op0, Op1, "vpaddd");
- }
- case NEON::BI__builtin_neon_vceqzd_s64:
- case NEON::BI__builtin_neon_vceqzd_f64:
- case NEON::BI__builtin_neon_vceqzs_f32:
- case NEON::BI__builtin_neon_vceqzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitAArch64CompareBuiltinExpr(
- Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
- case NEON::BI__builtin_neon_vcgezd_s64:
- case NEON::BI__builtin_neon_vcgezd_f64:
- case NEON::BI__builtin_neon_vcgezs_f32:
- case NEON::BI__builtin_neon_vcgezh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitAArch64CompareBuiltinExpr(
- Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
- case NEON::BI__builtin_neon_vclezd_s64:
- case NEON::BI__builtin_neon_vclezd_f64:
- case NEON::BI__builtin_neon_vclezs_f32:
- case NEON::BI__builtin_neon_vclezh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitAArch64CompareBuiltinExpr(
- Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
- case NEON::BI__builtin_neon_vcgtzd_s64:
- case NEON::BI__builtin_neon_vcgtzd_f64:
- case NEON::BI__builtin_neon_vcgtzs_f32:
- case NEON::BI__builtin_neon_vcgtzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitAArch64CompareBuiltinExpr(
- Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
- case NEON::BI__builtin_neon_vcltzd_s64:
- case NEON::BI__builtin_neon_vcltzd_f64:
- case NEON::BI__builtin_neon_vcltzs_f32:
- case NEON::BI__builtin_neon_vcltzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitAArch64CompareBuiltinExpr(
- Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
- case NEON::BI__builtin_neon_vceqzd_u64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
- Ops[0] =
- Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
- return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
- }
- case NEON::BI__builtin_neon_vceqd_f64:
- case NEON::BI__builtin_neon_vcled_f64:
- case NEON::BI__builtin_neon_vcltd_f64:
- case NEON::BI__builtin_neon_vcged_f64:
- case NEON::BI__builtin_neon_vcgtd_f64: {
- llvm::CmpInst::Predicate P;
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
- case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
- case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
- case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
- case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
- }
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
- Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
- return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
- }
- case NEON::BI__builtin_neon_vceqs_f32:
- case NEON::BI__builtin_neon_vcles_f32:
- case NEON::BI__builtin_neon_vclts_f32:
- case NEON::BI__builtin_neon_vcges_f32:
- case NEON::BI__builtin_neon_vcgts_f32: {
- llvm::CmpInst::Predicate P;
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
- case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
- case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
- case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
- case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
- }
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
- Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
- return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
- }
- case NEON::BI__builtin_neon_vceqh_f16:
- case NEON::BI__builtin_neon_vcleh_f16:
- case NEON::BI__builtin_neon_vclth_f16:
- case NEON::BI__builtin_neon_vcgeh_f16:
- case NEON::BI__builtin_neon_vcgth_f16: {
- llvm::CmpInst::Predicate P;
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
- case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
- case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
- case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
- case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
- }
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
- Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
- return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
- }
- case NEON::BI__builtin_neon_vceqd_s64:
- case NEON::BI__builtin_neon_vceqd_u64:
- case NEON::BI__builtin_neon_vcgtd_s64:
- case NEON::BI__builtin_neon_vcgtd_u64:
- case NEON::BI__builtin_neon_vcltd_s64:
- case NEON::BI__builtin_neon_vcltd_u64:
- case NEON::BI__builtin_neon_vcged_u64:
- case NEON::BI__builtin_neon_vcged_s64:
- case NEON::BI__builtin_neon_vcled_u64:
- case NEON::BI__builtin_neon_vcled_s64: {
- llvm::CmpInst::Predicate P;
- switch (BuiltinID) {
- default: llvm_unreachable("missing builtin ID in switch!");
- case NEON::BI__builtin_neon_vceqd_s64:
- case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
- case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
- case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
- case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
- case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
- case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
- case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
- case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
- case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
- }
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
- Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
- return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
- }
- case NEON::BI__builtin_neon_vtstd_s64:
- case NEON::BI__builtin_neon_vtstd_u64: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
- Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
- Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
- llvm::Constant::getNullValue(Int64Ty));
- return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
- }
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_bf16:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_bf16:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- Ops.push_back(EmitScalarExpr(E->getArg(2)));
- return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
- case NEON::BI__builtin_neon_vset_lane_f64:
- // The vector type needs a cast for the v1f64 variant.
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
- Ops.push_back(EmitScalarExpr(E->getArg(2)));
- return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
- case NEON::BI__builtin_neon_vsetq_lane_f64:
- // The vector type needs a cast for the v2f64 variant.
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
- Ops.push_back(EmitScalarExpr(E->getArg(2)));
- return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vdupb_lane_i8:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vdupb_laneq_i8:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vduph_lane_i16:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vduph_laneq_i16:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vdups_lane_i32:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vdups_lane_f32:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vdups_lane");
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vdups_laneq_i32:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vdupd_lane_i64:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vdupd_lane_f64:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vdupd_lane");
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vdupd_laneq_i64:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vget_lane_f32:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vget_lane_f64:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vdups_laneq_f32:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vgetq_lane_f64:
- case NEON::BI__builtin_neon_vdupd_laneq_f64:
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- case NEON::BI__builtin_neon_vaddh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
- case NEON::BI__builtin_neon_vsubh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
- case NEON::BI__builtin_neon_vmulh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
- case NEON::BI__builtin_neon_vdivh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
- case NEON::BI__builtin_neon_vfmah_f16:
- // NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
- {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
- case NEON::BI__builtin_neon_vfmsh_f16: {
- // FIXME: This should be an fneg instruction:
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
- Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
- // NEON intrinsic puts accumulator first, unlike the LLVM fma.
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
- {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
- }
- case NEON::BI__builtin_neon_vaddd_s64:
- case NEON::BI__builtin_neon_vaddd_u64:
- return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
- case NEON::BI__builtin_neon_vsubd_s64:
- case NEON::BI__builtin_neon_vsubd_u64:
- return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
- case NEON::BI__builtin_neon_vqdmlalh_s16:
- case NEON::BI__builtin_neon_vqdmlslh_s16: {
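- // Scalar sqdmlal/sqdmlsl: widen the 16-bit operands into vectors, perform
- // a vector sqdmull, extract lane 0, then saturating add/subtract into the
- // accumulator.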
- SmallVector<Value *, 2> ProductOps;
- ProductOps.push_back(vectorWrapScalar16(Ops[1]));
- ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
- auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
- Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
- ProductOps, "vqdmlXl");
- Constant *CI = ConstantInt::get(SizeTy, 0);
- Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
- unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
- ? Intrinsic::aarch64_neon_sqadd
- : Intrinsic::aarch64_neon_sqsub;
- return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
- }
- case NEON::BI__builtin_neon_vqshlud_n_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
- Ops, "vqshlu_n");
- }
- case NEON::BI__builtin_neon_vqshld_n_u64:
- case NEON::BI__builtin_neon_vqshld_n_s64: {
- unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
- ? Intrinsic::aarch64_neon_uqshl
- : Intrinsic::aarch64_neon_sqshl;
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
- return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
- }
- case NEON::BI__builtin_neon_vrshrd_n_u64:
- case NEON::BI__builtin_neon_vrshrd_n_s64: {
- unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
- ? Intrinsic::aarch64_neon_urshl
- : Intrinsic::aarch64_neon_srshl;
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
- Ops[1] = ConstantInt::get(Int64Ty, -SV);
- return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
- }
- case NEON::BI__builtin_neon_vrsrad_n_u64:
- case NEON::BI__builtin_neon_vrsrad_n_s64: {
- unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
- ? Intrinsic::aarch64_neon_urshl
- : Intrinsic::aarch64_neon_srshl;
- Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
- Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
- Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
- {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
- return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
- }
- case NEON::BI__builtin_neon_vshld_n_s64:
- case NEON::BI__builtin_neon_vshld_n_u64: {
- llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateShl(
- Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
- }
- case NEON::BI__builtin_neon_vshrd_n_s64: {
- llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateAShr(
- Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
- Amt->getZExtValue())),
- "shrd_n");
- }
- case NEON::BI__builtin_neon_vshrd_n_u64: {
- llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- uint64_t ShiftAmt = Amt->getZExtValue();
- // Right-shifting an unsigned value by its size yields 0.
- if (ShiftAmt == 64)
- return ConstantInt::get(Int64Ty, 0);
- return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
- "shrd_n");
- }
- case NEON::BI__builtin_neon_vsrad_n_s64: {
- llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
- Ops[1] = Builder.CreateAShr(
- Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
- Amt->getZExtValue())),
- "shrd_n");
- return Builder.CreateAdd(Ops[0], Ops[1]);
- }
- case NEON::BI__builtin_neon_vsrad_n_u64: {
- llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
- uint64_t ShiftAmt = Amt->getZExtValue();
- // Right-shifting an unsigned value by its size yields 0.
- // As Op + 0 = Op, return Ops[0] directly.
- if (ShiftAmt == 64)
- return Ops[0];
- Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
- "shrd_n");
- return Builder.CreateAdd(Ops[0], Ops[1]);
- }
- case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
- case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
- case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
- case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
- Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
- "lane");
- SmallVector<Value *, 2> ProductOps;
- ProductOps.push_back(vectorWrapScalar16(Ops[1]));
- ProductOps.push_back(vectorWrapScalar16(Ops[2]));
- auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
- Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
- ProductOps, "vqdmlXl");
- Constant *CI = ConstantInt::get(SizeTy, 0);
- Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
- Ops.pop_back();
- unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
- ? Intrinsic::aarch64_neon_sqadd
- : Intrinsic::aarch64_neon_sqsub;
- return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
- }
- case NEON::BI__builtin_neon_vqdmlals_s32:
- case NEON::BI__builtin_neon_vqdmlsls_s32: {
- SmallVector<Value *, 2> ProductOps;
- ProductOps.push_back(Ops[1]);
- ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
- Ops[1] =
- EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
- ProductOps, "vqdmlXl");
- unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
- ? Intrinsic::aarch64_neon_sqadd
- : Intrinsic::aarch64_neon_sqsub;
- return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
- }
- case NEON::BI__builtin_neon_vqdmlals_lane_s32:
- case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
- case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
- case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
- Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
- "lane");
- SmallVector<Value *, 2> ProductOps;
- ProductOps.push_back(Ops[1]);
- ProductOps.push_back(Ops[2]);
- Ops[1] =
- EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
- ProductOps, "vqdmlXl");
- Ops.pop_back();
- unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
- ? Intrinsic::aarch64_neon_sqadd
- : Intrinsic::aarch64_neon_sqsub;
- return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
- }
- case NEON::BI__builtin_neon_vget_lane_bf16:
- case NEON::BI__builtin_neon_vduph_lane_bf16:
- case NEON::BI__builtin_neon_vduph_lane_f16: {
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vget_lane");
- }
- case NEON::BI__builtin_neon_vgetq_lane_bf16:
- case NEON::BI__builtin_neon_vduph_laneq_bf16:
- case NEON::BI__builtin_neon_vduph_laneq_f16: {
- return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
- "vgetq_lane");
- }
- case AArch64::BI_InterlockedAdd: {
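- // _InterlockedAdd returns the new value, so add the operand to the result
- // of the atomicrmw (which yields the old value).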
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add, Arg0, Arg1,
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, Arg1);
- }
- }
- llvm::FixedVectorType *VTy = GetNeonType(this, Type);
- llvm::Type *Ty = VTy;
- if (!Ty)
- return nullptr;
- // Not all intrinsics handled by the common case work for AArch64 yet, so only
- // defer to common code if it's been added to our special map.
- Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
- AArch64SIMDIntrinsicsProvenSorted);
- if (Builtin)
- return EmitCommonNeonBuiltinExpr(
- Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
- Builtin->NameHint, Builtin->TypeModifier, E, Ops,
- /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
- if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
- return V;
- unsigned Int;
- switch (BuiltinID) {
- default: return nullptr;
- case NEON::BI__builtin_neon_vbsl_v:
- case NEON::BI__builtin_neon_vbslq_v: {
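- // vbsl is a bitwise select: (mask & a) | (~mask & b), computed on the
- // integer form of the vector type.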
- llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
- Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
- Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
- Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
- Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
- Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
- Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
- return Builder.CreateBitCast(Ops[0], Ty);
- }
- case NEON::BI__builtin_neon_vfma_lane_v:
- case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
- // The ARM builtins (and instructions) have the addend as the first
- // operand, but the 'fma' intrinsics have it last. Swap it around here.
- Value *Addend = Ops[0];
- Value *Multiplicand = Ops[1];
- Value *LaneSource = Ops[2];
- Ops[0] = Multiplicand;
- Ops[1] = LaneSource;
- Ops[2] = Addend;
- // Now adjust things to handle the lane access.
- auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
- ? llvm::FixedVectorType::get(VTy->getElementType(),
- VTy->getNumElements() / 2)
- : VTy;
- llvm::Constant *cst = cast<Constant>(Ops[3]);
- Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
- Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
- Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
- Ops.pop_back();
- Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
- : Intrinsic::fma;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
- }
- case NEON::BI__builtin_neon_vfma_laneq_v: {
- auto *VTy = cast<llvm::FixedVectorType>(Ty);
- // v1f64 fma should be mapped to Neon scalar f64 fma
- if (VTy && VTy->getElementType() == DoubleTy) {
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
- llvm::FixedVectorType *VTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
- Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
- Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- Value *Result;
- Result = emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
- DoubleTy, {Ops[1], Ops[2], Ops[0]});
- return Builder.CreateBitCast(Result, Ty);
- }
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
- VTy->getNumElements() * 2);
- Ops[2] = Builder.CreateBitCast(Ops[2], STy);
- Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
- cast<ConstantInt>(Ops[3]));
- Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
- {Ops[2], Ops[1], Ops[0]});
- }
- case NEON::BI__builtin_neon_vfmaq_laneq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
- {Ops[2], Ops[1], Ops[0]});
- }
- case NEON::BI__builtin_neon_vfmah_lane_f16:
- case NEON::BI__builtin_neon_vfmas_lane_f32:
- case NEON::BI__builtin_neon_vfmah_laneq_f16:
- case NEON::BI__builtin_neon_vfmas_laneq_f32:
- case NEON::BI__builtin_neon_vfmad_lane_f64:
- case NEON::BI__builtin_neon_vfmad_laneq_f64: {
- Ops.push_back(EmitScalarExpr(E->getArg(3)));
- llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
- Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- return emitCallMaybeConstrainedFPBuiltin(
- *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
- {Ops[1], Ops[2], Ops[0]});
- }
- case NEON::BI__builtin_neon_vmull_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
- if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
- case NEON::BI__builtin_neon_vmax_v:
- case NEON::BI__builtin_neon_vmaxq_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
- if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
- case NEON::BI__builtin_neon_vmaxh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Int = Intrinsic::aarch64_neon_fmax;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
- }
- case NEON::BI__builtin_neon_vmin_v:
- case NEON::BI__builtin_neon_vminq_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
- if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
- case NEON::BI__builtin_neon_vminh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Int = Intrinsic::aarch64_neon_fmin;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
- }
- case NEON::BI__builtin_neon_vabd_v:
- case NEON::BI__builtin_neon_vabdq_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
- if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
- case NEON::BI__builtin_neon_vpadal_v:
- case NEON::BI__builtin_neon_vpadalq_v: {
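- // vpadal: emit a pairwise widening add (saddlp/uaddlp) of the source vector,
- // then add the accumulator operand to the widened result.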
- unsigned ArgElts = VTy->getNumElements();
- llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
- unsigned BitWidth = EltTy->getBitWidth();
- auto *ArgTy = llvm::FixedVectorType::get(
- llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
- llvm::Type* Tys[2] = { VTy, ArgTy };
- Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
- SmallVector<llvm::Value*, 1> TmpOps;
- TmpOps.push_back(Ops[1]);
- Function *F = CGM.getIntrinsic(Int, Tys);
- llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
- llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
- return Builder.CreateAdd(tmp, addend);
- }
- case NEON::BI__builtin_neon_vpmin_v:
- case NEON::BI__builtin_neon_vpminq_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
- if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
- case NEON::BI__builtin_neon_vpmax_v:
- case NEON::BI__builtin_neon_vpmaxq_v:
- // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
- Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
- if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
- case NEON::BI__builtin_neon_vminnm_v:
- case NEON::BI__builtin_neon_vminnmq_v:
- Int = Intrinsic::aarch64_neon_fminnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
- case NEON::BI__builtin_neon_vminnmh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Int = Intrinsic::aarch64_neon_fminnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
- case NEON::BI__builtin_neon_vmaxnm_v:
- case NEON::BI__builtin_neon_vmaxnmq_v:
- Int = Intrinsic::aarch64_neon_fmaxnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
- case NEON::BI__builtin_neon_vmaxnmh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- Int = Intrinsic::aarch64_neon_fmaxnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
- case NEON::BI__builtin_neon_vrecpss_f32: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
- Ops, "vrecps");
- }
- case NEON::BI__builtin_neon_vrecpsd_f64:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
- Ops, "vrecps");
- case NEON::BI__builtin_neon_vrecpsh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
- Ops, "vrecps");
- case NEON::BI__builtin_neon_vqshrun_n_v:
- Int = Intrinsic::aarch64_neon_sqshrun;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
- case NEON::BI__builtin_neon_vqrshrun_n_v:
- Int = Intrinsic::aarch64_neon_sqrshrun;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
- case NEON::BI__builtin_neon_vqshrn_n_v:
- Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
- case NEON::BI__builtin_neon_vrshrn_n_v:
- Int = Intrinsic::aarch64_neon_rshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
- case NEON::BI__builtin_neon_vqrshrn_n_v:
- Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
- case NEON::BI__builtin_neon_vrndah_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_round
- : Intrinsic::round;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
- }
- case NEON::BI__builtin_neon_vrnda_v:
- case NEON::BI__builtin_neon_vrndaq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_round
- : Intrinsic::round;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
- }
- case NEON::BI__builtin_neon_vrndih_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_nearbyint
- : Intrinsic::nearbyint;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
- }
- case NEON::BI__builtin_neon_vrndmh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_floor
- : Intrinsic::floor;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
- }
- case NEON::BI__builtin_neon_vrndm_v:
- case NEON::BI__builtin_neon_vrndmq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_floor
- : Intrinsic::floor;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
- }
- case NEON::BI__builtin_neon_vrndnh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_roundeven
- : Intrinsic::roundeven;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
- }
- case NEON::BI__builtin_neon_vrndn_v:
- case NEON::BI__builtin_neon_vrndnq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_roundeven
- : Intrinsic::roundeven;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
- }
- case NEON::BI__builtin_neon_vrndns_f32: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_roundeven
- : Intrinsic::roundeven;
- return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
- }
- case NEON::BI__builtin_neon_vrndph_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_ceil
- : Intrinsic::ceil;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
- }
- case NEON::BI__builtin_neon_vrndp_v:
- case NEON::BI__builtin_neon_vrndpq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_ceil
- : Intrinsic::ceil;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
- }
- case NEON::BI__builtin_neon_vrndxh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_rint
- : Intrinsic::rint;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
- }
- case NEON::BI__builtin_neon_vrndx_v:
- case NEON::BI__builtin_neon_vrndxq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_rint
- : Intrinsic::rint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
- }
- case NEON::BI__builtin_neon_vrndh_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_trunc
- : Intrinsic::trunc;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
- }
- case NEON::BI__builtin_neon_vrnd32x_v:
- case NEON::BI__builtin_neon_vrnd32xq_v: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frint32x;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
- }
- case NEON::BI__builtin_neon_vrnd32z_v:
- case NEON::BI__builtin_neon_vrnd32zq_v: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frint32z;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
- }
- case NEON::BI__builtin_neon_vrnd64x_v:
- case NEON::BI__builtin_neon_vrnd64xq_v: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frint64x;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
- }
- case NEON::BI__builtin_neon_vrnd64z_v:
- case NEON::BI__builtin_neon_vrnd64zq_v: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frint64z;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
- }
- case NEON::BI__builtin_neon_vrnd_v:
- case NEON::BI__builtin_neon_vrndq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_trunc
- : Intrinsic::trunc;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
- }
- case NEON::BI__builtin_neon_vcvt_f64_v:
- case NEON::BI__builtin_neon_vcvtq_f64_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
- return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
- : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- case NEON::BI__builtin_neon_vcvt_f64_f32: {
- assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
- "unexpected vcvt_f64_f32 builtin");
- NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
- return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvt_f32_f64: {
- assert(Type.getEltType() == NeonTypeFlags::Float32 &&
- "unexpected vcvt_f32_f64 builtin");
- NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
- return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvt_s32_v:
- case NEON::BI__builtin_neon_vcvt_u32_v:
- case NEON::BI__builtin_neon_vcvt_s64_v:
- case NEON::BI__builtin_neon_vcvt_u64_v:
- case NEON::BI__builtin_neon_vcvt_s16_v:
- case NEON::BI__builtin_neon_vcvt_u16_v:
- case NEON::BI__builtin_neon_vcvtq_s32_v:
- case NEON::BI__builtin_neon_vcvtq_u32_v:
- case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v:
- case NEON::BI__builtin_neon_vcvtq_s16_v:
- case NEON::BI__builtin_neon_vcvtq_u16_v: {
- Int =
- usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
- llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
- }
- case NEON::BI__builtin_neon_vcvta_s16_v:
- case NEON::BI__builtin_neon_vcvta_u16_v:
- case NEON::BI__builtin_neon_vcvta_s32_v:
- case NEON::BI__builtin_neon_vcvtaq_s16_v:
- case NEON::BI__builtin_neon_vcvtaq_s32_v:
- case NEON::BI__builtin_neon_vcvta_u32_v:
- case NEON::BI__builtin_neon_vcvtaq_u16_v:
- case NEON::BI__builtin_neon_vcvtaq_u32_v:
- case NEON::BI__builtin_neon_vcvta_s64_v:
- case NEON::BI__builtin_neon_vcvtaq_s64_v:
- case NEON::BI__builtin_neon_vcvta_u64_v:
- case NEON::BI__builtin_neon_vcvtaq_u64_v: {
- Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
- }
- case NEON::BI__builtin_neon_vcvtm_s16_v:
- case NEON::BI__builtin_neon_vcvtm_s32_v:
- case NEON::BI__builtin_neon_vcvtmq_s16_v:
- case NEON::BI__builtin_neon_vcvtmq_s32_v:
- case NEON::BI__builtin_neon_vcvtm_u16_v:
- case NEON::BI__builtin_neon_vcvtm_u32_v:
- case NEON::BI__builtin_neon_vcvtmq_u16_v:
- case NEON::BI__builtin_neon_vcvtmq_u32_v:
- case NEON::BI__builtin_neon_vcvtm_s64_v:
- case NEON::BI__builtin_neon_vcvtmq_s64_v:
- case NEON::BI__builtin_neon_vcvtm_u64_v:
- case NEON::BI__builtin_neon_vcvtmq_u64_v: {
- Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
- }
- case NEON::BI__builtin_neon_vcvtn_s16_v:
- case NEON::BI__builtin_neon_vcvtn_s32_v:
- case NEON::BI__builtin_neon_vcvtnq_s16_v:
- case NEON::BI__builtin_neon_vcvtnq_s32_v:
- case NEON::BI__builtin_neon_vcvtn_u16_v:
- case NEON::BI__builtin_neon_vcvtn_u32_v:
- case NEON::BI__builtin_neon_vcvtnq_u16_v:
- case NEON::BI__builtin_neon_vcvtnq_u32_v:
- case NEON::BI__builtin_neon_vcvtn_s64_v:
- case NEON::BI__builtin_neon_vcvtnq_s64_v:
- case NEON::BI__builtin_neon_vcvtn_u64_v:
- case NEON::BI__builtin_neon_vcvtnq_u64_v: {
- Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
- }
- case NEON::BI__builtin_neon_vcvtp_s16_v:
- case NEON::BI__builtin_neon_vcvtp_s32_v:
- case NEON::BI__builtin_neon_vcvtpq_s16_v:
- case NEON::BI__builtin_neon_vcvtpq_s32_v:
- case NEON::BI__builtin_neon_vcvtp_u16_v:
- case NEON::BI__builtin_neon_vcvtp_u32_v:
- case NEON::BI__builtin_neon_vcvtpq_u16_v:
- case NEON::BI__builtin_neon_vcvtpq_u32_v:
- case NEON::BI__builtin_neon_vcvtp_s64_v:
- case NEON::BI__builtin_neon_vcvtpq_s64_v:
- case NEON::BI__builtin_neon_vcvtp_u64_v:
- case NEON::BI__builtin_neon_vcvtpq_u64_v: {
- Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
- llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
- }
- case NEON::BI__builtin_neon_vmulx_v:
- case NEON::BI__builtin_neon_vmulxq_v: {
- Int = Intrinsic::aarch64_neon_fmulx;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
- }
- case NEON::BI__builtin_neon_vmulxh_lane_f16:
- case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
- // vmulx_lane should be mapped to Neon scalar mulx after
- // extracting the scalar element
- Ops.push_back(EmitScalarExpr(E->getArg(2)));
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
- Ops.pop_back();
- Int = Intrinsic::aarch64_neon_fmulx;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
- }
- case NEON::BI__builtin_neon_vmul_lane_v:
- case NEON::BI__builtin_neon_vmul_laneq_v: {
- // v1f64 vmul_lane should be mapped to Neon scalar mul lane
- bool Quad = false;
- if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
- Quad = true;
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- llvm::FixedVectorType *VTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
- Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
- return Builder.CreateBitCast(Result, Ty);
- }
- case NEON::BI__builtin_neon_vnegd_s64:
- return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
- case NEON::BI__builtin_neon_vnegh_f16:
- return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
- case NEON::BI__builtin_neon_vpmaxnm_v:
- case NEON::BI__builtin_neon_vpmaxnmq_v: {
- Int = Intrinsic::aarch64_neon_fmaxnmp;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
- }
- case NEON::BI__builtin_neon_vpminnm_v:
- case NEON::BI__builtin_neon_vpminnmq_v: {
- Int = Intrinsic::aarch64_neon_fminnmp;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
- }
- case NEON::BI__builtin_neon_vsqrth_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_sqrt
- : Intrinsic::sqrt;
- return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
- }
- case NEON::BI__builtin_neon_vsqrt_v:
- case NEON::BI__builtin_neon_vsqrtq_v: {
- Int = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_sqrt
- : Intrinsic::sqrt;
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
- }
- case NEON::BI__builtin_neon_vrbit_v:
- case NEON::BI__builtin_neon_vrbitq_v: {
- Int = Intrinsic::bitreverse;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
- }
- case NEON::BI__builtin_neon_vaddv_u8:
- // FIXME: These are handled by the AArch64 scalar code.
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vaddv_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddv_u16:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vaddv_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u8:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vaddvq_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u16:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vaddvq_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_f16: {
- Int = Intrinsic::aarch64_neon_fmaxv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vmaxvq_f16: {
- Int = Intrinsic::aarch64_neon_fmaxv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vminv_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminv_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminv_f16: {
- Int = Intrinsic::aarch64_neon_fminv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vminvq_f16: {
- Int = Intrinsic::aarch64_neon_fminv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vmaxnmv_f16: {
- Int = Intrinsic::aarch64_neon_fmaxnmv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vmaxnmvq_f16: {
- Int = Intrinsic::aarch64_neon_fmaxnmv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vminnmv_f16: {
- Int = Intrinsic::aarch64_neon_fminnmv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vminnmvq_f16: {
- Int = Intrinsic::aarch64_neon_fminnmv;
- Ty = HalfTy;
- VTy = llvm::FixedVectorType::get(HalfTy, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
- return Builder.CreateTrunc(Ops[0], HalfTy);
- }
- case NEON::BI__builtin_neon_vmul_n_f64: {
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
- return Builder.CreateFMul(Ops[0], RHS);
- }
- case NEON::BI__builtin_neon_vaddlv_u8: {
- Int = Intrinsic::aarch64_neon_uaddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddlv_u16: {
- Int = Intrinsic::aarch64_neon_uaddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- }
- case NEON::BI__builtin_neon_vaddlvq_u8: {
- Int = Intrinsic::aarch64_neon_uaddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddlvq_u16: {
- Int = Intrinsic::aarch64_neon_uaddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- }
- case NEON::BI__builtin_neon_vaddlv_s8: {
- Int = Intrinsic::aarch64_neon_saddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddlv_s16: {
- Int = Intrinsic::aarch64_neon_saddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- }
- case NEON::BI__builtin_neon_vaddlvq_s8: {
- Int = Intrinsic::aarch64_neon_saddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddlvq_s16: {
- Int = Intrinsic::aarch64_neon_saddlv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
- }
- case NEON::BI__builtin_neon_vsri_n_v:
- case NEON::BI__builtin_neon_vsriq_n_v: {
- Int = Intrinsic::aarch64_neon_vsri;
- llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
- return EmitNeonCall(Intrin, Ops, "vsri_n");
- }
- case NEON::BI__builtin_neon_vsli_n_v:
- case NEON::BI__builtin_neon_vsliq_n_v: {
- Int = Intrinsic::aarch64_neon_vsli;
- llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
- return EmitNeonCall(Intrin, Ops, "vsli_n");
- }
- case NEON::BI__builtin_neon_vsra_n_v:
- case NEON::BI__builtin_neon_vsraq_n_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
- return Builder.CreateAdd(Ops[0], Ops[1]);
- case NEON::BI__builtin_neon_vrsra_n_v:
- case NEON::BI__builtin_neon_vrsraq_n_v: {
- Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
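- // Emit the rounding shift right as a rounding shift left with a negated
- // shift amount, then add the result into the accumulator.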
- SmallVector<llvm::Value*,2> TmpOps;
- TmpOps.push_back(Ops[1]);
- TmpOps.push_back(Ops[2]);
- Function* F = CGM.getIntrinsic(Int, Ty);
- llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
- return Builder.CreateAdd(Ops[0], tmp);
- }
- case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
- return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
- }
- case NEON::BI__builtin_neon_vst1_v:
- case NEON::BI__builtin_neon_vst1q_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
- case NEON::BI__builtin_neon_vld1_lane_v:
- case NEON::BI__builtin_neon_vld1q_lane_v: {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
- PtrOp0.getAlignment());
- return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
- }
- case NEON::BI__builtin_neon_vld1_dup_v:
- case NEON::BI__builtin_neon_vld1q_dup_v: {
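- // Load a single element, insert it into lane 0 of an undef vector, and
- // splat it across all lanes.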
- Value *V = UndefValue::get(Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
- PtrOp0.getAlignment());
- llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
- Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
- return EmitNeonSplat(Ops[0], CI);
- }
- case NEON::BI__builtin_neon_vst1_lane_v:
- case NEON::BI__builtin_neon_vst1q_lane_v:
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
- PtrOp0.getAlignment());
- case NEON::BI__builtin_neon_vld2_v:
- case NEON::BI__builtin_neon_vld2q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld3_v:
- case NEON::BI__builtin_neon_vld3q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
- Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld2_lane_v:
- case NEON::BI__builtin_neon_vld2q_lane_v: {
- llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
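- // Rotate the operands so the pointer comes last, matching the ld2lane
- // intrinsic's operand order.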
- std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld3_lane_v:
- case NEON::BI__builtin_neon_vld3q_lane_v: {
- llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
- std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vld4_lane_v:
- case NEON::BI__builtin_neon_vld4q_lane_v: {
- llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
- std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
- Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vst2_v:
- case NEON::BI__builtin_neon_vst2q_v: {
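- // Move the pointer operand to the end: the st2 intrinsic takes the data
- // vectors first and the pointer last.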
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vst2_lane_v:
- case NEON::BI__builtin_neon_vst2q_lane_v: {
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
- llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vst3_v:
- case NEON::BI__builtin_neon_vst3q_v: {
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vst3_lane_v:
- case NEON::BI__builtin_neon_vst3q_lane_v: {
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
- llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vst4_v:
- case NEON::BI__builtin_neon_vst4q_v: {
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vst4_lane_v:
- case NEON::BI__builtin_neon_vst4q_lane_v: {
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
- Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
- llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
- Ops, "");
- }
- case NEON::BI__builtin_neon_vtrn_v:
- case NEON::BI__builtin_neon_vtrnq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
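- // Emit two shuffles, one for each half of the transpose, and store each
- // result to consecutive output slots.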
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(i+vi);
- Indices.push_back(i+e+vi);
- }
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vuzp_v:
- case NEON::BI__builtin_neon_vuzpq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(2*i+vi);
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vzip_v:
- case NEON::BI__builtin_neon_vzipq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Value *SV = nullptr;
- for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<int, 16> Indices;
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back((i + vi*e) >> 1);
- Indices.push_back(((i + vi*e) >> 1)+e);
- }
- Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
- SV = Builder.CreateDefaultAlignedStore(SV, Addr);
- }
- return SV;
- }
- case NEON::BI__builtin_neon_vqtbl1q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
- Ops, "vtbl1");
- }
- case NEON::BI__builtin_neon_vqtbl2q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
- Ops, "vtbl2");
- }
- case NEON::BI__builtin_neon_vqtbl3q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
- Ops, "vtbl3");
- }
- case NEON::BI__builtin_neon_vqtbl4q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
- Ops, "vtbl4");
- }
- case NEON::BI__builtin_neon_vqtbx1q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
- Ops, "vtbx1");
- }
- case NEON::BI__builtin_neon_vqtbx2q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
- Ops, "vtbx2");
- }
- case NEON::BI__builtin_neon_vqtbx3q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
- Ops, "vtbx3");
- }
- case NEON::BI__builtin_neon_vqtbx4q_v: {
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
- Ops, "vtbx4");
- }
- case NEON::BI__builtin_neon_vsqadd_v:
- case NEON::BI__builtin_neon_vsqaddq_v: {
- Int = Intrinsic::aarch64_neon_usqadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
- }
- case NEON::BI__builtin_neon_vuqadd_v:
- case NEON::BI__builtin_neon_vuqaddq_v: {
- Int = Intrinsic::aarch64_neon_suqadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
- }
- }
- }
- Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
- BuiltinID == BPF::BI__builtin_btf_type_id ||
- BuiltinID == BPF::BI__builtin_preserve_type_info ||
- BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
- "unexpected BPF builtin");
- // A sequence number, injected into IR builtin functions, to
- // prevent CSE when the only difference between the functions
- // is the debuginfo metadata.
- static uint32_t BuiltinSeqNum;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unexpected BPF builtin");
- case BPF::BI__builtin_preserve_field_info: {
- const Expr *Arg = E->getArg(0);
- bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
- if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(),
- "using __builtin_preserve_field_info() without -g");
- return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer(*this);
- }
- // Enable underlying preserve_*_access_index() generation.
- bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
- IsInPreservedAIRegion = true;
- Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer(*this);
- IsInPreservedAIRegion = OldIsInPreservedAIRegion;
- ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
- // Build the IR for the preserve_field_info intrinsic.
- llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
- {FieldAddr->getType()});
- return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
- }
- case BPF::BI__builtin_btf_type_id:
- case BPF::BI__builtin_preserve_type_info: {
- if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(), "using builtin function without -g");
- return nullptr;
- }
- const Expr *Arg0 = E->getArg(0);
- llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
- Arg0->getType(), Arg0->getExprLoc());
- ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
- Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
- llvm::Function *FnDecl;
- if (BuiltinID == BPF::BI__builtin_btf_type_id)
- FnDecl = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
- else
- FnDecl = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
- CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
- Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
- return Fn;
- }
- case BPF::BI__builtin_preserve_enum_value: {
- if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(), "using builtin function without -g");
- return nullptr;
- }
- const Expr *Arg0 = E->getArg(0);
- llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
- Arg0->getType(), Arg0->getExprLoc());
- // Find enumerator
- const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
- const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
- const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
- const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
- auto &InitVal = Enumerator->getInitVal();
- std::string InitValStr;
- if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
- InitValStr = std::to_string(InitVal.getSExtValue());
- else
- InitValStr = std::to_string(InitVal.getZExtValue());
- std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
- Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
- ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
- Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
- llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
- CallInst *Fn =
- Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
- Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
- return Fn;
- }
- }
- }
- llvm::Value *CodeGenFunction::
- BuildVector(ArrayRef<llvm::Value*> Ops) {
- assert((Ops.size() & (Ops.size() - 1)) == 0 &&
- "Not a power-of-two sized vector!");
- bool AllConstants = true;
- for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
- AllConstants &= isa<Constant>(Ops[i]);
- // If this is a constant vector, create a ConstantVector.
- if (AllConstants) {
- SmallVector<llvm::Constant*, 16> CstOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- CstOps.push_back(cast<Constant>(Ops[i]));
- return llvm::ConstantVector::get(CstOps);
- }
- // Otherwise, insertelement the values to build the vector.
- Value *Result = llvm::UndefValue::get(
- llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
- return Result;
- }
- // Convert the mask from an integer type to a vector of i1.
- static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
- unsigned NumElts) {
- auto *MaskTy = llvm::FixedVectorType::get(
- CGF.Builder.getInt1Ty(),
- cast<IntegerType>(Mask->getType())->getBitWidth());
- Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
- // If we have fewer than 8 elements, the starting mask was an i8 and
- // we need to extract down to the right number of elements.
- if (NumElts < 8) {
- int Indices[4];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
- makeArrayRef(Indices, NumElts),
- "extract");
- }
- return MaskVec;
- }
- static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
- Align Alignment) {
- // Cast the pointer to the right type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
- Value *MaskVec = getMaskVecValue(
- CGF, Ops[2],
- cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
- return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
- }
- static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
- Align Alignment) {
- // Cast the pointer to the right type.
- llvm::Type *Ty = Ops[1]->getType();
- Value *Ptr =
- CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
- Value *MaskVec = getMaskVecValue(
- CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
- return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
- }
- static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops) {
- auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
- llvm::Type *PtrTy = ResultTy->getElementType();
- // Cast the pointer to the element type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getMaskVecValue(
- CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
- llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
- ResultTy);
- return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
- }
- static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops,
- bool IsCompress) {
- auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
- Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
- Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
- : Intrinsic::x86_avx512_mask_expand;
- llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
- return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
- }
- static Value *EmitX86CompressStore(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops) {
- auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
- llvm::Type *PtrTy = ResultTy->getElementType();
- // Cast the pointer to the element type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
- llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
- ResultTy);
- return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
- }
- static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- ArrayRef<Value *> Ops,
- bool InvertLHS = false) {
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
- Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
- if (InvertLHS)
- LHS = CGF.Builder.CreateNot(LHS);
- return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
- Ops[0]->getType());
- }
- static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
- Value *Amt, bool IsRight) {
- llvm::Type *Ty = Op0->getType();
- // The amount may be a scalar immediate, in which case create a splat vector.
- // Funnel shift amounts are treated modulo the bit width, and the types are all
- // powers of 2, so we only care about the lowest log2 bits anyway.
- if (Amt->getType() != Ty) {
- unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
- Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
- Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
- }
- unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
- Function *F = CGF.CGM.getIntrinsic(IID, Ty);
- return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
- }
- static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
- bool IsSigned) {
- Value *Op0 = Ops[0];
- Value *Op1 = Ops[1];
- llvm::Type *Ty = Op0->getType();
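- // The low 3 bits of the immediate select the comparison predicate; 6 and 7
- // encode always-false and always-true.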
- uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
- CmpInst::Predicate Pred;
- switch (Imm) {
- case 0x0:
- Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- break;
- case 0x1:
- Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
- break;
- case 0x2:
- Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- break;
- case 0x3:
- Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- break;
- case 0x4:
- Pred = ICmpInst::ICMP_EQ;
- break;
- case 0x5:
- Pred = ICmpInst::ICMP_NE;
- break;
- case 0x6:
- return llvm::Constant::getNullValue(Ty); // FALSE
- case 0x7:
- return llvm::Constant::getAllOnesValue(Ty); // TRUE
- default:
- llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
- }
- Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
- Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
- return Res;
- }
- static Value *EmitX86Select(CodeGenFunction &CGF,
- Value *Mask, Value *Op0, Value *Op1) {
- // If the mask is all ones, just return the first argument.
- if (const auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Op0;
- Mask = getMaskVecValue(
- CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
- return CGF.Builder.CreateSelect(Mask, Op0, Op1);
- }
- static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
- Value *Mask, Value *Op0, Value *Op1) {
- // If the mask is all ones, just return the first argument.
- if (const auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Op0;
- auto *MaskTy = llvm::FixedVectorType::get(
- CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
- Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
- Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
- return CGF.Builder.CreateSelect(Mask, Op0, Op1);
- }
- static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
- unsigned NumElts, Value *MaskIn) {
- if (MaskIn) {
- const auto *C = dyn_cast<Constant>(MaskIn);
- if (!C || !C->isAllOnesValue())
- Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
- }
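- // Pad the compare result out to at least 8 lanes (the extra lanes are zero)
- // so it can be bitcast to an i8 mask.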
- if (NumElts < 8) {
- int Indices[8];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- for (unsigned i = NumElts; i != 8; ++i)
- Indices[i] = i % NumElts + NumElts;
- Cmp = CGF.Builder.CreateShuffleVector(
- Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
- }
- return CGF.Builder.CreateBitCast(Cmp,
- IntegerType::get(CGF.getLLVMContext(),
- std::max(NumElts, 8U)));
- }
- static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
- bool Signed, ArrayRef<Value *> Ops) {
- assert((Ops.size() == 2 || Ops.size() == 4) &&
- "Unexpected number of arguments");
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Value *Cmp;
- if (CC == 3) {
- Cmp = Constant::getNullValue(
- llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
- } else if (CC == 7) {
- Cmp = Constant::getAllOnesValue(
- llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
- } else {
- ICmpInst::Predicate Pred;
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case 0: Pred = ICmpInst::ICMP_EQ; break;
- case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
- case 4: Pred = ICmpInst::ICMP_NE; break;
- case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- }
- Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
- }
- Value *MaskIn = nullptr;
- if (Ops.size() == 4)
- MaskIn = Ops[3];
- return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
- }
- static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
- Value *Zero = Constant::getNullValue(In->getType());
- return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
- }
- static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
- ArrayRef<Value *> Ops, bool IsSigned) {
- unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
- llvm::Type *Ty = Ops[1]->getType();
- Value *Res;
- if (Rnd != 4) {
- Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
- : Intrinsic::x86_avx512_uitofp_round;
- Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
- Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
- } else {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
- : CGF.Builder.CreateUIToFP(Ops[0], Ty);
- }
- return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
- }
- // Lowers X86 FMA intrinsics to IR.
- static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
- ArrayRef<Value *> Ops, unsigned BuiltinID,
- bool IsAddSub) {
- bool Subtract = false;
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
- switch (BuiltinID) {
- default: break;
- case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
- IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
- break;
- case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
- IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
- break;
- case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
- IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512;
- break;
- case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
- IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512;
- break;
- case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
- break;
- case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- Subtract = true;
- LLVM_FALLTHROUGH;
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
- break;
- }
- Value *A = Ops[0];
- Value *B = Ops[1];
- Value *C = Ops[2];
- if (Subtract)
- C = CGF.Builder.CreateFNeg(C);
- Value *Res;
- // Use the target-specific intrinsic when explicit rounding is requested
- // (not _MM_FROUND_CUR_DIRECTION/4) or for the addsub forms; otherwise the
- // generic llvm.fma path below suffices.
- if (IID != Intrinsic::not_intrinsic &&
- (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
- IsAddSub)) {
- Function *Intr = CGF.CGM.getIntrinsic(IID);
- Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
- } else {
- llvm::Type *Ty = A->getType();
- Function *FMA;
- if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
- Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
- } else {
- FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Res = CGF.Builder.CreateCall(FMA, {A, B, C});
- }
- }
- // Handle any required masking.
- Value *MaskFalseVal = nullptr;
- switch (BuiltinID) {
- case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- MaskFalseVal = Ops[0];
- break;
- case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
- break;
- case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
- case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
- case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
- case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
- case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- MaskFalseVal = Ops[2];
- break;
- }
- if (MaskFalseVal)
- return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
- return Res;
- }
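To make the masking tail above concrete, here is a minimal per-lane sketch (the arrays, mask value, and names are purely illustrative): the final select keeps the FMA result in lanes whose mask bit is set and the pass-through value elsewhere, where the pass-through is Ops[0] for the _mask forms, zero for _maskz, and the accumulator Ops[2] for the _mask3 forms.

#include <cstdio>

int main() {
  float res[4]  = {10, 20, 30, 40}; // pretend FMA results
  float pass[4] = {1, 2, 3, 4};     // pass-through lanes
  unsigned mask = 0x5;              // bits 0 and 2 set
  for (int i = 0; i != 4; ++i)
    printf("%g ", (mask >> i) & 1 ? res[i] : pass[i]);
  printf("\n");                     // prints: 10 2 30 4
}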
- static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
- MutableArrayRef<Value *> Ops, Value *Upper,
- bool ZeroMask = false, unsigned PTIdx = 0,
- bool NegAcc = false) {
- unsigned Rnd = 4;
- if (Ops.size() > 4)
- Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
- if (NegAcc)
- Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
- Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
- Value *Res;
- if (Rnd != 4) {
- Intrinsic::ID IID;
- switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
- case 16:
- IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
- break;
- case 32:
- IID = Intrinsic::x86_avx512_vfmadd_f32;
- break;
- case 64:
- IID = Intrinsic::x86_avx512_vfmadd_f64;
- break;
- default:
- llvm_unreachable("Unexpected size");
- }
- Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
- {Ops[0], Ops[1], Ops[2], Ops[4]});
- } else if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
- Function *FMA = CGF.CGM.getIntrinsic(
- Intrinsic::experimental_constrained_fma, Ops[0]->getType());
- Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
- } else {
- Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
- Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
- }
- // If we have more than 3 arguments, we need to do masking.
- if (Ops.size() > 3) {
- Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
- : Ops[PTIdx];
- // If we negated the accumulator and it's the PassThru value, we need to
- // bypass the negate. Conveniently, Upper should be the same thing in this
- // case.
- if (NegAcc && PTIdx == 2)
- PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
- Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
- }
- return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
- }
- static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
- ArrayRef<Value *> Ops) {
- llvm::Type *Ty = Ops[0]->getType();
- // Arguments have a vXi32 type so cast to vXi64.
- Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
- Ty->getPrimitiveSizeInBits() / 64);
- Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
- Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
- if (IsSigned) {
- // Shift left then arithmetic shift right.
- Constant *ShiftAmt = ConstantInt::get(Ty, 32);
- LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
- LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
- RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
- RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
- } else {
- // Clear the upper bits.
- Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
- LHS = CGF.Builder.CreateAnd(LHS, Mask);
- RHS = CGF.Builder.CreateAnd(RHS, Mask);
- }
- return CGF.Builder.CreateMul(LHS, RHS);
- }
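A hedged per-lane model of what this lowering computes (the helper names are made up for illustration): pmuldq sign-extends the low 32 bits of each 64-bit lane (the shl/ashr pair above), pmuludq zero-extends them (the 0xffffffff mask above), and both then perform a full 64-bit multiply.

#include <cstdint>
#include <cstdio>

static int64_t muldqLane(uint64_t a, uint64_t b) {
  // Sign-extend the low 32 bits of each operand, then multiply.
  return (int64_t)(int32_t)(uint32_t)a * (int64_t)(int32_t)(uint32_t)b;
}
static uint64_t muludqLane(uint64_t a, uint64_t b) {
  // Keep only the low 32 bits of each operand, then multiply.
  return (a & 0xffffffffULL) * (b & 0xffffffffULL);
}

int main() {
  printf("%lld\n", (long long)muldqLane(0xfffffffeULL, 3));           // -2 * 3 = -6
  printf("%llu\n", (unsigned long long)muludqLane(0xfffffffeULL, 3)); // 4294967294 * 3 = 12884901882
}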
- // Emit a masked pternlog intrinsic. This only exists because the header has to
- // use a macro and we aren't able to pass the input argument to a pternlog
- // builtin and a select builtin without evaluating it twice.
- static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
- ArrayRef<Value *> Ops) {
- llvm::Type *Ty = Ops[0]->getType();
- unsigned VecWidth = Ty->getPrimitiveSizeInBits();
- unsigned EltWidth = Ty->getScalarSizeInBits();
- Intrinsic::ID IID;
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pternlog_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pternlog_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pternlog_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pternlog_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pternlog_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pternlog_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
- Ops.drop_back());
- Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
- return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
- }
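A small illustration of the double-evaluation hazard the comment describes, using stand-in functions rather than the real builtins (NAIVE_MASK_TERNLOG, ternlogStandIn, and selectStandIn are invented names): composing the masked form from two calls inside a macro would expand, and therefore evaluate, the source operand twice.

#include <cstdio>

static int ternlogStandIn(int a, int b, int c, int imm) { return a ^ b ^ c ^ imm; }
static int selectStandIn(bool mask, int x, int passthru) { return mask ? x : passthru; }
// A header macro written this way expands (A) twice:
#define NAIVE_MASK_TERNLOG(U, A, B, C, I) \
  selectStandIn((U), ternlogStandIn((A), (B), (C), (I)), (A))

int main() {
  int vals[] = {10, 20, 30};
  int *p = vals;
  (void)NAIVE_MASK_TERNLOG(false, *p++, 0, 0, 0);
  printf("p advanced by %td elements\n", p - vals); // prints 2, not 1
}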
- static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
- llvm::Type *DstTy) {
- unsigned NumberOfElements =
- cast<llvm::FixedVectorType>(DstTy)->getNumElements();
- Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
- return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
- }
- // Emit a binary intrinsic where the result and both arguments share one type.
- static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops, Intrinsic::ID IID) {
- llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
- return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
- }
- Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
- const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
- return EmitX86CpuIs(CPUStr);
- }
- // Convert F16 halves to floats.
- static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops,
- llvm::Type *DstTy) {
- assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
- "Unknown cvtph2ps intrinsic");
- // If the SAE form doesn't use default rounding, we can't lower it to a
- // plain fpext, so keep the target-specific intrinsic.
- if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
- Function *F =
- CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
- return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
- }
- unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
- Value *Src = Ops[0];
- // Extract the subvector.
- if (NumDstElts !=
- cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
- assert(NumDstElts == 4 && "Unexpected vector size");
- Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
- }
- // Bitcast from vXi16 to vXf16.
- auto *HalfTy = llvm::FixedVectorType::get(
- llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
- Src = CGF.Builder.CreateBitCast(Src, HalfTy);
- // Perform the fp-extension.
- Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
- if (Ops.size() >= 3)
- Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
- return Res;
- }
- // Convert a BF16 to a float.
- static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
- const CallExpr *E,
- ArrayRef<Value *> Ops) {
- llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
- Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
- Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
- Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
- return BitCast;
- }
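A tiny scalar model of the same lowering (the function name is illustrative): a bfloat16 is the upper 16 bits of a binary32, so widening is a zero-extend, a shift left by 16, and a bitcast.

#include <cstdint>
#include <cstring>
#include <cstdio>

static float bf16ToFloat(uint16_t bf) {
  uint32_t bits = (uint32_t)bf << 16; // zext + shl 16, as in the IR above
  float f;
  std::memcpy(&f, &bits, sizeof f);   // the bitcast
  return f;
}

int main() {
  printf("%f\n", bf16ToFloat(0x3f80)); // 0x3f800000 == 1.0f
}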
- Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
- llvm::Type *Int32Ty = Builder.getInt32Ty();
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
- cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
- // Calculate the index of the __cpu_model field to read based on whether the
- // string names a vendor, CPU type, or CPU subtype, along with the enum value
- // expected in that field.
- unsigned Index;
- unsigned Value;
- std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
- #define X86_VENDOR(ENUM, STRING) \
- .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
- #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
- .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
- #define X86_CPU_TYPE(ENUM, STR) \
- .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
- #define X86_CPU_SUBTYPE(ENUM, STR) \
- .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
- #include "llvm/Support/X86TargetParser.def"
- .Default({0, 0});
- assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
- // Grab the appropriate field from __cpu_model.
- llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, Index)};
- llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
- CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
- CharUnits::fromQuantity(4));
- // Check the value of the field against the requested value.
- return Builder.CreateICmpEQ(CpuValue,
- llvm::ConstantInt::get(Int32Ty, Value));
- }
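A hedged sketch of the data this lowering reads (the struct and field names are illustrative stand-ins for the libgcc/compiler-rt definitions, and the values are made up): the builtin becomes a load of one field of __cpu_model followed by a compare against the expected enum value.

#include <cstdio>

struct ProcessorModel {           // mirrors the layout in the comment above
  unsigned int cpu_vendor;
  unsigned int cpu_type;
  unsigned int cpu_subtype;
  unsigned int cpu_features[1];
};

int main() {
  ProcessorModel model = {1, 7, 0, {0}};   // pretend values filled in at startup
  unsigned index = 1, value = 7;           // e.g. a CPU-type query
  unsigned field = index == 0 ? model.cpu_vendor
                 : index == 1 ? model.cpu_type
                              : model.cpu_subtype;
  printf("%d\n", field == value);          // __builtin_cpu_is-style result: 1
}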
- Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
- const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
- return EmitX86CpuSupports(FeatureStr);
- }
- Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
- return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
- }
- llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
- uint32_t Features1 = Lo_32(FeaturesMask);
- uint32_t Features2 = Hi_32(FeaturesMask);
- Value *Result = Builder.getTrue();
- if (Features1 != 0) {
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
- cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
- // Grab the first (0th) element of the __cpu_features field from the
- // __cpu_model global of type STy.
- Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
- Builder.getInt32(0)};
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
- CharUnits::fromQuantity(4));
- // Check the value of the bit corresponding to the feature requested.
- Value *Mask = Builder.getInt32(Features1);
- Value *Bitset = Builder.CreateAnd(Features, Mask);
- Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
- Result = Builder.CreateAnd(Result, Cmp);
- }
- if (Features2 != 0) {
- llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
- "__cpu_features2");
- cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
- Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
- CharUnits::fromQuantity(4));
- // Check the value of the bit corresponding to the feature requested.
- Value *Mask = Builder.getInt32(Features2);
- Value *Bitset = Builder.CreateAnd(Features, Mask);
- Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
- Result = Builder.CreateAnd(Result, Cmp);
- }
- return Result;
- }
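As a quick sanity check of the bit test emitted above (hasAll and the sample masks are illustrative): a feature query succeeds only if every requested bit is set in the corresponding word, i.e. (word & mask) == mask.

#include <cstdint>
#include <cstdio>

static bool hasAll(uint32_t word, uint32_t mask) { return (word & mask) == mask; }

int main() {
  uint32_t features1 = 0x0b; // low 32 feature bits (__cpu_features[0])
  uint32_t features2 = 0x00; // high 32 feature bits (__cpu_features2)
  printf("%d\n", hasAll(features1, 0x03) && hasAll(features2, 0x00)); // prints 1
}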
- Value *CodeGenFunction::EmitX86CpuInit() {
- llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
- /*Variadic*/ false);
- llvm::FunctionCallee Func =
- CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
- cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
- cast<llvm::GlobalValue>(Func.getCallee())
- ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
- return Builder.CreateCall(Func);
- }
- Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- if (BuiltinID == X86::BI__builtin_cpu_is)
- return EmitX86CpuIs(E);
- if (BuiltinID == X86::BI__builtin_cpu_supports)
- return EmitX86CpuSupports(E);
- if (BuiltinID == X86::BI__builtin_cpu_init)
- return EmitX86CpuInit();
- // Handle MSVC intrinsics before argument evaluation to prevent double
- // evaluation.
- if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
- return EmitMSVCBuiltinExpr(*MsvcIntId, E);
- SmallVector<Value*, 4> Ops;
- bool IsMaskFCmp = false;
- bool IsConjFMA = false;
- // Find out if any arguments are required to be integer constant expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
- // These exist so that the builtin that takes an immediate can be bounds
- // checked by clang to avoid passing bad immediates to the backend. Since
- // AVX has a larger immediate than SSE, we would need separate builtins to
- // do the different bounds checking. Rather than create a clang-specific
- // SSE-only builtin, this implements eight separate builtins to match the
- // gcc implementation.
- auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
- Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops);
- };
- // For the vector forms of FP comparisons, translate the builtins directly to
- // IR.
- // TODO: The builtins could be removed if the SSE header files used vector
- // extension comparisons directly (vector ordered/unordered may need
- // additional support via __builtin_isnan()).
- auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
- bool IsSignaling) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Value *Cmp;
- if (IsSignaling)
- Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
- else
- Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
- llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
- llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
- Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
- return Builder.CreateBitCast(Sext, FPVecTy);
- };
- switch (BuiltinID) {
- default: return nullptr;
- case X86::BI_mm_prefetch: {
- Value *Address = Ops[0];
- ConstantInt *C = cast<ConstantInt>(Ops[1]);
- Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
- Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
- Value *Data = ConstantInt::get(Int32Ty, 1);
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return Builder.CreateCall(F, {Address, RW, Locality, Data});
- }
- case X86::BI_mm_clflush: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
- Ops[0]);
- }
- case X86::BI_mm_lfence: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
- }
- case X86::BI_mm_mfence: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
- }
- case X86::BI_mm_sfence: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
- }
- case X86::BI_mm_pause: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
- }
- case X86::BI__rdtsc: {
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
- }
- case X86::BI__builtin_ia32_rdtscp: {
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
- Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
- Ops[0]);
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_lzcnt_u16:
- case X86::BI__builtin_ia32_lzcnt_u32:
- case X86::BI__builtin_ia32_lzcnt_u64: {
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
- }
- case X86::BI__builtin_ia32_tzcnt_u16:
- case X86::BI__builtin_ia32_tzcnt_u32:
- case X86::BI__builtin_ia32_tzcnt_u64: {
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
- }
- case X86::BI__builtin_ia32_undef128:
- case X86::BI__builtin_ia32_undef256:
- case X86::BI__builtin_ia32_undef512:
- // The x86 definition of "undef" is not the same as the LLVM definition
- // (PR32176). We leave optimizing away an unnecessary zero constant to the
- // IR optimizer and backend.
- // TODO: If we had a "freeze" IR instruction to generate a fixed undef
- // value, we should use that here instead of a zero.
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
- case X86::BI__builtin_ia32_vec_init_v8qi:
- case X86::BI__builtin_ia32_vec_init_v4hi:
- case X86::BI__builtin_ia32_vec_init_v2si:
- return Builder.CreateBitCast(BuildVector(Ops),
- llvm::Type::getX86_MMXTy(getLLVMContext()));
- case X86::BI__builtin_ia32_vec_ext_v2si:
- case X86::BI__builtin_ia32_vec_ext_v16qi:
- case X86::BI__builtin_ia32_vec_ext_v8hi:
- case X86::BI__builtin_ia32_vec_ext_v4si:
- case X86::BI__builtin_ia32_vec_ext_v4sf:
- case X86::BI__builtin_ia32_vec_ext_v2di:
- case X86::BI__builtin_ia32_vec_ext_v32qi:
- case X86::BI__builtin_ia32_vec_ext_v16hi:
- case X86::BI__builtin_ia32_vec_ext_v8si:
- case X86::BI__builtin_ia32_vec_ext_v4di: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
- Index &= NumElts - 1;
- // These builtins exist so we can ensure the index is an ICE and in range.
- // Otherwise we could just do this in the header file.
- return Builder.CreateExtractElement(Ops[0], Index);
- }
- case X86::BI__builtin_ia32_vec_set_v16qi:
- case X86::BI__builtin_ia32_vec_set_v8hi:
- case X86::BI__builtin_ia32_vec_set_v4si:
- case X86::BI__builtin_ia32_vec_set_v2di:
- case X86::BI__builtin_ia32_vec_set_v32qi:
- case X86::BI__builtin_ia32_vec_set_v16hi:
- case X86::BI__builtin_ia32_vec_set_v8si:
- case X86::BI__builtin_ia32_vec_set_v4di: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
- Index &= NumElts - 1;
- // These builtins exist so we can ensure the index is an ICE and in range.
- // Otherwise we could just do this in the header file.
- return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
- }
- case X86::BI_mm_setcsr:
- case X86::BI__builtin_ia32_ldmxcsr: {
- Address Tmp = CreateMemTemp(E->getArg(0)->getType());
- Builder.CreateStore(Ops[0], Tmp);
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
- Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
- }
- case X86::BI_mm_getcsr:
- case X86::BI__builtin_ia32_stmxcsr: {
- Address Tmp = CreateMemTemp(E->getType());
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
- Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
- return Builder.CreateLoad(Tmp, "stmxcsr");
- }
- case X86::BI__builtin_ia32_xsave:
- case X86::BI__builtin_ia32_xsave64:
- case X86::BI__builtin_ia32_xrstor:
- case X86::BI__builtin_ia32_xrstor64:
- case X86::BI__builtin_ia32_xsaveopt:
- case X86::BI__builtin_ia32_xsaveopt64:
- case X86::BI__builtin_ia32_xrstors:
- case X86::BI__builtin_ia32_xrstors64:
- case X86::BI__builtin_ia32_xsavec:
- case X86::BI__builtin_ia32_xsavec64:
- case X86::BI__builtin_ia32_xsaves:
- case X86::BI__builtin_ia32_xsaves64:
- case X86::BI__builtin_ia32_xsetbv:
- case X86::BI_xsetbv: {
- Intrinsic::ID ID;
- #define INTRINSIC_X86_XSAVE_ID(NAME) \
- case X86::BI__builtin_ia32_##NAME: \
- ID = Intrinsic::x86_##NAME; \
- break
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- INTRINSIC_X86_XSAVE_ID(xsave);
- INTRINSIC_X86_XSAVE_ID(xsave64);
- INTRINSIC_X86_XSAVE_ID(xrstor);
- INTRINSIC_X86_XSAVE_ID(xrstor64);
- INTRINSIC_X86_XSAVE_ID(xsaveopt);
- INTRINSIC_X86_XSAVE_ID(xsaveopt64);
- INTRINSIC_X86_XSAVE_ID(xrstors);
- INTRINSIC_X86_XSAVE_ID(xrstors64);
- INTRINSIC_X86_XSAVE_ID(xsavec);
- INTRINSIC_X86_XSAVE_ID(xsavec64);
- INTRINSIC_X86_XSAVE_ID(xsaves);
- INTRINSIC_X86_XSAVE_ID(xsaves64);
- INTRINSIC_X86_XSAVE_ID(xsetbv);
- case X86::BI_xsetbv:
- ID = Intrinsic::x86_xsetbv;
- break;
- }
- #undef INTRINSIC_X86_XSAVE_ID
- Value *Mhi = Builder.CreateTrunc(
- Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
- Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
- Ops[1] = Mhi;
- Ops.push_back(Mlo);
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- }
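A small model of the operand split above (variable names are illustrative): the 64-bit XSAVE feature mask is handed to the intrinsic as two 32-bit halves, high word first, matching the order in which Mhi and Mlo are placed into Ops.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t mask = 0x0000000700000003ULL;
  uint32_t hi = (uint32_t)(mask >> 32); // Mhi
  uint32_t lo = (uint32_t)mask;         // Mlo
  printf("hi=0x%x lo=0x%x\n", hi, lo);  // hi=0x7 lo=0x3
}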
- case X86::BI__builtin_ia32_xgetbv:
- case X86::BI_xgetbv:
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
- case X86::BI__builtin_ia32_storedqudi128_mask:
- case X86::BI__builtin_ia32_storedqusi128_mask:
- case X86::BI__builtin_ia32_storedquhi128_mask:
- case X86::BI__builtin_ia32_storedquqi128_mask:
- case X86::BI__builtin_ia32_storeupd128_mask:
- case X86::BI__builtin_ia32_storeups128_mask:
- case X86::BI__builtin_ia32_storedqudi256_mask:
- case X86::BI__builtin_ia32_storedqusi256_mask:
- case X86::BI__builtin_ia32_storedquhi256_mask:
- case X86::BI__builtin_ia32_storedquqi256_mask:
- case X86::BI__builtin_ia32_storeupd256_mask:
- case X86::BI__builtin_ia32_storeups256_mask:
- case X86::BI__builtin_ia32_storedqudi512_mask:
- case X86::BI__builtin_ia32_storedqusi512_mask:
- case X86::BI__builtin_ia32_storedquhi512_mask:
- case X86::BI__builtin_ia32_storedquqi512_mask:
- case X86::BI__builtin_ia32_storeupd512_mask:
- case X86::BI__builtin_ia32_storeups512_mask:
- return EmitX86MaskedStore(*this, Ops, Align(1));
- case X86::BI__builtin_ia32_storesh128_mask:
- case X86::BI__builtin_ia32_storess128_mask:
- case X86::BI__builtin_ia32_storesd128_mask:
- return EmitX86MaskedStore(*this, Ops, Align(1));
- case X86::BI__builtin_ia32_vpopcntb_128:
- case X86::BI__builtin_ia32_vpopcntd_128:
- case X86::BI__builtin_ia32_vpopcntq_128:
- case X86::BI__builtin_ia32_vpopcntw_128:
- case X86::BI__builtin_ia32_vpopcntb_256:
- case X86::BI__builtin_ia32_vpopcntd_256:
- case X86::BI__builtin_ia32_vpopcntq_256:
- case X86::BI__builtin_ia32_vpopcntw_256:
- case X86::BI__builtin_ia32_vpopcntb_512:
- case X86::BI__builtin_ia32_vpopcntd_512:
- case X86::BI__builtin_ia32_vpopcntq_512:
- case X86::BI__builtin_ia32_vpopcntw_512: {
- llvm::Type *ResultType = ConvertType(E->getType());
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, Ops);
- }
- case X86::BI__builtin_ia32_cvtmask2b128:
- case X86::BI__builtin_ia32_cvtmask2b256:
- case X86::BI__builtin_ia32_cvtmask2b512:
- case X86::BI__builtin_ia32_cvtmask2w128:
- case X86::BI__builtin_ia32_cvtmask2w256:
- case X86::BI__builtin_ia32_cvtmask2w512:
- case X86::BI__builtin_ia32_cvtmask2d128:
- case X86::BI__builtin_ia32_cvtmask2d256:
- case X86::BI__builtin_ia32_cvtmask2d512:
- case X86::BI__builtin_ia32_cvtmask2q128:
- case X86::BI__builtin_ia32_cvtmask2q256:
- case X86::BI__builtin_ia32_cvtmask2q512:
- return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
- case X86::BI__builtin_ia32_cvtb2mask128:
- case X86::BI__builtin_ia32_cvtb2mask256:
- case X86::BI__builtin_ia32_cvtb2mask512:
- case X86::BI__builtin_ia32_cvtw2mask128:
- case X86::BI__builtin_ia32_cvtw2mask256:
- case X86::BI__builtin_ia32_cvtw2mask512:
- case X86::BI__builtin_ia32_cvtd2mask128:
- case X86::BI__builtin_ia32_cvtd2mask256:
- case X86::BI__builtin_ia32_cvtd2mask512:
- case X86::BI__builtin_ia32_cvtq2mask128:
- case X86::BI__builtin_ia32_cvtq2mask256:
- case X86::BI__builtin_ia32_cvtq2mask512:
- return EmitX86ConvertToMask(*this, Ops[0]);
- case X86::BI__builtin_ia32_cvtdq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
- return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
- case X86::BI__builtin_ia32_cvtudq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
- return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
- case X86::BI__builtin_ia32_vfmaddss3:
- case X86::BI__builtin_ia32_vfmaddsd3:
- case X86::BI__builtin_ia32_vfmaddsh3_mask:
- case X86::BI__builtin_ia32_vfmaddss3_mask:
- case X86::BI__builtin_ia32_vfmaddsd3_mask:
- return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
- case X86::BI__builtin_ia32_vfmaddss:
- case X86::BI__builtin_ia32_vfmaddsd:
- return EmitScalarFMAExpr(*this, E, Ops,
- Constant::getNullValue(Ops[0]->getType()));
- case X86::BI__builtin_ia32_vfmaddsh3_maskz:
- case X86::BI__builtin_ia32_vfmaddss3_maskz:
- case X86::BI__builtin_ia32_vfmaddsd3_maskz:
- return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
- case X86::BI__builtin_ia32_vfmaddsh3_mask3:
- case X86::BI__builtin_ia32_vfmaddss3_mask3:
- case X86::BI__builtin_ia32_vfmaddsd3_mask3:
- return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
- case X86::BI__builtin_ia32_vfmsubsh3_mask3:
- case X86::BI__builtin_ia32_vfmsubss3_mask3:
- case X86::BI__builtin_ia32_vfmsubsd3_mask3:
- return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
- /*NegAcc*/ true);
- case X86::BI__builtin_ia32_vfmaddph:
- case X86::BI__builtin_ia32_vfmaddps:
- case X86::BI__builtin_ia32_vfmaddpd:
- case X86::BI__builtin_ia32_vfmaddph256:
- case X86::BI__builtin_ia32_vfmaddps256:
- case X86::BI__builtin_ia32_vfmaddpd256:
- case X86::BI__builtin_ia32_vfmaddph512_mask:
- case X86::BI__builtin_ia32_vfmaddph512_maskz:
- case X86::BI__builtin_ia32_vfmaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddps512_mask:
- case X86::BI__builtin_ia32_vfmaddps512_maskz:
- case X86::BI__builtin_ia32_vfmaddps512_mask3:
- case X86::BI__builtin_ia32_vfmsubps512_mask3:
- case X86::BI__builtin_ia32_vfmaddpd512_mask:
- case X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubph512_mask3:
- return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
- case X86::BI__builtin_ia32_vfmaddsubph512_mask:
- case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
- case X86::BI__builtin_ia32_movdqa32store128_mask:
- case X86::BI__builtin_ia32_movdqa64store128_mask:
- case X86::BI__builtin_ia32_storeaps128_mask:
- case X86::BI__builtin_ia32_storeapd128_mask:
- case X86::BI__builtin_ia32_movdqa32store256_mask:
- case X86::BI__builtin_ia32_movdqa64store256_mask:
- case X86::BI__builtin_ia32_storeaps256_mask:
- case X86::BI__builtin_ia32_storeapd256_mask:
- case X86::BI__builtin_ia32_movdqa32store512_mask:
- case X86::BI__builtin_ia32_movdqa64store512_mask:
- case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask:
- return EmitX86MaskedStore(
- *this, Ops,
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
- case X86::BI__builtin_ia32_loadups128_mask:
- case X86::BI__builtin_ia32_loadups256_mask:
- case X86::BI__builtin_ia32_loadups512_mask:
- case X86::BI__builtin_ia32_loadupd128_mask:
- case X86::BI__builtin_ia32_loadupd256_mask:
- case X86::BI__builtin_ia32_loadupd512_mask:
- case X86::BI__builtin_ia32_loaddquqi128_mask:
- case X86::BI__builtin_ia32_loaddquqi256_mask:
- case X86::BI__builtin_ia32_loaddquqi512_mask:
- case X86::BI__builtin_ia32_loaddquhi128_mask:
- case X86::BI__builtin_ia32_loaddquhi256_mask:
- case X86::BI__builtin_ia32_loaddquhi512_mask:
- case X86::BI__builtin_ia32_loaddqusi128_mask:
- case X86::BI__builtin_ia32_loaddqusi256_mask:
- case X86::BI__builtin_ia32_loaddqusi512_mask:
- case X86::BI__builtin_ia32_loaddqudi128_mask:
- case X86::BI__builtin_ia32_loaddqudi256_mask:
- case X86::BI__builtin_ia32_loaddqudi512_mask:
- return EmitX86MaskedLoad(*this, Ops, Align(1));
- case X86::BI__builtin_ia32_loadsh128_mask:
- case X86::BI__builtin_ia32_loadss128_mask:
- case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, Align(1));
- case X86::BI__builtin_ia32_loadaps128_mask:
- case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadaps512_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
- case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_loadapd512_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
- case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
- case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask:
- return EmitX86MaskedLoad(
- *this, Ops,
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
- case X86::BI__builtin_ia32_expandloaddf128_mask:
- case X86::BI__builtin_ia32_expandloaddf256_mask:
- case X86::BI__builtin_ia32_expandloaddf512_mask:
- case X86::BI__builtin_ia32_expandloadsf128_mask:
- case X86::BI__builtin_ia32_expandloadsf256_mask:
- case X86::BI__builtin_ia32_expandloadsf512_mask:
- case X86::BI__builtin_ia32_expandloaddi128_mask:
- case X86::BI__builtin_ia32_expandloaddi256_mask:
- case X86::BI__builtin_ia32_expandloaddi512_mask:
- case X86::BI__builtin_ia32_expandloadsi128_mask:
- case X86::BI__builtin_ia32_expandloadsi256_mask:
- case X86::BI__builtin_ia32_expandloadsi512_mask:
- case X86::BI__builtin_ia32_expandloadhi128_mask:
- case X86::BI__builtin_ia32_expandloadhi256_mask:
- case X86::BI__builtin_ia32_expandloadhi512_mask:
- case X86::BI__builtin_ia32_expandloadqi128_mask:
- case X86::BI__builtin_ia32_expandloadqi256_mask:
- case X86::BI__builtin_ia32_expandloadqi512_mask:
- return EmitX86ExpandLoad(*this, Ops);
- case X86::BI__builtin_ia32_compressstoredf128_mask:
- case X86::BI__builtin_ia32_compressstoredf256_mask:
- case X86::BI__builtin_ia32_compressstoredf512_mask:
- case X86::BI__builtin_ia32_compressstoresf128_mask:
- case X86::BI__builtin_ia32_compressstoresf256_mask:
- case X86::BI__builtin_ia32_compressstoresf512_mask:
- case X86::BI__builtin_ia32_compressstoredi128_mask:
- case X86::BI__builtin_ia32_compressstoredi256_mask:
- case X86::BI__builtin_ia32_compressstoredi512_mask:
- case X86::BI__builtin_ia32_compressstoresi128_mask:
- case X86::BI__builtin_ia32_compressstoresi256_mask:
- case X86::BI__builtin_ia32_compressstoresi512_mask:
- case X86::BI__builtin_ia32_compressstorehi128_mask:
- case X86::BI__builtin_ia32_compressstorehi256_mask:
- case X86::BI__builtin_ia32_compressstorehi512_mask:
- case X86::BI__builtin_ia32_compressstoreqi128_mask:
- case X86::BI__builtin_ia32_compressstoreqi256_mask:
- case X86::BI__builtin_ia32_compressstoreqi512_mask:
- return EmitX86CompressStore(*this, Ops);
- case X86::BI__builtin_ia32_expanddf128_mask:
- case X86::BI__builtin_ia32_expanddf256_mask:
- case X86::BI__builtin_ia32_expanddf512_mask:
- case X86::BI__builtin_ia32_expandsf128_mask:
- case X86::BI__builtin_ia32_expandsf256_mask:
- case X86::BI__builtin_ia32_expandsf512_mask:
- case X86::BI__builtin_ia32_expanddi128_mask:
- case X86::BI__builtin_ia32_expanddi256_mask:
- case X86::BI__builtin_ia32_expanddi512_mask:
- case X86::BI__builtin_ia32_expandsi128_mask:
- case X86::BI__builtin_ia32_expandsi256_mask:
- case X86::BI__builtin_ia32_expandsi512_mask:
- case X86::BI__builtin_ia32_expandhi128_mask:
- case X86::BI__builtin_ia32_expandhi256_mask:
- case X86::BI__builtin_ia32_expandhi512_mask:
- case X86::BI__builtin_ia32_expandqi128_mask:
- case X86::BI__builtin_ia32_expandqi256_mask:
- case X86::BI__builtin_ia32_expandqi512_mask:
- return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
- case X86::BI__builtin_ia32_compressdf128_mask:
- case X86::BI__builtin_ia32_compressdf256_mask:
- case X86::BI__builtin_ia32_compressdf512_mask:
- case X86::BI__builtin_ia32_compresssf128_mask:
- case X86::BI__builtin_ia32_compresssf256_mask:
- case X86::BI__builtin_ia32_compresssf512_mask:
- case X86::BI__builtin_ia32_compressdi128_mask:
- case X86::BI__builtin_ia32_compressdi256_mask:
- case X86::BI__builtin_ia32_compressdi512_mask:
- case X86::BI__builtin_ia32_compresssi128_mask:
- case X86::BI__builtin_ia32_compresssi256_mask:
- case X86::BI__builtin_ia32_compresssi512_mask:
- case X86::BI__builtin_ia32_compresshi128_mask:
- case X86::BI__builtin_ia32_compresshi256_mask:
- case X86::BI__builtin_ia32_compresshi512_mask:
- case X86::BI__builtin_ia32_compressqi128_mask:
- case X86::BI__builtin_ia32_compressqi256_mask:
- case X86::BI__builtin_ia32_compressqi512_mask:
- return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
- case X86::BI__builtin_ia32_gather3div2df:
- case X86::BI__builtin_ia32_gather3div2di:
- case X86::BI__builtin_ia32_gather3div4df:
- case X86::BI__builtin_ia32_gather3div4di:
- case X86::BI__builtin_ia32_gather3div4sf:
- case X86::BI__builtin_ia32_gather3div4si:
- case X86::BI__builtin_ia32_gather3div8sf:
- case X86::BI__builtin_ia32_gather3div8si:
- case X86::BI__builtin_ia32_gather3siv2df:
- case X86::BI__builtin_ia32_gather3siv2di:
- case X86::BI__builtin_ia32_gather3siv4df:
- case X86::BI__builtin_ia32_gather3siv4di:
- case X86::BI__builtin_ia32_gather3siv4sf:
- case X86::BI__builtin_ia32_gather3siv4si:
- case X86::BI__builtin_ia32_gather3siv8sf:
- case X86::BI__builtin_ia32_gather3siv8si:
- case X86::BI__builtin_ia32_gathersiv8df:
- case X86::BI__builtin_ia32_gathersiv16sf:
- case X86::BI__builtin_ia32_gatherdiv8df:
- case X86::BI__builtin_ia32_gatherdiv16sf:
- case X86::BI__builtin_ia32_gathersiv8di:
- case X86::BI__builtin_ia32_gathersiv16si:
- case X86::BI__builtin_ia32_gatherdiv8di:
- case X86::BI__builtin_ia32_gatherdiv16si: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_gather3div2df:
- IID = Intrinsic::x86_avx512_mask_gather3div2_df;
- break;
- case X86::BI__builtin_ia32_gather3div2di:
- IID = Intrinsic::x86_avx512_mask_gather3div2_di;
- break;
- case X86::BI__builtin_ia32_gather3div4df:
- IID = Intrinsic::x86_avx512_mask_gather3div4_df;
- break;
- case X86::BI__builtin_ia32_gather3div4di:
- IID = Intrinsic::x86_avx512_mask_gather3div4_di;
- break;
- case X86::BI__builtin_ia32_gather3div4sf:
- IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
- break;
- case X86::BI__builtin_ia32_gather3div4si:
- IID = Intrinsic::x86_avx512_mask_gather3div4_si;
- break;
- case X86::BI__builtin_ia32_gather3div8sf:
- IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
- break;
- case X86::BI__builtin_ia32_gather3div8si:
- IID = Intrinsic::x86_avx512_mask_gather3div8_si;
- break;
- case X86::BI__builtin_ia32_gather3siv2df:
- IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
- break;
- case X86::BI__builtin_ia32_gather3siv2di:
- IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
- break;
- case X86::BI__builtin_ia32_gather3siv4df:
- IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
- break;
- case X86::BI__builtin_ia32_gather3siv4di:
- IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
- break;
- case X86::BI__builtin_ia32_gather3siv4sf:
- IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
- break;
- case X86::BI__builtin_ia32_gather3siv4si:
- IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
- break;
- case X86::BI__builtin_ia32_gather3siv8sf:
- IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
- break;
- case X86::BI__builtin_ia32_gather3siv8si:
- IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
- break;
- case X86::BI__builtin_ia32_gathersiv8df:
- IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
- break;
- case X86::BI__builtin_ia32_gathersiv16sf:
- IID = Intrinsic::x86_avx512_mask_gather_dps_512;
- break;
- case X86::BI__builtin_ia32_gatherdiv8df:
- IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
- break;
- case X86::BI__builtin_ia32_gatherdiv16sf:
- IID = Intrinsic::x86_avx512_mask_gather_qps_512;
- break;
- case X86::BI__builtin_ia32_gathersiv8di:
- IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
- break;
- case X86::BI__builtin_ia32_gathersiv16si:
- IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
- break;
- case X86::BI__builtin_ia32_gatherdiv8di:
- IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
- break;
- case X86::BI__builtin_ia32_gatherdiv16si:
- IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
- break;
- }
- unsigned MinElts = std::min(
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
- cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
- Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
- Function *Intr = CGM.getIntrinsic(IID);
- return Builder.CreateCall(Intr, Ops);
- }
- case X86::BI__builtin_ia32_scattersiv8df:
- case X86::BI__builtin_ia32_scattersiv16sf:
- case X86::BI__builtin_ia32_scatterdiv8df:
- case X86::BI__builtin_ia32_scatterdiv16sf:
- case X86::BI__builtin_ia32_scattersiv8di:
- case X86::BI__builtin_ia32_scattersiv16si:
- case X86::BI__builtin_ia32_scatterdiv8di:
- case X86::BI__builtin_ia32_scatterdiv16si:
- case X86::BI__builtin_ia32_scatterdiv2df:
- case X86::BI__builtin_ia32_scatterdiv2di:
- case X86::BI__builtin_ia32_scatterdiv4df:
- case X86::BI__builtin_ia32_scatterdiv4di:
- case X86::BI__builtin_ia32_scatterdiv4sf:
- case X86::BI__builtin_ia32_scatterdiv4si:
- case X86::BI__builtin_ia32_scatterdiv8sf:
- case X86::BI__builtin_ia32_scatterdiv8si:
- case X86::BI__builtin_ia32_scattersiv2df:
- case X86::BI__builtin_ia32_scattersiv2di:
- case X86::BI__builtin_ia32_scattersiv4df:
- case X86::BI__builtin_ia32_scattersiv4di:
- case X86::BI__builtin_ia32_scattersiv4sf:
- case X86::BI__builtin_ia32_scattersiv4si:
- case X86::BI__builtin_ia32_scattersiv8sf:
- case X86::BI__builtin_ia32_scattersiv8si: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_scattersiv8df:
- IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
- break;
- case X86::BI__builtin_ia32_scattersiv16sf:
- IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
- break;
- case X86::BI__builtin_ia32_scatterdiv8df:
- IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
- break;
- case X86::BI__builtin_ia32_scatterdiv16sf:
- IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
- break;
- case X86::BI__builtin_ia32_scattersiv8di:
- IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
- break;
- case X86::BI__builtin_ia32_scattersiv16si:
- IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
- break;
- case X86::BI__builtin_ia32_scatterdiv8di:
- IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
- break;
- case X86::BI__builtin_ia32_scatterdiv16si:
- IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
- break;
- case X86::BI__builtin_ia32_scatterdiv2df:
- IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
- break;
- case X86::BI__builtin_ia32_scatterdiv2di:
- IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
- break;
- case X86::BI__builtin_ia32_scatterdiv4df:
- IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
- break;
- case X86::BI__builtin_ia32_scatterdiv4di:
- IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
- break;
- case X86::BI__builtin_ia32_scatterdiv4sf:
- IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
- break;
- case X86::BI__builtin_ia32_scatterdiv4si:
- IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
- break;
- case X86::BI__builtin_ia32_scatterdiv8sf:
- IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
- break;
- case X86::BI__builtin_ia32_scatterdiv8si:
- IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
- break;
- case X86::BI__builtin_ia32_scattersiv2df:
- IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
- break;
- case X86::BI__builtin_ia32_scattersiv2di:
- IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
- break;
- case X86::BI__builtin_ia32_scattersiv4df:
- IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
- break;
- case X86::BI__builtin_ia32_scattersiv4di:
- IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
- break;
- case X86::BI__builtin_ia32_scattersiv4sf:
- IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
- break;
- case X86::BI__builtin_ia32_scattersiv4si:
- IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
- break;
- case X86::BI__builtin_ia32_scattersiv8sf:
- IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
- break;
- case X86::BI__builtin_ia32_scattersiv8si:
- IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
- break;
- }
- unsigned MinElts = std::min(
- cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
- cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
- Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
- Function *Intr = CGM.getIntrinsic(IID);
- return Builder.CreateCall(Intr, Ops);
- }
- case X86::BI__builtin_ia32_vextractf128_pd256:
- case X86::BI__builtin_ia32_vextractf128_ps256:
- case X86::BI__builtin_ia32_vextractf128_si256:
- case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4_mask:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_512_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask: {
- auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
- unsigned NumElts = DstTy->getNumElements();
- unsigned SrcNumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- unsigned SubVectors = SrcNumElts / NumElts;
- unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
- assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
- Index &= SubVectors - 1; // Remove any extra bits.
- Index *= NumElts;
- int Indices[16];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i + Index;
- Value *Res = Builder.CreateShuffleVector(Ops[0],
- makeArrayRef(Indices, NumElts),
- "extract");
- if (Ops.size() == 4)
- Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
- return Res;
- }
- case X86::BI__builtin_ia32_vinsertf128_pd256:
- case X86::BI__builtin_ia32_vinsertf128_ps256:
- case X86::BI__builtin_ia32_vinsertf128_si256:
- case X86::BI__builtin_ia32_insert128i256:
- case X86::BI__builtin_ia32_insertf64x4:
- case X86::BI__builtin_ia32_insertf32x4:
- case X86::BI__builtin_ia32_inserti64x4:
- case X86::BI__builtin_ia32_inserti32x4:
- case X86::BI__builtin_ia32_insertf32x8:
- case X86::BI__builtin_ia32_inserti32x8:
- case X86::BI__builtin_ia32_insertf32x4_256:
- case X86::BI__builtin_ia32_inserti32x4_256:
- case X86::BI__builtin_ia32_insertf64x2_256:
- case X86::BI__builtin_ia32_inserti64x2_256:
- case X86::BI__builtin_ia32_insertf64x2_512:
- case X86::BI__builtin_ia32_inserti64x2_512: {
- unsigned DstNumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- unsigned SrcNumElts =
- cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
- unsigned SubVectors = DstNumElts / SrcNumElts;
- unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
- assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
- Index &= SubVectors - 1; // Remove any extra bits.
- Index *= SrcNumElts;
- int Indices[16];
- for (unsigned i = 0; i != DstNumElts; ++i)
- Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
- Value *Op1 = Builder.CreateShuffleVector(Ops[1],
- makeArrayRef(Indices, DstNumElts),
- "widen");
- for (unsigned i = 0; i != DstNumElts; ++i) {
- if (i >= Index && i < (Index + SrcNumElts))
- Indices[i] = (i - Index) + DstNumElts;
- else
- Indices[i] = i;
- }
- return Builder.CreateShuffleVector(Ops[0], Op1,
- makeArrayRef(Indices, DstNumElts),
- "insert");
- }
- case X86::BI__builtin_ia32_pmovqd512_mask:
- case X86::BI__builtin_ia32_pmovwb512_mask: {
- Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
- return EmitX86Select(*this, Ops[2], Res, Ops[1]);
- }
- case X86::BI__builtin_ia32_pmovdb512_mask:
- case X86::BI__builtin_ia32_pmovdw512_mask:
- case X86::BI__builtin_ia32_pmovqw512_mask: {
- if (const auto *C = dyn_cast<Constant>(Ops[2]))
- if (C->isAllOnesValue())
- return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_pmovdb512_mask:
- IID = Intrinsic::x86_avx512_mask_pmov_db_512;
- break;
- case X86::BI__builtin_ia32_pmovdw512_mask:
- IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
- break;
- case X86::BI__builtin_ia32_pmovqw512_mask:
- IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
- break;
- }
- Function *Intr = CGM.getIntrinsic(IID);
- return Builder.CreateCall(Intr, Ops);
- }
- case X86::BI__builtin_ia32_pblendw128:
- case X86::BI__builtin_ia32_blendpd:
- case X86::BI__builtin_ia32_blendps:
- case X86::BI__builtin_ia32_blendpd256:
- case X86::BI__builtin_ia32_blendps256:
- case X86::BI__builtin_ia32_pblendw256:
- case X86::BI__builtin_ia32_pblendd128:
- case X86::BI__builtin_ia32_pblendd256: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- int Indices[16];
- // If there are more than 8 elements, the immediate is used twice so make
- // sure we handle that.
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
- return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "blend");
- }
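A scalar sketch of the index computation above (the element count and immediate are arbitrary examples): bit i of the 8-bit immediate, reused per 128-bit lane, selects lane i from the second source (index NumElts + i) instead of the first (index i).

#include <cstdio>

int main() {
  const unsigned NumElts = 16; // e.g. pblendw256
  const unsigned Imm = 0xA5;
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned Idx = ((Imm >> (i % 8)) & 1) ? NumElts + i : i;
    printf("%u ", Idx); // the shuffle indices fed to CreateShuffleVector
  }
  printf("\n");
}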
- case X86::BI__builtin_ia32_pshuflw:
- case X86::BI__builtin_ia32_pshuflw256:
- case X86::BI__builtin_ia32_pshuflw512: {
- uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
- Imm = (Imm & 0xff) * 0x01010101;
- int Indices[32];
- for (unsigned l = 0; l != NumElts; l += 8) {
- for (unsigned i = 0; i != 4; ++i) {
- Indices[l + i] = l + (Imm & 3);
- Imm >>= 2;
- }
- for (unsigned i = 4; i != 8; ++i)
- Indices[l + i] = l + i;
- }
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
- "pshuflw");
- }
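A standalone sketch of the index derivation (the immediate 0x1B is an arbitrary example): multiplying the 8-bit immediate by 0x01010101 replicates it into every byte, so the inner loop can keep consuming two bits per element across 128-bit lanes without restarting from the original immediate.

#include <cstdio>

int main() {
  const unsigned NumElts = 16;               // pshuflw256: two 8-word lanes
  unsigned Imm = (0x1B & 0xff) * 0x01010101; // 0x1B reverses the low four words
  int Indices[32];
  for (unsigned l = 0; l != NumElts; l += 8) {
    for (unsigned i = 0; i != 4; ++i) {
      Indices[l + i] = l + (Imm & 3);
      Imm >>= 2;
    }
    for (unsigned i = 4; i != 8; ++i)
      Indices[l + i] = l + i;
  }
  for (unsigned i = 0; i != NumElts; ++i)
    printf("%d ", Indices[i]); // 3 2 1 0 4 5 6 7 11 10 9 8 12 13 14 15
  printf("\n");
}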
- case X86::BI__builtin_ia32_pshufhw:
- case X86::BI__builtin_ia32_pshufhw256:
- case X86::BI__builtin_ia32_pshufhw512: {
- uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
- Imm = (Imm & 0xff) * 0x01010101;
- int Indices[32];
- for (unsigned l = 0; l != NumElts; l += 8) {
- for (unsigned i = 0; i != 4; ++i)
- Indices[l + i] = l + i;
- for (unsigned i = 4; i != 8; ++i) {
- Indices[l + i] = l + 4 + (Imm & 3);
- Imm >>= 2;
- }
- }
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
- "pshufhw");
- }
- case X86::BI__builtin_ia32_pshufd:
- case X86::BI__builtin_ia32_pshufd256:
- case X86::BI__builtin_ia32_pshufd512:
- case X86::BI__builtin_ia32_vpermilpd:
- case X86::BI__builtin_ia32_vpermilps:
- case X86::BI__builtin_ia32_vpermilpd256:
- case X86::BI__builtin_ia32_vpermilps256:
- case X86::BI__builtin_ia32_vpermilpd512:
- case X86::BI__builtin_ia32_vpermilps512: {
- uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
- unsigned NumLaneElts = NumElts / NumLanes;
- // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
- Imm = (Imm & 0xff) * 0x01010101;
- int Indices[16];
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- Indices[i + l] = (Imm % NumLaneElts) + l;
- Imm /= NumLaneElts;
- }
- }
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
- "permil");
- }
- case X86::BI__builtin_ia32_shufpd:
- case X86::BI__builtin_ia32_shufpd256:
- case X86::BI__builtin_ia32_shufpd512:
- case X86::BI__builtin_ia32_shufps:
- case X86::BI__builtin_ia32_shufps256:
- case X86::BI__builtin_ia32_shufps512: {
- uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
- unsigned NumLaneElts = NumElts / NumLanes;
- // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
- Imm = (Imm & 0xff) * 0x01010101;
- int Indices[16];
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- unsigned Index = Imm % NumLaneElts;
- Imm /= NumLaneElts;
- if (i >= (NumLaneElts / 2))
- Index += NumElts;
- Indices[l + i] = l + Index;
- }
- }
- return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "shufp");
- }
- case X86::BI__builtin_ia32_permdi256:
- case X86::BI__builtin_ia32_permdf256:
- case X86::BI__builtin_ia32_permdi512:
- case X86::BI__builtin_ia32_permdf512: {
- unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- // These intrinsics operate on 256-bit lanes of four 64-bit elements.
- int Indices[8];
- for (unsigned l = 0; l != NumElts; l += 4)
- for (unsigned i = 0; i != 4; ++i)
- Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
- return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
- "perm");
- }
- case X86::BI__builtin_ia32_palignr128:
- case X86::BI__builtin_ia32_palignr256:
- case X86::BI__builtin_ia32_palignr512: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- assert(NumElts % 16 == 0);
- // If palignr is shifting the pair of vectors more than the size of two
- // lanes, emit zero.
- if (ShiftVal >= 32)
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
- // If palignr is shifting the pair of input vectors more than one lane,
- // but less than two lanes, convert to shifting in zeroes.
- if (ShiftVal > 16) {
- ShiftVal -= 16;
- Ops[1] = Ops[0];
- Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
- }
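- // e.g. an immediate of 20 is handled as a shift of 4 with zeroes shifted
- // in, since the replaced operand is now an all-zero vector.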
- int Indices[64];
- // 256/512-bit palignr operates on 128-bit lanes, so handle each lane separately.
- for (unsigned l = 0; l != NumElts; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = ShiftVal + i;
- if (Idx >= 16)
- Idx += NumElts - 16; // End of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
- return Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "palignr");
- }
- case X86::BI__builtin_ia32_alignd128:
- case X86::BI__builtin_ia32_alignd256:
- case X86::BI__builtin_ia32_alignd512:
- case X86::BI__builtin_ia32_alignq128:
- case X86::BI__builtin_ia32_alignq256:
- case X86::BI__builtin_ia32_alignq512: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
- // Mask the shift amount to the width of the vector.
- ShiftVal &= NumElts - 1;
- int Indices[16];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i + ShiftVal;
- return Builder.CreateShuffleVector(Ops[1], Ops[0],
- makeArrayRef(Indices, NumElts),
- "valign");
- }
- case X86::BI__builtin_ia32_shuf_f32x4_256:
- case X86::BI__builtin_ia32_shuf_f64x2_256:
- case X86::BI__builtin_ia32_shuf_i32x4_256:
- case X86::BI__builtin_ia32_shuf_i64x2_256:
- case X86::BI__builtin_ia32_shuf_f32x4:
- case X86::BI__builtin_ia32_shuf_f64x2:
- case X86::BI__builtin_ia32_shuf_i32x4:
- case X86::BI__builtin_ia32_shuf_i64x2: {
- unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
- unsigned NumElts = Ty->getNumElements();
- unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
- unsigned NumLaneElts = NumElts / NumLanes;
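- // The low half of the output lanes is taken from Ops[0] and the high half
- // from Ops[1]; each lane's field in the immediate selects which source
- // lane to copy.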
- int Indices[16];
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- unsigned Index = (Imm % NumLanes) * NumLaneElts;
- Imm /= NumLanes; // Discard the bits we just used.
- if (l >= (NumElts / 2))
- Index += NumElts; // Switch to other source.
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- Indices[l + i] = Index + i;
- }
- }
- return Builder.CreateShuffleVector(Ops[0], Ops[1],
- makeArrayRef(Indices, NumElts),
- "shuf");
- }
- case X86::BI__builtin_ia32_vperm2f128_pd256:
- case X86::BI__builtin_ia32_vperm2f128_ps256:
- case X86::BI__builtin_ia32_vperm2f128_si256:
- case X86::BI__builtin_ia32_permti256: {
- unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- // This takes a very simple approach since there are two lanes and a
- // shuffle can have 2 inputs. So we reserve the first input for the first
- // lane and the second input for the second lane. This may result in
- // duplicate sources, but this can be dealt with in the backend.
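- // Per output lane, bit 0 of its 4-bit immediate field selects the high
- // half of the chosen source, bit 1 selects the second input, and bit 3
- // forces the lane to zero (bits 4-7 describe the upper lane).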
- Value *OutOps[2];
- int Indices[8];
- for (unsigned l = 0; l != 2; ++l) {
- // Determine the source for this lane.
- if (Imm & (1 << ((l * 4) + 3)))
- OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
- else if (Imm & (1 << ((l * 4) + 1)))
- OutOps[l] = Ops[1];
- else
- OutOps[l] = Ops[0];
- for (unsigned i = 0; i != NumElts/2; ++i) {
- // Start with ith element of the source for this lane.
- unsigned Idx = (l * NumElts) + i;
- // If bit 0 of the immediate half is set, switch to the high half of
- // the source.
- if (Imm & (1 << (l * 4)))
- Idx += NumElts/2;
- Indices[(l * (NumElts/2)) + i] = Idx;
- }
- }
- return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
- makeArrayRef(Indices, NumElts),
- "vperm");
- }
- case X86::BI__builtin_ia32_pslldqi128_byteshift:
- case X86::BI__builtin_ia32_pslldqi256_byteshift:
- case X86::BI__builtin_ia32_pslldqi512_byteshift: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
- // Builtin type is vXi64 so multiply by 8 to get bytes.
- unsigned NumElts = ResultType->getNumElements() * 8;
- // If pslldq is shifting the vector more than 15 bytes, emit zero.
- if (ShiftVal >= 16)
- return llvm::Constant::getNullValue(ResultType);
- int Indices[64];
- // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
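- // The indices select from the concatenation {Zero, Cast}; e.g. for
- // ShiftVal == 3 each lane becomes three zero bytes followed by bytes
- // 0..12 of the corresponding source lane.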
- for (unsigned l = 0; l != NumElts; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = NumElts + i - ShiftVal;
- if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
- auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
- Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
- Value *SV = Builder.CreateShuffleVector(Zero, Cast,
- makeArrayRef(Indices, NumElts),
- "pslldq");
- return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
- }
- case X86::BI__builtin_ia32_psrldqi128_byteshift:
- case X86::BI__builtin_ia32_psrldqi256_byteshift:
- case X86::BI__builtin_ia32_psrldqi512_byteshift: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
- // Builtin type is vXi64 so multiply by 8 to get bytes.
- unsigned NumElts = ResultType->getNumElements() * 8;
- // If psrldq is shifting the vector more than 15 bytes, emit zero.
- if (ShiftVal >= 16)
- return llvm::Constant::getNullValue(ResultType);
- int Indices[64];
- // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != NumElts; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = i + ShiftVal;
- if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
- auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
- Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
- Value *SV = Builder.CreateShuffleVector(Cast, Zero,
- makeArrayRef(Indices, NumElts),
- "psrldq");
- return Builder.CreateBitCast(SV, ResultType, "cast");
- }
- case X86::BI__builtin_ia32_kshiftliqi:
- case X86::BI__builtin_ia32_kshiftlihi:
- case X86::BI__builtin_ia32_kshiftlisi:
- case X86::BI__builtin_ia32_kshiftlidi: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- if (ShiftVal >= NumElts)
- return llvm::Constant::getNullValue(Ops[0]->getType());
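- // Shuffle the concatenation {Zero, In}: element i of the result is
- // In[i - ShiftVal], or zero when i < ShiftVal.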
- Value *In = getMaskVecValue(*this, Ops[0], NumElts);
- int Indices[64];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = NumElts + i - ShiftVal;
- Value *Zero = llvm::Constant::getNullValue(In->getType());
- Value *SV = Builder.CreateShuffleVector(Zero, In,
- makeArrayRef(Indices, NumElts),
- "kshiftl");
- return Builder.CreateBitCast(SV, Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_kshiftriqi:
- case X86::BI__builtin_ia32_kshiftrihi:
- case X86::BI__builtin_ia32_kshiftrisi:
- case X86::BI__builtin_ia32_kshiftridi: {
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- if (ShiftVal >= NumElts)
- return llvm::Constant::getNullValue(Ops[0]->getType());
- Value *In = getMaskVecValue(*this, Ops[0], NumElts);
- int Indices[64];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i + ShiftVal;
- Value *Zero = llvm::Constant::getNullValue(In->getType());
- Value *SV = Builder.CreateShuffleVector(In, Zero,
- makeArrayRef(Indices, NumElts),
- "kshiftr");
- return Builder.CreateBitCast(SV, Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_movnti:
- case X86::BI__builtin_ia32_movnti64:
- case X86::BI__builtin_ia32_movntsd:
- case X86::BI__builtin_ia32_movntss: {
- llvm::MDNode *Node = llvm::MDNode::get(
- getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
- Value *Ptr = Ops[0];
- Value *Src = Ops[1];
- // Extract the 0'th element of the source vector.
- if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
- BuiltinID == X86::BI__builtin_ia32_movntss)
- Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
- // Unaligned nontemporal store of the scalar value.
- StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
- SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- SI->setAlignment(llvm::Align(1));
- return SI;
- }
- // Rotate is a special case of funnel shift - the first two args are the same.
- case X86::BI__builtin_ia32_vprotb:
- case X86::BI__builtin_ia32_vprotw:
- case X86::BI__builtin_ia32_vprotd:
- case X86::BI__builtin_ia32_vprotq:
- case X86::BI__builtin_ia32_vprotbi:
- case X86::BI__builtin_ia32_vprotwi:
- case X86::BI__builtin_ia32_vprotdi:
- case X86::BI__builtin_ia32_vprotqi:
- case X86::BI__builtin_ia32_prold128:
- case X86::BI__builtin_ia32_prold256:
- case X86::BI__builtin_ia32_prold512:
- case X86::BI__builtin_ia32_prolq128:
- case X86::BI__builtin_ia32_prolq256:
- case X86::BI__builtin_ia32_prolq512:
- case X86::BI__builtin_ia32_prolvd128:
- case X86::BI__builtin_ia32_prolvd256:
- case X86::BI__builtin_ia32_prolvd512:
- case X86::BI__builtin_ia32_prolvq128:
- case X86::BI__builtin_ia32_prolvq256:
- case X86::BI__builtin_ia32_prolvq512:
- return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
- case X86::BI__builtin_ia32_prord128:
- case X86::BI__builtin_ia32_prord256:
- case X86::BI__builtin_ia32_prord512:
- case X86::BI__builtin_ia32_prorq128:
- case X86::BI__builtin_ia32_prorq256:
- case X86::BI__builtin_ia32_prorq512:
- case X86::BI__builtin_ia32_prorvd128:
- case X86::BI__builtin_ia32_prorvd256:
- case X86::BI__builtin_ia32_prorvd512:
- case X86::BI__builtin_ia32_prorvq128:
- case X86::BI__builtin_ia32_prorvq256:
- case X86::BI__builtin_ia32_prorvq512:
- return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
- case X86::BI__builtin_ia32_selectb_128:
- case X86::BI__builtin_ia32_selectb_256:
- case X86::BI__builtin_ia32_selectb_512:
- case X86::BI__builtin_ia32_selectw_128:
- case X86::BI__builtin_ia32_selectw_256:
- case X86::BI__builtin_ia32_selectw_512:
- case X86::BI__builtin_ia32_selectd_128:
- case X86::BI__builtin_ia32_selectd_256:
- case X86::BI__builtin_ia32_selectd_512:
- case X86::BI__builtin_ia32_selectq_128:
- case X86::BI__builtin_ia32_selectq_256:
- case X86::BI__builtin_ia32_selectq_512:
- case X86::BI__builtin_ia32_selectph_128:
- case X86::BI__builtin_ia32_selectph_256:
- case X86::BI__builtin_ia32_selectph_512:
- case X86::BI__builtin_ia32_selectps_128:
- case X86::BI__builtin_ia32_selectps_256:
- case X86::BI__builtin_ia32_selectps_512:
- case X86::BI__builtin_ia32_selectpd_128:
- case X86::BI__builtin_ia32_selectpd_256:
- case X86::BI__builtin_ia32_selectpd_512:
- return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
- case X86::BI__builtin_ia32_selectsh_128:
- case X86::BI__builtin_ia32_selectss_128:
- case X86::BI__builtin_ia32_selectsd_128: {
- Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
- A = EmitX86ScalarSelect(*this, Ops[0], A, B);
- return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
- }
- case X86::BI__builtin_ia32_cmpb128_mask:
- case X86::BI__builtin_ia32_cmpb256_mask:
- case X86::BI__builtin_ia32_cmpb512_mask:
- case X86::BI__builtin_ia32_cmpw128_mask:
- case X86::BI__builtin_ia32_cmpw256_mask:
- case X86::BI__builtin_ia32_cmpw512_mask:
- case X86::BI__builtin_ia32_cmpd128_mask:
- case X86::BI__builtin_ia32_cmpd256_mask:
- case X86::BI__builtin_ia32_cmpd512_mask:
- case X86::BI__builtin_ia32_cmpq128_mask:
- case X86::BI__builtin_ia32_cmpq256_mask:
- case X86::BI__builtin_ia32_cmpq512_mask: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
- return EmitX86MaskedCompare(*this, CC, true, Ops);
- }
- case X86::BI__builtin_ia32_ucmpb128_mask:
- case X86::BI__builtin_ia32_ucmpb256_mask:
- case X86::BI__builtin_ia32_ucmpb512_mask:
- case X86::BI__builtin_ia32_ucmpw128_mask:
- case X86::BI__builtin_ia32_ucmpw256_mask:
- case X86::BI__builtin_ia32_ucmpw512_mask:
- case X86::BI__builtin_ia32_ucmpd128_mask:
- case X86::BI__builtin_ia32_ucmpd256_mask:
- case X86::BI__builtin_ia32_ucmpd512_mask:
- case X86::BI__builtin_ia32_ucmpq128_mask:
- case X86::BI__builtin_ia32_ucmpq256_mask:
- case X86::BI__builtin_ia32_ucmpq512_mask: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
- return EmitX86MaskedCompare(*this, CC, false, Ops);
- }
- case X86::BI__builtin_ia32_vpcomb:
- case X86::BI__builtin_ia32_vpcomw:
- case X86::BI__builtin_ia32_vpcomd:
- case X86::BI__builtin_ia32_vpcomq:
- return EmitX86vpcom(*this, Ops, true);
- case X86::BI__builtin_ia32_vpcomub:
- case X86::BI__builtin_ia32_vpcomuw:
- case X86::BI__builtin_ia32_vpcomud:
- case X86::BI__builtin_ia32_vpcomuq:
- return EmitX86vpcom(*this, Ops, false);
- case X86::BI__builtin_ia32_kortestcqi:
- case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestcsi:
- case X86::BI__builtin_ia32_kortestcdi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
- Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
- Value *Cmp = Builder.CreateICmpEQ(Or, C);
- return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
- }
- case X86::BI__builtin_ia32_kortestzqi:
- case X86::BI__builtin_ia32_kortestzhi:
- case X86::BI__builtin_ia32_kortestzsi:
- case X86::BI__builtin_ia32_kortestzdi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
- Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
- Value *Cmp = Builder.CreateICmpEQ(Or, C);
- return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
- }
- case X86::BI__builtin_ia32_ktestcqi:
- case X86::BI__builtin_ia32_ktestzqi:
- case X86::BI__builtin_ia32_ktestchi:
- case X86::BI__builtin_ia32_ktestzhi:
- case X86::BI__builtin_ia32_ktestcsi:
- case X86::BI__builtin_ia32_ktestzsi:
- case X86::BI__builtin_ia32_ktestcdi:
- case X86::BI__builtin_ia32_ktestzdi: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_ktestcqi:
- IID = Intrinsic::x86_avx512_ktestc_b;
- break;
- case X86::BI__builtin_ia32_ktestzqi:
- IID = Intrinsic::x86_avx512_ktestz_b;
- break;
- case X86::BI__builtin_ia32_ktestchi:
- IID = Intrinsic::x86_avx512_ktestc_w;
- break;
- case X86::BI__builtin_ia32_ktestzhi:
- IID = Intrinsic::x86_avx512_ktestz_w;
- break;
- case X86::BI__builtin_ia32_ktestcsi:
- IID = Intrinsic::x86_avx512_ktestc_d;
- break;
- case X86::BI__builtin_ia32_ktestzsi:
- IID = Intrinsic::x86_avx512_ktestz_d;
- break;
- case X86::BI__builtin_ia32_ktestcdi:
- IID = Intrinsic::x86_avx512_ktestc_q;
- break;
- case X86::BI__builtin_ia32_ktestzdi:
- IID = Intrinsic::x86_avx512_ktestz_q;
- break;
- }
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
- Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
- Function *Intr = CGM.getIntrinsic(IID);
- return Builder.CreateCall(Intr, {LHS, RHS});
- }
- case X86::BI__builtin_ia32_kaddqi:
- case X86::BI__builtin_ia32_kaddhi:
- case X86::BI__builtin_ia32_kaddsi:
- case X86::BI__builtin_ia32_kadddi: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_kaddqi:
- IID = Intrinsic::x86_avx512_kadd_b;
- break;
- case X86::BI__builtin_ia32_kaddhi:
- IID = Intrinsic::x86_avx512_kadd_w;
- break;
- case X86::BI__builtin_ia32_kaddsi:
- IID = Intrinsic::x86_avx512_kadd_d;
- break;
- case X86::BI__builtin_ia32_kadddi:
- IID = Intrinsic::x86_avx512_kadd_q;
- break;
- }
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
- Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
- Function *Intr = CGM.getIntrinsic(IID);
- Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
- return Builder.CreateBitCast(Res, Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_kandqi:
- case X86::BI__builtin_ia32_kandhi:
- case X86::BI__builtin_ia32_kandsi:
- case X86::BI__builtin_ia32_kanddi:
- return EmitX86MaskLogic(*this, Instruction::And, Ops);
- case X86::BI__builtin_ia32_kandnqi:
- case X86::BI__builtin_ia32_kandnhi:
- case X86::BI__builtin_ia32_kandnsi:
- case X86::BI__builtin_ia32_kandndi:
- return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
- case X86::BI__builtin_ia32_korqi:
- case X86::BI__builtin_ia32_korhi:
- case X86::BI__builtin_ia32_korsi:
- case X86::BI__builtin_ia32_kordi:
- return EmitX86MaskLogic(*this, Instruction::Or, Ops);
- case X86::BI__builtin_ia32_kxnorqi:
- case X86::BI__builtin_ia32_kxnorhi:
- case X86::BI__builtin_ia32_kxnorsi:
- case X86::BI__builtin_ia32_kxnordi:
- return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
- case X86::BI__builtin_ia32_kxorqi:
- case X86::BI__builtin_ia32_kxorhi:
- case X86::BI__builtin_ia32_kxorsi:
- case X86::BI__builtin_ia32_kxordi:
- return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
- case X86::BI__builtin_ia32_knotqi:
- case X86::BI__builtin_ia32_knothi:
- case X86::BI__builtin_ia32_knotsi:
- case X86::BI__builtin_ia32_knotdi: {
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
- return Builder.CreateBitCast(Builder.CreateNot(Res),
- Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_kmovb:
- case X86::BI__builtin_ia32_kmovw:
- case X86::BI__builtin_ia32_kmovd:
- case X86::BI__builtin_ia32_kmovq: {
- // Bitcast to vXi1 type and then back to integer. This gets the mask
- // register type into the IR, but might be optimized out depending on
- // what's around it.
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
- return Builder.CreateBitCast(Res, Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_kunpckdi:
- case X86::BI__builtin_ia32_kunpcksi:
- case X86::BI__builtin_ia32_kunpckhi: {
- unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
- Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
- Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
- int Indices[64];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- // First extract half of each vector. This gives better codegen than
- // doing it in a single shuffle.
- LHS = Builder.CreateShuffleVector(LHS, LHS,
- makeArrayRef(Indices, NumElts / 2));
- RHS = Builder.CreateShuffleVector(RHS, RHS,
- makeArrayRef(Indices, NumElts / 2));
- // Concat the vectors.
- // NOTE: Operands are swapped to match the intrinsic definition.
- Value *Res = Builder.CreateShuffleVector(RHS, LHS,
- makeArrayRef(Indices, NumElts));
- return Builder.CreateBitCast(Res, Ops[0]->getType());
- }
- case X86::BI__builtin_ia32_vplzcntd_128:
- case X86::BI__builtin_ia32_vplzcntd_256:
- case X86::BI__builtin_ia32_vplzcntd_512:
- case X86::BI__builtin_ia32_vplzcntq_128:
- case X86::BI__builtin_ia32_vplzcntq_256:
- case X86::BI__builtin_ia32_vplzcntq_512: {
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
- }
- case X86::BI__builtin_ia32_sqrtss:
- case X86::BI__builtin_ia32_sqrtsd: {
- Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Function *F;
- if (Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
- A->getType());
- A = Builder.CreateConstrainedFPCall(F, {A});
- } else {
- F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, {A});
- }
- return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
- }
- case X86::BI__builtin_ia32_sqrtsh_round_mask:
- case X86::BI__builtin_ia32_sqrtsd_round_mask:
- case X86::BI__builtin_ia32_sqrtss_round_mask: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
- // Only lower to generic IR if the rounding mode is 4 (AKA CUR_DIRECTION);
- // otherwise keep the target-specific intrinsic.
- if (CC != 4) {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_sqrtsh_round_mask:
- IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
- break;
- case X86::BI__builtin_ia32_sqrtsd_round_mask:
- IID = Intrinsic::x86_avx512_mask_sqrt_sd;
- break;
- case X86::BI__builtin_ia32_sqrtss_round_mask:
- IID = Intrinsic::x86_avx512_mask_sqrt_ss;
- break;
- }
- return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- }
- Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Function *F;
- if (Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
- A->getType());
- A = Builder.CreateConstrainedFPCall(F, A);
- } else {
- F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
- A = Builder.CreateCall(F, A);
- }
- Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
- A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
- return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
- }
- case X86::BI__builtin_ia32_sqrtpd256:
- case X86::BI__builtin_ia32_sqrtpd:
- case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps:
- case X86::BI__builtin_ia32_sqrtph256:
- case X86::BI__builtin_ia32_sqrtph:
- case X86::BI__builtin_ia32_sqrtph512:
- case X86::BI__builtin_ia32_sqrtps512:
- case X86::BI__builtin_ia32_sqrtpd512: {
- if (Ops.size() == 2) {
- unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- // Only lower to generic IR if the rounding mode is 4 (AKA CUR_DIRECTION);
- // otherwise keep the target-specific intrinsic.
- if (CC != 4) {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_sqrtph512:
- IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
- break;
- case X86::BI__builtin_ia32_sqrtps512:
- IID = Intrinsic::x86_avx512_sqrt_ps_512;
- break;
- case X86::BI__builtin_ia32_sqrtpd512:
- IID = Intrinsic::x86_avx512_sqrt_pd_512;
- break;
- }
- return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- }
- }
- if (Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
- Ops[0]->getType());
- return Builder.CreateConstrainedFPCall(F, Ops[0]);
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
- return Builder.CreateCall(F, Ops[0]);
- }
- }
- case X86::BI__builtin_ia32_pmuludq128:
- case X86::BI__builtin_ia32_pmuludq256:
- case X86::BI__builtin_ia32_pmuludq512:
- return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
- case X86::BI__builtin_ia32_pmuldq128:
- case X86::BI__builtin_ia32_pmuldq256:
- case X86::BI__builtin_ia32_pmuldq512:
- return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
- case X86::BI__builtin_ia32_pternlogd512_mask:
- case X86::BI__builtin_ia32_pternlogq512_mask:
- case X86::BI__builtin_ia32_pternlogd128_mask:
- case X86::BI__builtin_ia32_pternlogd256_mask:
- case X86::BI__builtin_ia32_pternlogq128_mask:
- case X86::BI__builtin_ia32_pternlogq256_mask:
- return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
- case X86::BI__builtin_ia32_pternlogd512_maskz:
- case X86::BI__builtin_ia32_pternlogq512_maskz:
- case X86::BI__builtin_ia32_pternlogd128_maskz:
- case X86::BI__builtin_ia32_pternlogd256_maskz:
- case X86::BI__builtin_ia32_pternlogq128_maskz:
- case X86::BI__builtin_ia32_pternlogq256_maskz:
- return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
- case X86::BI__builtin_ia32_vpshldd128:
- case X86::BI__builtin_ia32_vpshldd256:
- case X86::BI__builtin_ia32_vpshldd512:
- case X86::BI__builtin_ia32_vpshldq128:
- case X86::BI__builtin_ia32_vpshldq256:
- case X86::BI__builtin_ia32_vpshldq512:
- case X86::BI__builtin_ia32_vpshldw128:
- case X86::BI__builtin_ia32_vpshldw256:
- case X86::BI__builtin_ia32_vpshldw512:
- return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
- case X86::BI__builtin_ia32_vpshrdd128:
- case X86::BI__builtin_ia32_vpshrdd256:
- case X86::BI__builtin_ia32_vpshrdd512:
- case X86::BI__builtin_ia32_vpshrdq128:
- case X86::BI__builtin_ia32_vpshrdq256:
- case X86::BI__builtin_ia32_vpshrdq512:
- case X86::BI__builtin_ia32_vpshrdw128:
- case X86::BI__builtin_ia32_vpshrdw256:
- case X86::BI__builtin_ia32_vpshrdw512:
- // Ops 0 and 1 are swapped.
- return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
- case X86::BI__builtin_ia32_vpshldvd128:
- case X86::BI__builtin_ia32_vpshldvd256:
- case X86::BI__builtin_ia32_vpshldvd512:
- case X86::BI__builtin_ia32_vpshldvq128:
- case X86::BI__builtin_ia32_vpshldvq256:
- case X86::BI__builtin_ia32_vpshldvq512:
- case X86::BI__builtin_ia32_vpshldvw128:
- case X86::BI__builtin_ia32_vpshldvw256:
- case X86::BI__builtin_ia32_vpshldvw512:
- return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
- case X86::BI__builtin_ia32_vpshrdvd128:
- case X86::BI__builtin_ia32_vpshrdvd256:
- case X86::BI__builtin_ia32_vpshrdvd512:
- case X86::BI__builtin_ia32_vpshrdvq128:
- case X86::BI__builtin_ia32_vpshrdvq256:
- case X86::BI__builtin_ia32_vpshrdvq512:
- case X86::BI__builtin_ia32_vpshrdvw128:
- case X86::BI__builtin_ia32_vpshrdvw256:
- case X86::BI__builtin_ia32_vpshrdvw512:
- // Ops 0 and 1 are swapped.
- return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
- // Reductions
- case X86::BI__builtin_ia32_reduce_add_d512:
- case X86::BI__builtin_ia32_reduce_add_q512: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0]});
- }
- case X86::BI__builtin_ia32_reduce_fadd_pd512:
- case X86::BI__builtin_ia32_reduce_fadd_ps512:
- case X86::BI__builtin_ia32_reduce_fadd_ph512:
- case X86::BI__builtin_ia32_reduce_fadd_ph256:
- case X86::BI__builtin_ia32_reduce_fadd_ph128: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
- Builder.getFastMathFlags().setAllowReassoc();
- return Builder.CreateCall(F, {Ops[0], Ops[1]});
- }
- case X86::BI__builtin_ia32_reduce_fmul_pd512:
- case X86::BI__builtin_ia32_reduce_fmul_ps512:
- case X86::BI__builtin_ia32_reduce_fmul_ph512:
- case X86::BI__builtin_ia32_reduce_fmul_ph256:
- case X86::BI__builtin_ia32_reduce_fmul_ph128: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
- Builder.getFastMathFlags().setAllowReassoc();
- return Builder.CreateCall(F, {Ops[0], Ops[1]});
- }
- case X86::BI__builtin_ia32_reduce_fmax_pd512:
- case X86::BI__builtin_ia32_reduce_fmax_ps512:
- case X86::BI__builtin_ia32_reduce_fmax_ph512:
- case X86::BI__builtin_ia32_reduce_fmax_ph256:
- case X86::BI__builtin_ia32_reduce_fmax_ph128: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
- Builder.getFastMathFlags().setNoNaNs();
- return Builder.CreateCall(F, {Ops[0]});
- }
- case X86::BI__builtin_ia32_reduce_fmin_pd512:
- case X86::BI__builtin_ia32_reduce_fmin_ps512:
- case X86::BI__builtin_ia32_reduce_fmin_ph512:
- case X86::BI__builtin_ia32_reduce_fmin_ph256:
- case X86::BI__builtin_ia32_reduce_fmin_ph128: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
- Builder.getFastMathFlags().setNoNaNs();
- return Builder.CreateCall(F, {Ops[0]});
- }
- case X86::BI__builtin_ia32_reduce_mul_d512:
- case X86::BI__builtin_ia32_reduce_mul_q512: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0]});
- }
- // 3DNow!
- case X86::BI__builtin_ia32_pswapdsf:
- case X86::BI__builtin_ia32_pswapdsi: {
- llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
- Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
- return Builder.CreateCall(F, Ops, "pswapd");
- }
- case X86::BI__builtin_ia32_rdrand16_step:
- case X86::BI__builtin_ia32_rdrand32_step:
- case X86::BI__builtin_ia32_rdrand64_step:
- case X86::BI__builtin_ia32_rdseed16_step:
- case X86::BI__builtin_ia32_rdseed32_step:
- case X86::BI__builtin_ia32_rdseed64_step: {
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_rdrand16_step:
- ID = Intrinsic::x86_rdrand_16;
- break;
- case X86::BI__builtin_ia32_rdrand32_step:
- ID = Intrinsic::x86_rdrand_32;
- break;
- case X86::BI__builtin_ia32_rdrand64_step:
- ID = Intrinsic::x86_rdrand_64;
- break;
- case X86::BI__builtin_ia32_rdseed16_step:
- ID = Intrinsic::x86_rdseed_16;
- break;
- case X86::BI__builtin_ia32_rdseed32_step:
- ID = Intrinsic::x86_rdseed_32;
- break;
- case X86::BI__builtin_ia32_rdseed64_step:
- ID = Intrinsic::x86_rdseed_64;
- break;
- }
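- // These intrinsics return {random value, flag}; store the value through
- // the pointer operand and return the flag indicating whether valid data
- // was produced.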
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
- Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
- Ops[0]);
- return Builder.CreateExtractValue(Call, 1);
- }
- case X86::BI__builtin_ia32_addcarryx_u32:
- case X86::BI__builtin_ia32_addcarryx_u64:
- case X86::BI__builtin_ia32_subborrow_u32:
- case X86::BI__builtin_ia32_subborrow_u64: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_addcarryx_u32:
- IID = Intrinsic::x86_addcarry_32;
- break;
- case X86::BI__builtin_ia32_addcarryx_u64:
- IID = Intrinsic::x86_addcarry_64;
- break;
- case X86::BI__builtin_ia32_subborrow_u32:
- IID = Intrinsic::x86_subborrow_32;
- break;
- case X86::BI__builtin_ia32_subborrow_u64:
- IID = Intrinsic::x86_subborrow_64;
- break;
- }
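- // The intrinsic returns {carry-out, result}; store the result through the
- // out pointer (Ops[3]) and return the carry-out.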
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
- { Ops[0], Ops[1], Ops[2] });
- Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
- Ops[3]);
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_fpclassps128_mask:
- case X86::BI__builtin_ia32_fpclassps256_mask:
- case X86::BI__builtin_ia32_fpclassps512_mask:
- case X86::BI__builtin_ia32_fpclassph128_mask:
- case X86::BI__builtin_ia32_fpclassph256_mask:
- case X86::BI__builtin_ia32_fpclassph512_mask:
- case X86::BI__builtin_ia32_fpclasspd128_mask:
- case X86::BI__builtin_ia32_fpclasspd256_mask:
- case X86::BI__builtin_ia32_fpclasspd512_mask: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Value *MaskIn = Ops[2];
- Ops.erase(&Ops[2]);
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_fpclassph128_mask:
- ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
- break;
- case X86::BI__builtin_ia32_fpclassph256_mask:
- ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
- break;
- case X86::BI__builtin_ia32_fpclassph512_mask:
- ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
- break;
- case X86::BI__builtin_ia32_fpclassps128_mask:
- ID = Intrinsic::x86_avx512_fpclass_ps_128;
- break;
- case X86::BI__builtin_ia32_fpclassps256_mask:
- ID = Intrinsic::x86_avx512_fpclass_ps_256;
- break;
- case X86::BI__builtin_ia32_fpclassps512_mask:
- ID = Intrinsic::x86_avx512_fpclass_ps_512;
- break;
- case X86::BI__builtin_ia32_fpclasspd128_mask:
- ID = Intrinsic::x86_avx512_fpclass_pd_128;
- break;
- case X86::BI__builtin_ia32_fpclasspd256_mask:
- ID = Intrinsic::x86_avx512_fpclass_pd_256;
- break;
- case X86::BI__builtin_ia32_fpclasspd512_mask:
- ID = Intrinsic::x86_avx512_fpclass_pd_512;
- break;
- }
- Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
- }
- case X86::BI__builtin_ia32_vp2intersect_q_512:
- case X86::BI__builtin_ia32_vp2intersect_q_256:
- case X86::BI__builtin_ia32_vp2intersect_q_128:
- case X86::BI__builtin_ia32_vp2intersect_d_512:
- case X86::BI__builtin_ia32_vp2intersect_d_256:
- case X86::BI__builtin_ia32_vp2intersect_d_128: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_vp2intersect_q_512:
- ID = Intrinsic::x86_avx512_vp2intersect_q_512;
- break;
- case X86::BI__builtin_ia32_vp2intersect_q_256:
- ID = Intrinsic::x86_avx512_vp2intersect_q_256;
- break;
- case X86::BI__builtin_ia32_vp2intersect_q_128:
- ID = Intrinsic::x86_avx512_vp2intersect_q_128;
- break;
- case X86::BI__builtin_ia32_vp2intersect_d_512:
- ID = Intrinsic::x86_avx512_vp2intersect_d_512;
- break;
- case X86::BI__builtin_ia32_vp2intersect_d_256:
- ID = Intrinsic::x86_avx512_vp2intersect_d_256;
- break;
- case X86::BI__builtin_ia32_vp2intersect_d_128:
- ID = Intrinsic::x86_avx512_vp2intersect_d_128;
- break;
- }
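- // The intrinsic returns a pair of vXi1 masks; convert each to the mask
- // result type and store them through the last two pointer operands.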
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
- Value *Result = Builder.CreateExtractValue(Call, 0);
- Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
- Builder.CreateDefaultAlignedStore(Result, Ops[2]);
- Result = Builder.CreateExtractValue(Call, 1);
- Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
- return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
- }
- case X86::BI__builtin_ia32_vpmultishiftqb128:
- case X86::BI__builtin_ia32_vpmultishiftqb256:
- case X86::BI__builtin_ia32_vpmultishiftqb512: {
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_vpmultishiftqb128:
- ID = Intrinsic::x86_avx512_pmultishift_qb_128;
- break;
- case X86::BI__builtin_ia32_vpmultishiftqb256:
- ID = Intrinsic::x86_avx512_pmultishift_qb_256;
- break;
- case X86::BI__builtin_ia32_vpmultishiftqb512:
- ID = Intrinsic::x86_avx512_pmultishift_qb_512;
- break;
- }
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- }
- case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Value *MaskIn = Ops[2];
- Ops.erase(&Ops[2]);
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
- ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
- break;
- case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
- ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
- break;
- case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
- ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
- break;
- }
- Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
- }
- // packed comparison intrinsics
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd:
- return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd:
- return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd:
- return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd:
- return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd:
- return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd:
- return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnlepd:
- return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd:
- return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
- case X86::BI__builtin_ia32_cmpph128_mask:
- case X86::BI__builtin_ia32_cmpph256_mask:
- case X86::BI__builtin_ia32_cmpph512_mask:
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask:
- IsMaskFCmp = true;
- LLVM_FALLTHROUGH;
- case X86::BI__builtin_ia32_cmpps:
- case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
- // Lower vector comparisons to fcmp instructions, ignoring the requested
- // signalling behaviour and rounding mode. This is only possible if
- // fp-model is not strict and FENV_ACCESS is off.
- // The third argument is the comparison condition, an integer in the
- // range [0, 31].
- unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
- // Lowering to IR fcmp instruction.
- // Ignoring requested signaling behaviour,
- // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
- FCmpInst::Predicate Pred;
- bool IsSignaling;
- // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
- // behavior is inverted. We'll handle that after the switch.
- switch (CC & 0xf) {
- case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
- case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
- case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
- case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
- case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
- case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
- case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
- case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
- case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
- case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
- case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
- case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
- case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
- case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
- case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
- case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
- default: llvm_unreachable("Unhandled CC");
- }
- // Invert the signalling behavior for 16-31.
- if (CC & 0x10)
- IsSignaling = !IsSignaling;
- // If the predicate is true or false and we're using constrained intrinsics,
- // we don't have a compare intrinsic we can use. Just use the legacy X86
- // specific intrinsic.
- // If the intrinsic is mask enabled and we're using constrained intrinsics,
- // use the legacy X86 specific intrinsic.
- if (Builder.getIsFPConstrained() &&
- (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
- IsMaskFCmp)) {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_cmpps:
- IID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpps256:
- IID = Intrinsic::x86_avx_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd:
- IID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmppd256:
- IID = Intrinsic::x86_avx_cmp_pd_256;
- break;
- case X86::BI__builtin_ia32_cmpps512_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
- break;
- case X86::BI__builtin_ia32_cmppd512_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
- break;
- case X86::BI__builtin_ia32_cmpps128_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
- break;
- case X86::BI__builtin_ia32_cmpps256_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd128_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
- break;
- case X86::BI__builtin_ia32_cmppd256_mask:
- IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
- break;
- }
- Function *Intr = CGM.getIntrinsic(IID);
- if (IsMaskFCmp) {
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
- Value *Cmp = Builder.CreateCall(Intr, Ops);
- return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
- }
- return Builder.CreateCall(Intr, Ops);
- }
- // Builtins without the _mask suffix return a vector of integers
- // of the same width as the input vectors
- if (IsMaskFCmp) {
- // We ignore SAE if strict FP is disabled. We only keep precise
- // exception behavior under strict FP.
- // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
- // object will be required.
- unsigned NumElts =
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
- Value *Cmp;
- if (IsSignaling)
- Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
- else
- Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
- return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
- }
- return getVectorFCmpIR(Pred, IsSignaling);
- }
- // SSE scalar comparison intrinsics
- case X86::BI__builtin_ia32_cmpeqss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
- case X86::BI__builtin_ia32_cmpltss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
- case X86::BI__builtin_ia32_cmpless:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
- case X86::BI__builtin_ia32_cmpunordss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
- case X86::BI__builtin_ia32_cmpneqss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
- case X86::BI__builtin_ia32_cmpnltss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
- case X86::BI__builtin_ia32_cmpnless:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
- case X86::BI__builtin_ia32_cmpordss:
- return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
- case X86::BI__builtin_ia32_cmpeqsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
- case X86::BI__builtin_ia32_cmpltsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
- case X86::BI__builtin_ia32_cmplesd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
- case X86::BI__builtin_ia32_cmpunordsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
- case X86::BI__builtin_ia32_cmpneqsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
- case X86::BI__builtin_ia32_cmpnltsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
- case X86::BI__builtin_ia32_cmpnlesd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
- case X86::BI__builtin_ia32_cmpordsd:
- return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
- // f16c half2float intrinsics
- case X86::BI__builtin_ia32_vcvtph2ps:
- case X86::BI__builtin_ia32_vcvtph2ps256:
- case X86::BI__builtin_ia32_vcvtph2ps_mask:
- case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
- return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
- }
- // AVX512 bf16 intrinsics
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
- Ops[2] = getMaskVecValue(
- *this, Ops[2],
- cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
- Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
- return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- }
- case X86::BI__builtin_ia32_cvtsbf162ss_32:
- return EmitX86CvtBF16ToFloatExpr(*this, E, Ops);
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
- break;
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
- IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
- break;
- }
- Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
- return EmitX86Select(*this, Ops[2], Res, Ops[1]);
- }
- case X86::BI__emul:
- case X86::BI__emulu: {
- llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
- bool isSigned = (BuiltinID == X86::BI__emul);
- Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
- Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
- return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
- }
- case X86::BI__mulh:
- case X86::BI__umulh:
- case X86::BI_mul128:
- case X86::BI_umul128: {
- llvm::Type *ResType = ConvertType(E->getType());
- llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
- bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
- Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
- Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
- Value *MulResult, *HigherBits;
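- // Widen both operands to 128 bits, multiply, and take the upper 64 bits
- // with an arithmetic or logical shift depending on signedness.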
- if (IsSigned) {
- MulResult = Builder.CreateNSWMul(LHS, RHS);
- HigherBits = Builder.CreateAShr(MulResult, 64);
- } else {
- MulResult = Builder.CreateNUWMul(LHS, RHS);
- HigherBits = Builder.CreateLShr(MulResult, 64);
- }
- HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
- if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
- return HigherBits;
- Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
- Builder.CreateStore(HigherBits, HighBitsAddress);
- return Builder.CreateIntCast(MulResult, ResType, IsSigned);
- }
- case X86::BI__faststorefence: {
- return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::SyncScope::System);
- }
- case X86::BI__shiftleft128:
- case X86::BI__shiftright128: {
- llvm::Function *F = CGM.getIntrinsic(
- BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
- Int64Ty);
- // Flip low/high ops and zero-extend amount to matching type.
- // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
- // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
- std::swap(Ops[0], Ops[1]);
- Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
- return Builder.CreateCall(F, Ops);
- }
- case X86::BI_ReadWriteBarrier:
- case X86::BI_ReadBarrier:
- case X86::BI_WriteBarrier: {
- return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::SyncScope::SingleThread);
- }
- case X86::BI_AddressOfReturnAddress: {
- Function *F =
- CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
- return Builder.CreateCall(F);
- }
- case X86::BI__stosb: {
- // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
- // instruction, but it will create a memset that won't be optimized away.
- return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
- }
- case X86::BI__ud2:
- // llvm.trap makes a ud2a instruction on x86.
- return EmitTrapCall(Intrinsic::trap);
- case X86::BI__int2c: {
- // This syscall signals a driver assertion failure in x86 NT kernels.
- llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
- llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
- getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoReturn);
- llvm::CallInst *CI = Builder.CreateCall(IA);
- CI->setAttributes(NoReturnAttr);
- return CI;
- }
- case X86::BI__readfsbyte:
- case X86::BI__readfsword:
- case X86::BI__readfsdword:
- case X86::BI__readfsqword: {
- llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr =
- Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
- LoadInst *Load = Builder.CreateAlignedLoad(
- IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
- Load->setVolatile(true);
- return Load;
- }
- case X86::BI__readgsbyte:
- case X86::BI__readgsword:
- case X86::BI__readgsdword:
- case X86::BI__readgsqword: {
- llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr =
- Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
- LoadInst *Load = Builder.CreateAlignedLoad(
- IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
- Load->setVolatile(true);
- return Load;
- }
- case X86::BI__builtin_ia32_paddsb512:
- case X86::BI__builtin_ia32_paddsw512:
- case X86::BI__builtin_ia32_paddsb256:
- case X86::BI__builtin_ia32_paddsw256:
- case X86::BI__builtin_ia32_paddsb128:
- case X86::BI__builtin_ia32_paddsw128:
- return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat);
- case X86::BI__builtin_ia32_paddusb512:
- case X86::BI__builtin_ia32_paddusw512:
- case X86::BI__builtin_ia32_paddusb256:
- case X86::BI__builtin_ia32_paddusw256:
- case X86::BI__builtin_ia32_paddusb128:
- case X86::BI__builtin_ia32_paddusw128:
- return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
- case X86::BI__builtin_ia32_psubsb512:
- case X86::BI__builtin_ia32_psubsw512:
- case X86::BI__builtin_ia32_psubsb256:
- case X86::BI__builtin_ia32_psubsw256:
- case X86::BI__builtin_ia32_psubsb128:
- case X86::BI__builtin_ia32_psubsw128:
- return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);
- case X86::BI__builtin_ia32_psubusb512:
- case X86::BI__builtin_ia32_psubusw512:
- case X86::BI__builtin_ia32_psubusb256:
- case X86::BI__builtin_ia32_psubusw256:
- case X86::BI__builtin_ia32_psubusb128:
- case X86::BI__builtin_ia32_psubusw128:
- return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat);
- case X86::BI__builtin_ia32_encodekey128_u32: {
- Intrinsic::ID IID = Intrinsic::x86_encodekey128;
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
- for (int i = 0; i < 3; ++i) {
- Value *Extract = Builder.CreateExtractValue(Call, i + 1);
- Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
- Ptr = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Extract->getType()));
- Builder.CreateAlignedStore(Extract, Ptr, Align(1));
- }
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_encodekey256_u32: {
- Intrinsic::ID IID = Intrinsic::x86_encodekey256;
- Value *Call =
- Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
- for (int i = 0; i < 4; ++i) {
- Value *Extract = Builder.CreateExtractValue(Call, i + 1);
- Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
- Ptr = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Extract->getType()));
- Builder.CreateAlignedStore(Extract, Ptr, Align(1));
- }
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_aesenc128kl_u8:
- case X86::BI__builtin_ia32_aesdec128kl_u8:
- case X86::BI__builtin_ia32_aesenc256kl_u8:
- case X86::BI__builtin_ia32_aesdec256kl_u8: {
- Intrinsic::ID IID;
- StringRef BlockName;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_aesenc128kl_u8:
- IID = Intrinsic::x86_aesenc128kl;
- BlockName = "aesenc128kl";
- break;
- case X86::BI__builtin_ia32_aesdec128kl_u8:
- IID = Intrinsic::x86_aesdec128kl;
- BlockName = "aesdec128kl";
- break;
- case X86::BI__builtin_ia32_aesenc256kl_u8:
- IID = Intrinsic::x86_aesenc256kl;
- BlockName = "aesenc256kl";
- break;
- case X86::BI__builtin_ia32_aesdec256kl_u8:
- IID = Intrinsic::x86_aesdec256kl;
- BlockName = "aesdec256kl";
- break;
- }
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
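- // Element 0 of the result is the success flag: if its low bit is set,
- // store the processed block through Ops[0], otherwise store zero.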
- BasicBlock *NoError =
- createBasicBlock(BlockName + "_no_error", this->CurFn);
- BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
- BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
- Value *Ret = Builder.CreateExtractValue(Call, 0);
- Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
- Value *Out = Builder.CreateExtractValue(Call, 1);
- Builder.CreateCondBr(Succ, NoError, Error);
- Builder.SetInsertPoint(NoError);
- Builder.CreateDefaultAlignedStore(Out, Ops[0]);
- Builder.CreateBr(End);
- Builder.SetInsertPoint(Error);
- Constant *Zero = llvm::Constant::getNullValue(Out->getType());
- Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
- Builder.CreateBr(End);
- Builder.SetInsertPoint(End);
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_aesencwide128kl_u8:
- case X86::BI__builtin_ia32_aesdecwide128kl_u8:
- case X86::BI__builtin_ia32_aesencwide256kl_u8:
- case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
- Intrinsic::ID IID;
- StringRef BlockName;
- switch (BuiltinID) {
- case X86::BI__builtin_ia32_aesencwide128kl_u8:
- IID = Intrinsic::x86_aesencwide128kl;
- BlockName = "aesencwide128kl";
- break;
- case X86::BI__builtin_ia32_aesdecwide128kl_u8:
- IID = Intrinsic::x86_aesdecwide128kl;
- BlockName = "aesdecwide128kl";
- break;
- case X86::BI__builtin_ia32_aesencwide256kl_u8:
- IID = Intrinsic::x86_aesencwide256kl;
- BlockName = "aesencwide256kl";
- break;
- case X86::BI__builtin_ia32_aesdecwide256kl_u8:
- IID = Intrinsic::x86_aesdecwide256kl;
- BlockName = "aesdecwide256kl";
- break;
- }
- llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
- Value *InOps[9];
- InOps[0] = Ops[2];
- for (int i = 0; i != 8; ++i) {
- Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
- InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
- }
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
- BasicBlock *NoError =
- createBasicBlock(BlockName + "_no_error", this->CurFn);
- BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
- BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
- Value *Ret = Builder.CreateExtractValue(Call, 0);
- Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
- Builder.CreateCondBr(Succ, NoError, Error);
- Builder.SetInsertPoint(NoError);
- for (int i = 0; i != 8; ++i) {
- Value *Extract = Builder.CreateExtractValue(Call, i + 1);
- Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
- Builder.CreateAlignedStore(Extract, Ptr, Align(16));
- }
- Builder.CreateBr(End);
- Builder.SetInsertPoint(Error);
- for (int i = 0; i != 8; ++i) {
- Value *Out = Builder.CreateExtractValue(Call, i + 1);
- Constant *Zero = llvm::Constant::getNullValue(Out->getType());
- Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
- Builder.CreateAlignedStore(Zero, Ptr, Align(16));
- }
- Builder.CreateBr(End);
- Builder.SetInsertPoint(End);
- return Builder.CreateExtractValue(Call, 0);
- }
- case X86::BI__builtin_ia32_vfcmaddcph512_mask:
- IsConjFMA = true;
- LLVM_FALLTHROUGH;
- case X86::BI__builtin_ia32_vfmaddcph512_mask: {
- Intrinsic::ID IID = IsConjFMA
- ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
- : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- return EmitX86Select(*this, Ops[3], Call, Ops[0]);
- }
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
- IsConjFMA = true;
- LLVM_FALLTHROUGH;
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
- Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
- : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
- return EmitX86Select(*this, And, Call, Ops[0]);
- }
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
- IsConjFMA = true;
- LLVM_FALLTHROUGH;
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
- Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
- : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
- static constexpr int Mask[] = {0, 5, 6, 7};
- return Builder.CreateShuffleVector(Call, Ops[2], Mask);
- }
- }
- }
- Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- SmallVector<Value*, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- if (E->getArg(i)->getType()->isArrayType())
- Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
- else
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- }
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- switch (BuiltinID) {
- default: return nullptr;
- // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
- // call __builtin_readcyclecounter.
- case PPC::BI__builtin_ppc_get_timebase:
- return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
- // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
- case PPC::BI__builtin_altivec_lvx:
- case PPC::BI__builtin_altivec_lvxl:
- case PPC::BI__builtin_altivec_lvebx:
- case PPC::BI__builtin_altivec_lvehx:
- case PPC::BI__builtin_altivec_lvewx:
- case PPC::BI__builtin_altivec_lvsl:
- case PPC::BI__builtin_altivec_lvsr:
- case PPC::BI__builtin_vsx_lxvd2x:
- case PPC::BI__builtin_vsx_lxvw4x:
- case PPC::BI__builtin_vsx_lxvd2x_be:
- case PPC::BI__builtin_vsx_lxvw4x_be:
- case PPC::BI__builtin_vsx_lxvl:
- case PPC::BI__builtin_vsx_lxvll:
- {
- if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
- BuiltinID == PPC::BI__builtin_vsx_lxvll) {
- Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
- } else {
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
- Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
- Ops.pop_back();
- }
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
- case PPC::BI__builtin_altivec_lvx:
- ID = Intrinsic::ppc_altivec_lvx;
- break;
- case PPC::BI__builtin_altivec_lvxl:
- ID = Intrinsic::ppc_altivec_lvxl;
- break;
- case PPC::BI__builtin_altivec_lvebx:
- ID = Intrinsic::ppc_altivec_lvebx;
- break;
- case PPC::BI__builtin_altivec_lvehx:
- ID = Intrinsic::ppc_altivec_lvehx;
- break;
- case PPC::BI__builtin_altivec_lvewx:
- ID = Intrinsic::ppc_altivec_lvewx;
- break;
- case PPC::BI__builtin_altivec_lvsl:
- ID = Intrinsic::ppc_altivec_lvsl;
- break;
- case PPC::BI__builtin_altivec_lvsr:
- ID = Intrinsic::ppc_altivec_lvsr;
- break;
- case PPC::BI__builtin_vsx_lxvd2x:
- ID = Intrinsic::ppc_vsx_lxvd2x;
- break;
- case PPC::BI__builtin_vsx_lxvw4x:
- ID = Intrinsic::ppc_vsx_lxvw4x;
- break;
- case PPC::BI__builtin_vsx_lxvd2x_be:
- ID = Intrinsic::ppc_vsx_lxvd2x_be;
- break;
- case PPC::BI__builtin_vsx_lxvw4x_be:
- ID = Intrinsic::ppc_vsx_lxvw4x_be;
- break;
- case PPC::BI__builtin_vsx_lxvl:
- ID = Intrinsic::ppc_vsx_lxvl;
- break;
- case PPC::BI__builtin_vsx_lxvll:
- ID = Intrinsic::ppc_vsx_lxvll;
- break;
- }
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, "");
- }
- // vec_st, vec_xst_be
- case PPC::BI__builtin_altivec_stvx:
- case PPC::BI__builtin_altivec_stvxl:
- case PPC::BI__builtin_altivec_stvebx:
- case PPC::BI__builtin_altivec_stvehx:
- case PPC::BI__builtin_altivec_stvewx:
- case PPC::BI__builtin_vsx_stxvd2x:
- case PPC::BI__builtin_vsx_stxvw4x:
- case PPC::BI__builtin_vsx_stxvd2x_be:
- case PPC::BI__builtin_vsx_stxvw4x_be:
- case PPC::BI__builtin_vsx_stxvl:
- case PPC::BI__builtin_vsx_stxvll:
- {
- if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
- BuiltinID == PPC::BI__builtin_vsx_stxvll ){
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
- }else {
- Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
- Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
- Ops.pop_back();
- }
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported st intrinsic!");
- case PPC::BI__builtin_altivec_stvx:
- ID = Intrinsic::ppc_altivec_stvx;
- break;
- case PPC::BI__builtin_altivec_stvxl:
- ID = Intrinsic::ppc_altivec_stvxl;
- break;
- case PPC::BI__builtin_altivec_stvebx:
- ID = Intrinsic::ppc_altivec_stvebx;
- break;
- case PPC::BI__builtin_altivec_stvehx:
- ID = Intrinsic::ppc_altivec_stvehx;
- break;
- case PPC::BI__builtin_altivec_stvewx:
- ID = Intrinsic::ppc_altivec_stvewx;
- break;
- case PPC::BI__builtin_vsx_stxvd2x:
- ID = Intrinsic::ppc_vsx_stxvd2x;
- break;
- case PPC::BI__builtin_vsx_stxvw4x:
- ID = Intrinsic::ppc_vsx_stxvw4x;
- break;
- case PPC::BI__builtin_vsx_stxvd2x_be:
- ID = Intrinsic::ppc_vsx_stxvd2x_be;
- break;
- case PPC::BI__builtin_vsx_stxvw4x_be:
- ID = Intrinsic::ppc_vsx_stxvw4x_be;
- break;
- case PPC::BI__builtin_vsx_stxvl:
- ID = Intrinsic::ppc_vsx_stxvl;
- break;
- case PPC::BI__builtin_vsx_stxvll:
- ID = Intrinsic::ppc_vsx_stxvll;
- break;
- }
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, "");
- }
- case PPC::BI__builtin_vsx_ldrmb: {
- // Essentially boils down to performing an unaligned VMX load sequence so
- // as to avoid crossing a page boundary and then shuffling the elements
- // into the right side of the vector register.
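- // For example (illustrative), a 7-byte load issues lvx on the aligned
- // 16-byte blocks that contain bytes 0 and 6 of the source and merges the
- // useful bytes with vperm; since lvx never reads past a 16-byte boundary,
- // the sequence cannot fault on a page that holds none of the requested
- // bytes.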
- int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
- llvm::Type *ResTy = ConvertType(E->getType());
- bool IsLE = getTarget().isLittleEndian();
- // If the user wants the entire vector, just load the entire vector.
- if (NumBytes == 16) {
- Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo());
- Value *LD =
- Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
- if (!IsLE)
- return LD;
- // Reverse the bytes on LE.
- SmallVector<int, 16> RevMask;
- for (int Idx = 0; Idx < 16; Idx++)
- RevMask.push_back(15 - Idx);
- return Builder.CreateShuffleVector(LD, LD, RevMask);
- }
- llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
- llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
- : Intrinsic::ppc_altivec_lvsl);
- llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
- Value *HiMem = Builder.CreateGEP(
- Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1));
- Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo");
- Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
- Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1");
- Ops.clear();
- Ops.push_back(IsLE ? HiLd : LoLd);
- Ops.push_back(IsLE ? LoLd : HiLd);
- Ops.push_back(Mask1);
- Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1");
- Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
- if (IsLE) {
- SmallVector<int, 16> Consts;
- for (int Idx = 0; Idx < 16; Idx++) {
- int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
- : 16 - (NumBytes - Idx);
- Consts.push_back(Val);
- }
- return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
- Zero, Consts);
- }
- SmallVector<Constant *, 16> Consts;
- for (int Idx = 0; Idx < 16; Idx++)
- Consts.push_back(Builder.getInt8(NumBytes + Idx));
- Value *Mask2 = ConstantVector::get(Consts);
- return Builder.CreateBitCast(
- Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
- }
- case PPC::BI__builtin_vsx_strmb: {
- int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
- bool IsLE = getTarget().isLittleEndian();
- auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
- // When storing the whole vector, store it directly on BE; on LE, reverse
- // the bytes first and then store.
- if (Width == 16) {
- Value *BC =
- Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo());
- Value *StVec = Ops[2];
- if (IsLE) {
- SmallVector<int, 16> RevMask;
- for (int Idx = 0; Idx < 16; Idx++)
- RevMask.push_back(15 - Idx);
- StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask);
- }
- return Builder.CreateStore(
- StVec, Address(BC, Ops[2]->getType(), CharUnits::fromQuantity(1)));
- }
- auto *ConvTy = Int64Ty;
- unsigned NumElts = 0;
- switch (Width) {
- default:
- llvm_unreachable("width for stores must be a power of 2");
- case 8:
- ConvTy = Int64Ty;
- NumElts = 2;
- break;
- case 4:
- ConvTy = Int32Ty;
- NumElts = 4;
- break;
- case 2:
- ConvTy = Int16Ty;
- NumElts = 8;
- break;
- case 1:
- ConvTy = Int8Ty;
- NumElts = 16;
- break;
- }
- Value *Vec = Builder.CreateBitCast(
- Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts));
- Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0],
- ConstantInt::get(Int64Ty, Offset));
- Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
- Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
- if (IsLE && Width > 1) {
- Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
- Elt = Builder.CreateCall(F, Elt);
- }
- return Builder.CreateStore(
- Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1)));
- };
- unsigned Stored = 0;
- unsigned RemainingBytes = NumBytes;
- Value *Result;
- if (NumBytes == 16)
- return StoreSubVec(16, 0, 0);
- if (NumBytes >= 8) {
- Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
- RemainingBytes -= 8;
- Stored += 8;
- }
- if (RemainingBytes >= 4) {
- Result = StoreSubVec(4, NumBytes - Stored - 4,
- IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
- RemainingBytes -= 4;
- Stored += 4;
- }
- if (RemainingBytes >= 2) {
- Result = StoreSubVec(2, NumBytes - Stored - 2,
- IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
- RemainingBytes -= 2;
- Stored += 2;
- }
- if (RemainingBytes)
- Result =
- StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
- return Result;
- }
- // Square root
- case PPC::BI__builtin_vsx_xvsqrtsp:
- case PPC::BI__builtin_vsx_xvsqrtdp: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- if (Builder.getIsFPConstrained()) {
- llvm::Function *F = CGM.getIntrinsic(
- Intrinsic::experimental_constrained_sqrt, ResultType);
- return Builder.CreateConstrainedFPCall(F, X);
- } else {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
- return Builder.CreateCall(F, X);
- }
- }
- // Count leading zeros
- case PPC::BI__builtin_altivec_vclzb:
- case PPC::BI__builtin_altivec_vclzh:
- case PPC::BI__builtin_altivec_vclzw:
- case PPC::BI__builtin_altivec_vclzd: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
- return Builder.CreateCall(F, {X, Undef});
- }
- case PPC::BI__builtin_altivec_vctzb:
- case PPC::BI__builtin_altivec_vctzh:
- case PPC::BI__builtin_altivec_vctzw:
- case PPC::BI__builtin_altivec_vctzd: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
- return Builder.CreateCall(F, {X, Undef});
- }
- case PPC::BI__builtin_altivec_vec_replace_elt:
- case PPC::BI__builtin_altivec_vec_replace_unaligned: {
- // The third argument of vec_replace_elt and vec_replace_unaligned must
- // be a compile-time constant and is lowered to either the vinsw or the
- // vinsd instruction.
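- // Worked example (illustrative): for a vector of 32-bit elements, an
- // element index of 2 becomes byte index 8 (2 * 32/8); on little endian it
- // is further adjusted below to 12 - 8 = 4 before being passed to vinsw.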
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
- assert(ArgCI &&
- "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
- llvm::Type *ResultType = ConvertType(E->getType());
- llvm::Function *F = nullptr;
- Value *Call = nullptr;
- int64_t ConstArg = ArgCI->getSExtValue();
- unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
- bool Is32Bit = false;
- assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
- // The input to vec_replace_elt is an element index, not a byte index.
- if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
- ConstArg *= ArgWidth / 8;
- if (ArgWidth == 32) {
- Is32Bit = true;
- // When the second argument is 32 bits, it can either be an integer or
- // a float. The vinsw intrinsic is used in this case.
- F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
- // Fix the constant according to endianness.
- if (getTarget().isLittleEndian())
- ConstArg = 12 - ConstArg;
- } else {
- // When the second argument is 64 bits, it can either be a long long or
- // a double. The vinsd intrinsic is used in this case.
- F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
- // Fix the constant for little endian.
- if (getTarget().isLittleEndian())
- ConstArg = 8 - ConstArg;
- }
- Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
- // Depending on ArgWidth, the input vector could be a float or a double.
- // If the input vector is a float type, bitcast the inputs to integers. Or,
- // if the input vector is a double, bitcast the inputs to 64-bit integers.
- if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
- Ops[0] = Builder.CreateBitCast(
- Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
- : llvm::FixedVectorType::get(Int64Ty, 2));
- Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
- }
- // Emit the call to vinsw or vinsd.
- Call = Builder.CreateCall(F, Ops);
- // Depending on the builtin, bitcast to the appropriate result type.
- if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
- !Ops[1]->getType()->isIntegerTy())
- return Builder.CreateBitCast(Call, ResultType);
- else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
- Ops[1]->getType()->isIntegerTy())
- return Call;
- else
- return Builder.CreateBitCast(Call,
- llvm::FixedVectorType::get(Int8Ty, 16));
- }
- case PPC::BI__builtin_altivec_vpopcntb:
- case PPC::BI__builtin_altivec_vpopcnth:
- case PPC::BI__builtin_altivec_vpopcntw:
- case PPC::BI__builtin_altivec_vpopcntd: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, X);
- }
- case PPC::BI__builtin_altivec_vadduqm:
- case PPC::BI__builtin_altivec_vsubuqm: {
- llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int128Ty, 1));
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int128Ty, 1));
- if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
- return Builder.CreateAdd(Ops[0], Ops[1], "vadduqm");
- else
- return Builder.CreateSub(Ops[0], Ops[1], "vsubuqm");
- }
- // Rotate and insert under mask operation.
- // __rldimi(rs, is, shift, mask)
- // (rotl64(rs, shift) & mask) | (is & ~mask)
- // __rlwimi(rs, is, shift, mask)
- // (rotl(rs, shift) & mask) | (is & ~mask)
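- // Worked example (illustrative): __rlwimi(rs, is, 8, 0x0000FF00) is emitted
- // as (fshl(rs, rs, 8) & 0x0000FF00) | (is & ~0x0000FF00), i.e. a rotate
- // left by 8 followed by a masked merge into is, which is exactly the
- // fshl + and/or sequence built below.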
- case PPC::BI__builtin_ppc_rldimi:
- case PPC::BI__builtin_ppc_rlwimi: {
- llvm::Type *Ty = Ops[0]->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
- if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
- Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
- Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[2]});
- Value *X = Builder.CreateAnd(Shift, Ops[3]);
- Value *Y = Builder.CreateAnd(Ops[1], Builder.CreateNot(Ops[3]));
- return Builder.CreateOr(X, Y);
- }
- // Rotate and insert under mask operation.
- // __rlwnm(rs, shift, mask)
- // rotl(rs, shift) & mask
- case PPC::BI__builtin_ppc_rlwnm: {
- llvm::Type *Ty = Ops[0]->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
- Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]});
- return Builder.CreateAnd(Shift, Ops[2]);
- }
- case PPC::BI__builtin_ppc_poppar4:
- case PPC::BI__builtin_ppc_poppar8: {
- llvm::Type *ArgType = Ops[0]->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
- Value *Tmp = Builder.CreateCall(F, Ops[0]);
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
- if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
- return Result;
- }
- case PPC::BI__builtin_ppc_cmpb: {
- if (getTarget().getTriple().isPPC64()) {
- Function *F =
- CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
- return Builder.CreateCall(F, Ops, "cmpb");
- }
- // For 32 bit, emit the code as below:
- // %conv = trunc i64 %a to i32
- // %conv1 = trunc i64 %b to i32
- // %shr = lshr i64 %a, 32
- // %conv2 = trunc i64 %shr to i32
- // %shr3 = lshr i64 %b, 32
- // %conv4 = trunc i64 %shr3 to i32
- // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
- // %conv5 = zext i32 %0 to i64
- // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
- // %conv614 = zext i32 %1 to i64
- // %shl = shl nuw i64 %conv614, 32
- // %or = or i64 %shl, %conv5
- // ret i64 %or
- Function *F =
- CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
- Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty);
- Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty);
- Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
- Value *ArgOneHi =
- Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty);
- Value *ArgTwoHi =
- Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty);
- Value *ResLo = Builder.CreateZExt(
- Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
- Value *ResHiShift = Builder.CreateZExt(
- Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
- Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
- return Builder.CreateOr(ResLo, ResHi);
- }
- // Copy sign
- case PPC::BI__builtin_vsx_xvcpsgnsp:
- case PPC::BI__builtin_vsx_xvcpsgndp: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- ID = Intrinsic::copysign;
- llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, {X, Y});
- }
- // Rounding/truncation
- case PPC::BI__builtin_vsx_xvrspip:
- case PPC::BI__builtin_vsx_xvrdpip:
- case PPC::BI__builtin_vsx_xvrdpim:
- case PPC::BI__builtin_vsx_xvrspim:
- case PPC::BI__builtin_vsx_xvrdpi:
- case PPC::BI__builtin_vsx_xvrspi:
- case PPC::BI__builtin_vsx_xvrdpic:
- case PPC::BI__builtin_vsx_xvrspic:
- case PPC::BI__builtin_vsx_xvrdpiz:
- case PPC::BI__builtin_vsx_xvrspiz: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
- BuiltinID == PPC::BI__builtin_vsx_xvrspim)
- ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_floor
- : Intrinsic::floor;
- else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
- BuiltinID == PPC::BI__builtin_vsx_xvrspi)
- ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_round
- : Intrinsic::round;
- else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
- BuiltinID == PPC::BI__builtin_vsx_xvrspic)
- ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_rint
- : Intrinsic::rint;
- else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
- BuiltinID == PPC::BI__builtin_vsx_xvrspip)
- ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_ceil
- : Intrinsic::ceil;
- else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
- BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
- ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_trunc
- : Intrinsic::trunc;
- llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
- : Builder.CreateCall(F, X);
- }
- // Absolute value
- case PPC::BI__builtin_vsx_xvabsdp:
- case PPC::BI__builtin_vsx_xvabssp: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
- return Builder.CreateCall(F, X);
- }
- // Fastmath by default
- case PPC::BI__builtin_ppc_recipdivf:
- case PPC::BI__builtin_ppc_recipdivd:
- case PPC::BI__builtin_ppc_rsqrtf:
- case PPC::BI__builtin_ppc_rsqrtd: {
- FastMathFlags FMF = Builder.getFastMathFlags();
- Builder.getFastMathFlags().setFast();
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
- BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
- Builder.getFastMathFlags() &= (FMF);
- return FDiv;
- }
- auto *One = ConstantFP::get(ResultType, 1.0);
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
- Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
- Builder.getFastMathFlags() &= (FMF);
- return FDiv;
- }
- case PPC::BI__builtin_ppc_alignx: {
- ConstantInt *AlignmentCI = cast<ConstantInt>(Ops[0]);
- if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
- AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
- llvm::Value::MaximumAlignment);
- emitAlignmentAssumption(Ops[1], E->getArg(1),
- /*The expr loc is sufficient.*/ SourceLocation(),
- AlignmentCI, nullptr);
- return Ops[1];
- }
- case PPC::BI__builtin_ppc_rdlam: {
- llvm::Type *Ty = Ops[0]->getType();
- Value *ShiftAmt = Builder.CreateIntCast(Ops[1], Ty, false);
- Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
- Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt});
- return Builder.CreateAnd(Rotate, Ops[2]);
- }
- case PPC::BI__builtin_ppc_load2r: {
- Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
- Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
- Value *LoadIntrinsic = Builder.CreateCall(F, Ops);
- return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
- }
- // FMA variations
- case PPC::BI__builtin_vsx_xvmaddadp:
- case PPC::BI__builtin_vsx_xvmaddasp:
- case PPC::BI__builtin_vsx_xvnmaddadp:
- case PPC::BI__builtin_vsx_xvnmaddasp:
- case PPC::BI__builtin_vsx_xvmsubadp:
- case PPC::BI__builtin_vsx_xvmsubasp:
- case PPC::BI__builtin_vsx_xvnmsubadp:
- case PPC::BI__builtin_vsx_xvnmsubasp: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *Z = EmitScalarExpr(E->getArg(2));
- llvm::Function *F;
- if (Builder.getIsFPConstrained())
- F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
- else
- F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- switch (BuiltinID) {
- case PPC::BI__builtin_vsx_xvmaddadp:
- case PPC::BI__builtin_vsx_xvmaddasp:
- if (Builder.getIsFPConstrained())
- return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
- else
- return Builder.CreateCall(F, {X, Y, Z});
- case PPC::BI__builtin_vsx_xvnmaddadp:
- case PPC::BI__builtin_vsx_xvnmaddasp:
- if (Builder.getIsFPConstrained())
- return Builder.CreateFNeg(
- Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
- else
- return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
- case PPC::BI__builtin_vsx_xvmsubadp:
- case PPC::BI__builtin_vsx_xvmsubasp:
- if (Builder.getIsFPConstrained())
- return Builder.CreateConstrainedFPCall(
- F, {X, Y, Builder.CreateFNeg(Z, "neg")});
- else
- return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
- case PPC::BI__builtin_vsx_xvnmsubadp:
- case PPC::BI__builtin_vsx_xvnmsubasp:
- if (Builder.getIsFPConstrained())
- return Builder.CreateFNeg(
- Builder.CreateConstrainedFPCall(
- F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
- "neg");
- else
- return Builder.CreateFNeg(
- Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
- "neg");
- }
- llvm_unreachable("Unknown FMA operation");
- return nullptr; // Suppress no-return warning
- }
- case PPC::BI__builtin_vsx_insertword: {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
- // Third argument is a compile-time constant int. It must be clamped to
- // the range [0, 12].
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
- assert(ArgCI &&
- "Third arg to xxinsertw intrinsic must be constant integer");
- const int64_t MaxIndex = 12;
- int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
- // The builtin semantics don't exactly match the xxinsertw instruction's
- // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
- // word from the first argument, and inserts it in the second argument. The
- // instruction extracts the word from its second input register and inserts
- // it into its first input register, so swap the first and second arguments.
- std::swap(Ops[0], Ops[1]);
- // Need to cast the second argument from a vector of unsigned int to a
- // vector of long long.
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
- if (getTarget().isLittleEndian()) {
- // Reverse the double words in the vector we will extract from.
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
- Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
- // Reverse the index.
- Index = MaxIndex - Index;
- }
- // Intrinsic expects the first arg to be a vector of int.
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
- Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
- return Builder.CreateCall(F, Ops);
- }
- case PPC::BI__builtin_vsx_extractuword: {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
- // Intrinsic expects the first argument to be a vector of doublewords.
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
- // The second argument is a compile time constant int that needs to
- // be clamped to the range [0, 12].
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
- assert(ArgCI &&
- "Second Arg to xxextractuw intrinsic must be a constant integer!");
- const int64_t MaxIndex = 12;
- int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
- if (getTarget().isLittleEndian()) {
- // Reverse the index.
- Index = MaxIndex - Index;
- Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
- // Emit the call, then reverse the double words of the results vector.
- Value *Call = Builder.CreateCall(F, Ops);
- Value *ShuffleCall =
- Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
- return ShuffleCall;
- } else {
- Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
- return Builder.CreateCall(F, Ops);
- }
- }
- case PPC::BI__builtin_vsx_xxpermdi: {
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
- assert(ArgCI && "Third arg must be constant integer!");
- unsigned Index = ArgCI->getZExtValue();
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
- // Account for endianness by treating this as just a shuffle. So we use the
- // same indices for both LE and BE in order to produce expected results in
- // both cases.
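- // Worked example (illustrative): Index = 1 gives ElemIdx0 = 0 and
- // ElemIdx1 = 3, i.e. doubleword 0 of Ops[0] followed by doubleword 1 of
- // Ops[1] in the concatenated shuffle input.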
- int ElemIdx0 = (Index & 2) >> 1;
- int ElemIdx1 = 2 + (Index & 1);
- int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
- Value *ShuffleCall =
- Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
- QualType BIRetType = E->getType();
- auto RetTy = ConvertType(BIRetType);
- return Builder.CreateBitCast(ShuffleCall, RetTy);
- }
- case PPC::BI__builtin_vsx_xxsldwi: {
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
- assert(ArgCI && "Third argument must be a compile time constant");
- unsigned Index = ArgCI->getZExtValue() & 0x3;
- Ops[0] =
- Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
- Ops[1] =
- Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4));
- // Create a shuffle mask
- int ElemIdx0;
- int ElemIdx1;
- int ElemIdx2;
- int ElemIdx3;
- if (getTarget().isLittleEndian()) {
- // Little endian element N comes from element 8+N-Index of the
- // concatenated wide vector (of course, using modulo arithmetic on
- // the total number of elements).
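- // Worked example (illustrative): Index = 1 on little endian yields the
- // mask {7, 0, 1, 2}, i.e. the last word of Ops[1] followed by the first
- // three words of Ops[0].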
- ElemIdx0 = (8 - Index) % 8;
- ElemIdx1 = (9 - Index) % 8;
- ElemIdx2 = (10 - Index) % 8;
- ElemIdx3 = (11 - Index) % 8;
- } else {
- // Big endian ElemIdx<N> = Index + N
- ElemIdx0 = Index;
- ElemIdx1 = Index + 1;
- ElemIdx2 = Index + 2;
- ElemIdx3 = Index + 3;
- }
- int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
- Value *ShuffleCall =
- Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
- QualType BIRetType = E->getType();
- auto RetTy = ConvertType(BIRetType);
- return Builder.CreateBitCast(ShuffleCall, RetTy);
- }
- case PPC::BI__builtin_pack_vector_int128: {
- bool isLittleEndian = getTarget().isLittleEndian();
- Value *UndefValue =
- llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2));
- Value *Res = Builder.CreateInsertElement(
- UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
- Res = Builder.CreateInsertElement(Res, Ops[1],
- (uint64_t)(isLittleEndian ? 0 : 1));
- return Builder.CreateBitCast(Res, ConvertType(E->getType()));
- }
- case PPC::BI__builtin_unpack_vector_int128: {
- ConstantInt *Index = cast<ConstantInt>(Ops[1]);
- Value *Unpacked = Builder.CreateBitCast(
- Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
- if (getTarget().isLittleEndian())
- Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
- return Builder.CreateExtractElement(Unpacked, Index);
- }
- case PPC::BI__builtin_ppc_sthcx: {
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
- Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
- Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty);
- return Builder.CreateCall(F, Ops);
- }
- // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
- // Some of the MMA instructions accumulate their result into an existing
- // accumulator whereas the others generate a new accumulator. So we need
- // custom code generation to expand a builtin call with a pointer into a
- // load of the accumulator (if the corresponding instruction accumulates its
- // result), followed by the call to the intrinsic and a store of the result.
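- // Rough sketch of the expansion for an accumulating builtin such as
- // __builtin_mma_xvf32gerpp(&acc, a, b) (illustrative, operands abbreviated):
- //   %acc  = load <512 x i1>, ptr %0
- //   %call = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc, ...)
- //   store <512 x i1> %call, ptr %0
- // Builtins that do not accumulate skip the initial load.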
- #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
- case PPC::BI__builtin_##Name:
- #include "clang/Basic/BuiltinsPPC.def"
- {
- // The first argument of the disassemble builtins is a pointer used to store
- // their result. However, the LLVM intrinsics return their result in multiple
- // return values. So, here we emit code extracting these values from the
- // intrinsic results and storing them using that pointer.
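- // For instance (illustrative), __builtin_mma_disassemble_acc(resp, &acc)
- // calls llvm.ppc.mma.disassemble.acc and stores the four <16 x i8> values
- // it returns at byte offsets 0, 16, 32 and 48 from resp.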
- if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
- BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
- BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
- unsigned NumVecs = 2;
- auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
- if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
- NumVecs = 4;
- Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
- }
- llvm::Function *F = CGM.getIntrinsic(Intrinsic);
- Address Addr = EmitPointerWithAlignment(E->getArg(1));
- Value *Vec = Builder.CreateLoad(Addr);
- Value *Call = Builder.CreateCall(F, {Vec});
- llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
- for (unsigned i=0; i<NumVecs; i++) {
- Value *Vec = Builder.CreateExtractValue(Call, i);
- llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
- Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
- Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
- }
- return Call;
- }
- if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
- BuiltinID == PPC::BI__builtin_mma_build_acc) {
- // Reverse the order of the operands for LE, so the
- // same builtin call can be used on both LE and BE
- // without the need for the programmer to swap operands.
- // The operands are reversed starting from the second argument;
- // the first operand is the pointer to the pair/accumulator
- // that is being built.
- if (getTarget().isLittleEndian())
- std::reverse(Ops.begin() + 1, Ops.end());
- }
- bool Accumulate;
- switch (BuiltinID) {
- #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
- case PPC::BI__builtin_##Name: \
- ID = Intrinsic::ppc_##Intr; \
- Accumulate = Acc; \
- break;
- #include "clang/Basic/BuiltinsPPC.def"
- }
- if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
- BuiltinID == PPC::BI__builtin_vsx_stxvp ||
- BuiltinID == PPC::BI__builtin_mma_lxvp ||
- BuiltinID == PPC::BI__builtin_mma_stxvp) {
- if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
- BuiltinID == PPC::BI__builtin_mma_lxvp) {
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
- Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
- } else {
- Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
- Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
- }
- Ops.pop_back();
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, "");
- }
- SmallVector<Value*, 4> CallOps;
- if (Accumulate) {
- Address Addr = EmitPointerWithAlignment(E->getArg(0));
- Value *Acc = Builder.CreateLoad(Addr);
- CallOps.push_back(Acc);
- }
- for (unsigned i=1; i<Ops.size(); i++)
- CallOps.push_back(Ops[i]);
- llvm::Function *F = CGM.getIntrinsic(ID);
- Value *Call = Builder.CreateCall(F, CallOps);
- return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
- }
- case PPC::BI__builtin_ppc_compare_and_swap:
- case PPC::BI__builtin_ppc_compare_and_swaplp: {
- Address Addr = EmitPointerWithAlignment(E->getArg(0));
- Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
- Value *OldVal = Builder.CreateLoad(OldValAddr);
- QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
- LValue LV = MakeAddrLValue(Addr, AtomicTy);
- auto Pair = EmitAtomicCompareExchange(
- LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(),
- llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
- // Unlike C11's atomic_compare_exchange, according to
- // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
- // > In either case, the contents of the memory location specified by addr
- // > are copied into the memory location specified by old_val_addr.
- // But it does not specify whether the store to OldValAddr is atomic or
- // which ordering to use. Following XL's codegen, treat it as a normal
- // store.
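- // Shape of the emitted IR (illustrative, for the 32-bit variant):
- //   %pair    = cmpxchg weak ptr %addr, i32 %old, i32 %new monotonic monotonic
- //   %loaded  = extractvalue { i32, i1 } %pair, 0
- //   store i32 %loaded, ptr %old_val_addr
- //   %success = extractvalue { i32, i1 } %pair, 1
- //   %ret     = zext i1 %success to i32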
- Value *LoadedVal = Pair.first.getScalarVal();
- Builder.CreateStore(LoadedVal, OldValAddr);
- return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
- }
- case PPC::BI__builtin_ppc_fetch_and_add:
- case PPC::BI__builtin_ppc_fetch_and_addlp: {
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
- llvm::AtomicOrdering::Monotonic);
- }
- case PPC::BI__builtin_ppc_fetch_and_and:
- case PPC::BI__builtin_ppc_fetch_and_andlp: {
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
- llvm::AtomicOrdering::Monotonic);
- }
- case PPC::BI__builtin_ppc_fetch_and_or:
- case PPC::BI__builtin_ppc_fetch_and_orlp: {
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
- llvm::AtomicOrdering::Monotonic);
- }
- case PPC::BI__builtin_ppc_fetch_and_swap:
- case PPC::BI__builtin_ppc_fetch_and_swaplp: {
- return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
- llvm::AtomicOrdering::Monotonic);
- }
- case PPC::BI__builtin_ppc_ldarx:
- case PPC::BI__builtin_ppc_lwarx:
- case PPC::BI__builtin_ppc_lharx:
- case PPC::BI__builtin_ppc_lbarx:
- return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
- case PPC::BI__builtin_ppc_mfspr: {
- llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
- ? Int32Ty
- : Int64Ty;
- Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
- return Builder.CreateCall(F, Ops);
- }
- case PPC::BI__builtin_ppc_mtspr: {
- llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
- ? Int32Ty
- : Int64Ty;
- Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
- return Builder.CreateCall(F, Ops);
- }
- case PPC::BI__builtin_ppc_popcntb: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
- return Builder.CreateCall(F, Ops, "popcntb");
- }
- case PPC::BI__builtin_ppc_mtfsf: {
- // The builtin takes a uint32 that needs to be cast to an
- // f64 to be passed to the intrinsic.
- Value *Cast = Builder.CreateUIToFP(Ops[1], DoubleTy);
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
- return Builder.CreateCall(F, {Ops[0], Cast}, "");
- }
- case PPC::BI__builtin_ppc_swdiv_nochk:
- case PPC::BI__builtin_ppc_swdivs_nochk: {
- FastMathFlags FMF = Builder.getFastMathFlags();
- Builder.getFastMathFlags().setFast();
- Value *FDiv = Builder.CreateFDiv(Ops[0], Ops[1], "swdiv_nochk");
- Builder.getFastMathFlags() &= (FMF);
- return FDiv;
- }
- case PPC::BI__builtin_ppc_fric:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::rint,
- Intrinsic::experimental_constrained_rint))
- .getScalarVal();
- case PPC::BI__builtin_ppc_frim:
- case PPC::BI__builtin_ppc_frims:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::floor,
- Intrinsic::experimental_constrained_floor))
- .getScalarVal();
- case PPC::BI__builtin_ppc_frin:
- case PPC::BI__builtin_ppc_frins:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::round,
- Intrinsic::experimental_constrained_round))
- .getScalarVal();
- case PPC::BI__builtin_ppc_frip:
- case PPC::BI__builtin_ppc_frips:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::ceil,
- Intrinsic::experimental_constrained_ceil))
- .getScalarVal();
- case PPC::BI__builtin_ppc_friz:
- case PPC::BI__builtin_ppc_frizs:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::trunc,
- Intrinsic::experimental_constrained_trunc))
- .getScalarVal();
- case PPC::BI__builtin_ppc_fsqrt:
- case PPC::BI__builtin_ppc_fsqrts:
- return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
- *this, E, Intrinsic::sqrt,
- Intrinsic::experimental_constrained_sqrt))
- .getScalarVal();
- case PPC::BI__builtin_ppc_test_data_class: {
- llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType();
- unsigned IntrinsicID;
- if (ArgType->isDoubleTy())
- IntrinsicID = Intrinsic::ppc_test_data_class_d;
- else if (ArgType->isFloatTy())
- IntrinsicID = Intrinsic::ppc_test_data_class_f;
- else
- llvm_unreachable("Invalid Argument Type");
- return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops,
- "test_data_class");
- }
- case PPC::BI__builtin_ppc_swdiv:
- case PPC::BI__builtin_ppc_swdivs:
- return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv");
- }
- }
- namespace {
- // If \p E is not a null pointer, insert an address space cast to match the
- // return type of \p E if necessary.
- Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
- const CallExpr *E = nullptr) {
- auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
- auto *Call = CGF.Builder.CreateCall(F);
- Call->addRetAttr(
- Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
- Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
- if (!E)
- return Call;
- QualType BuiltinRetType = E->getType();
- auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
- if (RetTy == Call->getType())
- return Call;
- return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
- }
- // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
- Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
- const unsigned XOffset = 4;
- auto *DP = EmitAMDGPUDispatchPtr(CGF);
- // Indexing the HSA kernel_dispatch_packet struct.
- auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2);
- auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
- auto *DstTy =
- CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
- auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
- auto *LD = CGF.Builder.CreateLoad(
- Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2)));
- llvm::MDBuilder MDHelper(CGF.getLLVMContext());
- llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
- APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
- LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
- LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
- llvm::MDNode::get(CGF.getLLVMContext(), None));
- return LD;
- }
- // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
- Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
- const unsigned XOffset = 12;
- auto *DP = EmitAMDGPUDispatchPtr(CGF);
- // Indexing the HSA kernel_dispatch_packet struct.
- auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
- auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
- auto *DstTy =
- CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
- auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
- auto *LD = CGF.Builder.CreateLoad(
- Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
- LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
- llvm::MDNode::get(CGF.getLLVMContext(), None));
- return LD;
- }
- } // namespace
- // For processing memory ordering and memory scope arguments of various
- // amdgcn builtins.
- // \p Order takes a C++11-compatible memory-ordering specifier and converts
- // it into LLVM's memory ordering specifier using the atomic C ABI, and
- // writes it to \p AO. \p Scope takes a const char * and converts it into an
- // AMDGCN-specific SyncScopeID and writes it to \p SSID.
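- // For example (illustrative): __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,
- // "workgroup") sets AO = SequentiallyConsistent and SSID to the
- // "workgroup" sync scope, so the caller ends up emitting
- //   fence syncscope("workgroup") seq_cst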
- bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
- llvm::AtomicOrdering &AO,
- llvm::SyncScope::ID &SSID) {
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- // Map C11/C++11 memory ordering to LLVM memory ordering
- assert(llvm::isValidAtomicOrderingCABI(ord));
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- case llvm::AtomicOrderingCABI::consume:
- AO = llvm::AtomicOrdering::Acquire;
- break;
- case llvm::AtomicOrderingCABI::release:
- AO = llvm::AtomicOrdering::Release;
- break;
- case llvm::AtomicOrderingCABI::acq_rel:
- AO = llvm::AtomicOrdering::AcquireRelease;
- break;
- case llvm::AtomicOrderingCABI::seq_cst:
- AO = llvm::AtomicOrdering::SequentiallyConsistent;
- break;
- case llvm::AtomicOrderingCABI::relaxed:
- AO = llvm::AtomicOrdering::Monotonic;
- break;
- }
- StringRef scp;
- llvm::getConstantStringInfo(Scope, scp);
- SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
- return true;
- }
- return false;
- }
- Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
- llvm::SyncScope::ID SSID;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_div_scale:
- case AMDGPU::BI__builtin_amdgcn_div_scalef: {
- // Translate from the intrinsic's struct return to the builtin's out
- // argument.
- Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
- llvm::Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Value *Y = EmitScalarExpr(E->getArg(1));
- llvm::Value *Z = EmitScalarExpr(E->getArg(2));
- llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
- X->getType());
- llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
- llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
- llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
- llvm::Type *RealFlagType = FlagOutPtr.getElementType();
- llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
- Builder.CreateStore(FlagExt, FlagOutPtr);
- return Result;
- }
- case AMDGPU::BI__builtin_amdgcn_div_fmas:
- case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
- Src0->getType());
- llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
- return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
- }
- case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
- return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
- return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp:
- case AMDGPU::BI__builtin_amdgcn_update_dpp: {
- llvm::SmallVector<llvm::Value *, 6> Args;
- for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
- assert(Args.size() == 5 || Args.size() == 6);
- if (Args.size() == 5)
- Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
- Function *F =
- CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
- return Builder.CreateCall(F, Args);
- }
- case AMDGPU::BI__builtin_amdgcn_div_fixup:
- case AMDGPU::BI__builtin_amdgcn_div_fixupf:
- case AMDGPU::BI__builtin_amdgcn_div_fixuph:
- return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
- case AMDGPU::BI__builtin_amdgcn_trig_preop:
- case AMDGPU::BI__builtin_amdgcn_trig_preopf:
- return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
- case AMDGPU::BI__builtin_amdgcn_rcp:
- case AMDGPU::BI__builtin_amdgcn_rcpf:
- case AMDGPU::BI__builtin_amdgcn_rcph:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
- case AMDGPU::BI__builtin_amdgcn_sqrt:
- case AMDGPU::BI__builtin_amdgcn_sqrtf:
- case AMDGPU::BI__builtin_amdgcn_sqrth:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
- case AMDGPU::BI__builtin_amdgcn_rsq:
- case AMDGPU::BI__builtin_amdgcn_rsqf:
- case AMDGPU::BI__builtin_amdgcn_rsqh:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
- case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
- case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
- case AMDGPU::BI__builtin_amdgcn_sinf:
- case AMDGPU::BI__builtin_amdgcn_sinh:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
- case AMDGPU::BI__builtin_amdgcn_cosf:
- case AMDGPU::BI__builtin_amdgcn_cosh:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
- case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
- return EmitAMDGPUDispatchPtr(*this, E);
- case AMDGPU::BI__builtin_amdgcn_log_clampf:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
- case AMDGPU::BI__builtin_amdgcn_ldexp:
- case AMDGPU::BI__builtin_amdgcn_ldexpf:
- case AMDGPU::BI__builtin_amdgcn_ldexph:
- return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
- case AMDGPU::BI__builtin_amdgcn_frexp_mant:
- case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
- case AMDGPU::BI__builtin_amdgcn_frexp_manth:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
- case AMDGPU::BI__builtin_amdgcn_frexp_exp:
- case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
- Value *Src0 = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
- { Builder.getInt32Ty(), Src0->getType() });
- return Builder.CreateCall(F, Src0);
- }
- case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
- Value *Src0 = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
- { Builder.getInt16Ty(), Src0->getType() });
- return Builder.CreateCall(F, Src0);
- }
- case AMDGPU::BI__builtin_amdgcn_fract:
- case AMDGPU::BI__builtin_amdgcn_fractf:
- case AMDGPU::BI__builtin_amdgcn_fracth:
- return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
- case AMDGPU::BI__builtin_amdgcn_lerp:
- return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
- case AMDGPU::BI__builtin_amdgcn_ubfe:
- return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
- case AMDGPU::BI__builtin_amdgcn_sbfe:
- return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
- case AMDGPU::BI__builtin_amdgcn_uicmp:
- case AMDGPU::BI__builtin_amdgcn_uicmpl:
- case AMDGPU::BI__builtin_amdgcn_sicmp:
- case AMDGPU::BI__builtin_amdgcn_sicmpl: {
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- // FIXME-GFX10: How should 32 bit mask be handled?
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
- { Builder.getInt64Ty(), Src0->getType() });
- return Builder.CreateCall(F, { Src0, Src1, Src2 });
- }
- case AMDGPU::BI__builtin_amdgcn_fcmp:
- case AMDGPU::BI__builtin_amdgcn_fcmpf: {
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- // FIXME-GFX10: How should 32 bit mask be handled?
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
- { Builder.getInt64Ty(), Src0->getType() });
- return Builder.CreateCall(F, { Src0, Src1, Src2 });
- }
- case AMDGPU::BI__builtin_amdgcn_class:
- case AMDGPU::BI__builtin_amdgcn_classf:
- case AMDGPU::BI__builtin_amdgcn_classh:
- return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
- case AMDGPU::BI__builtin_amdgcn_fmed3f:
- case AMDGPU::BI__builtin_amdgcn_fmed3h:
- return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
- case AMDGPU::BI__builtin_amdgcn_ds_append:
- case AMDGPU::BI__builtin_amdgcn_ds_consume: {
- Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
- Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
- Value *Src0 = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
- return Builder.CreateCall(F, { Src0, Builder.getFalse() });
- }
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
- Intrinsic::ID Intrin;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- Intrin = Intrinsic::amdgcn_ds_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- Intrin = Intrinsic::amdgcn_ds_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
- Intrin = Intrinsic::amdgcn_ds_fmax;
- break;
- }
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
- llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
- llvm::FunctionType *FTy = F->getFunctionType();
- llvm::Type *PTy = FTy->getParamType(0);
- Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
- return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
- }
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
- Intrinsic::ID IID;
- llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
- ArgTy = llvm::Type::getFloatTy(getLLVMContext());
- IID = Intrinsic::amdgcn_global_atomic_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
- ArgTy = llvm::FixedVectorType::get(
- llvm::Type::getHalfTy(getLLVMContext()), 2);
- IID = Intrinsic::amdgcn_global_atomic_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
- IID = Intrinsic::amdgcn_global_atomic_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
- IID = Intrinsic::amdgcn_global_atomic_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
- IID = Intrinsic::amdgcn_global_atomic_fmax;
- break;
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
- IID = Intrinsic::amdgcn_flat_atomic_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
- IID = Intrinsic::amdgcn_flat_atomic_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
- IID = Intrinsic::amdgcn_flat_atomic_fmax;
- break;
- }
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
- llvm::Function *F =
- CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
- return Builder.CreateCall(F, {Addr, Val});
- }
- case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
- case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
- Intrinsic::ID IID;
- llvm::Type *ArgTy;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
- ArgTy = llvm::Type::getFloatTy(getLLVMContext());
- IID = Intrinsic::amdgcn_ds_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
- ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
- IID = Intrinsic::amdgcn_ds_fadd;
- break;
- }
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
- llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
- llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
- llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
- llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
- llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
- return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
- }
- case AMDGPU::BI__builtin_amdgcn_read_exec: {
- CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
- CI->setConvergent();
- return CI;
- }
- case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
- case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
- StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
- "exec_lo" : "exec_hi";
- CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
- CI->setConvergent();
- return CI;
- }
- case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
- case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
- case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
- case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
- llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
- llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
- llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
- llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
- llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
- llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
- // The builtins take these arguments as vec4 where the last element is
- // ignored. The intrinsic takes them as vec3.
- RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
- ArrayRef<int>{0, 1, 2});
- RayDir =
- Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
- RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
- ArrayRef<int>{0, 1, 2});
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
- {NodePtr->getType(), RayDir->getType()});
- return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
- RayInverseDir, TextureDescr});
- }
- // amdgcn workitem
- case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
- return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
- case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
- return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
- case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
- return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
- // amdgcn workgroup size
- case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
- return EmitAMDGPUWorkGroupSize(*this, 0);
- case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
- return EmitAMDGPUWorkGroupSize(*this, 1);
- case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
- return EmitAMDGPUWorkGroupSize(*this, 2);
- // amdgcn grid size
- case AMDGPU::BI__builtin_amdgcn_grid_size_x:
- return EmitAMDGPUGridSize(*this, 0);
- case AMDGPU::BI__builtin_amdgcn_grid_size_y:
- return EmitAMDGPUGridSize(*this, 1);
- case AMDGPU::BI__builtin_amdgcn_grid_size_z:
- return EmitAMDGPUGridSize(*this, 2);
- // r600 intrinsics
- case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
- case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
- return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
- case AMDGPU::BI__builtin_r600_read_tidig_x:
- return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
- case AMDGPU::BI__builtin_r600_read_tidig_y:
- return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
- case AMDGPU::BI__builtin_r600_read_tidig_z:
- return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
- case AMDGPU::BI__builtin_amdgcn_alignbit: {
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
- return Builder.CreateCall(F, { Src0, Src1, Src2 });
- }
- case AMDGPU::BI__builtin_amdgcn_fence: {
- if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1)), AO, SSID))
- return Builder.CreateFence(AO, SSID);
- LLVM_FALLTHROUGH;
- }
- case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
- case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
- case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
- case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
- unsigned BuiltinAtomicOp;
- llvm::Type *ResultType = ConvertType(E->getType());
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
- case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
- BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
- break;
- case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
- case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
- BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
- break;
- }
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- llvm::Function *F =
- CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
- if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(3)), AO, SSID)) {
- // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
- // scope as unsigned values.
- Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
- Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
- QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
- bool Volatile =
- PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
- return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
- }
- LLVM_FALLTHROUGH;
- }
- default:
- return nullptr;
- }
- }
- /// Handle a SystemZ function in which the final argument is a pointer
- /// to an int that receives the post-instruction CC value. At the LLVM level
- /// this is represented as a function that returns a {result, cc} pair.
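- /// For example, a builtin call such as __builtin_s390_vpkshs(a, b, &cc) is
- /// lowered to the s390.vpkshs intrinsic; element 0 of the returned pair is
- /// the result and element 1 is stored through the cc pointer.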
- static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
- unsigned IntrinsicID,
- const CallExpr *E) {
- unsigned NumArgs = E->getNumArgs() - 1;
- SmallVector<Value *, 8> Args(NumArgs);
- for (unsigned I = 0; I < NumArgs; ++I)
- Args[I] = CGF.EmitScalarExpr(E->getArg(I));
- Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
- Value *Call = CGF.Builder.CreateCall(F, Args);
- Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
- CGF.Builder.CreateStore(CC, CCPtr);
- return CGF.Builder.CreateExtractValue(Call, 0);
- }
- Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- switch (BuiltinID) {
- case SystemZ::BI__builtin_tbegin: {
- Value *TDB = EmitScalarExpr(E->getArg(0));
- Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
- Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
- return Builder.CreateCall(F, {TDB, Control});
- }
- case SystemZ::BI__builtin_tbegin_nofloat: {
- Value *TDB = EmitScalarExpr(E->getArg(0));
- Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
- Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
- return Builder.CreateCall(F, {TDB, Control});
- }
- case SystemZ::BI__builtin_tbeginc: {
- Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
- Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
- Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
- return Builder.CreateCall(F, {TDB, Control});
- }
- case SystemZ::BI__builtin_tabort: {
- Value *Data = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
- return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
- }
- case SystemZ::BI__builtin_non_tx_store: {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *Data = EmitScalarExpr(E->getArg(1));
- Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
- return Builder.CreateCall(F, {Data, Address});
- }
- // Vector builtins. Note that most vector builtins are mapped automatically
- // to target-specific LLVM intrinsics. The ones handled specially here can
- // be represented via standard LLVM IR, which is preferable to enable common
- // LLVM optimizations.
- case SystemZ::BI__builtin_s390_vpopctb:
- case SystemZ::BI__builtin_s390_vpopcth:
- case SystemZ::BI__builtin_s390_vpopctf:
- case SystemZ::BI__builtin_s390_vpopctg: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, X);
- }
- case SystemZ::BI__builtin_s390_vclzb:
- case SystemZ::BI__builtin_s390_vclzh:
- case SystemZ::BI__builtin_s390_vclzf:
- case SystemZ::BI__builtin_s390_vclzg: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
- return Builder.CreateCall(F, {X, Undef});
- }
- case SystemZ::BI__builtin_s390_vctzb:
- case SystemZ::BI__builtin_s390_vctzh:
- case SystemZ::BI__builtin_s390_vctzf:
- case SystemZ::BI__builtin_s390_vctzg: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
- Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
- return Builder.CreateCall(F, {X, Undef});
- }
- case SystemZ::BI__builtin_s390_vfsqsb:
- case SystemZ::BI__builtin_s390_vfsqdb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
- return Builder.CreateConstrainedFPCall(F, { X });
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
- return Builder.CreateCall(F, X);
- }
- }
- case SystemZ::BI__builtin_s390_vfmasb:
- case SystemZ::BI__builtin_s390_vfmadb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *Z = EmitScalarExpr(E->getArg(2));
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
- return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateCall(F, {X, Y, Z});
- }
- }
- case SystemZ::BI__builtin_s390_vfmssb:
- case SystemZ::BI__builtin_s390_vfmsdb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *Z = EmitScalarExpr(E->getArg(2));
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
- return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
- }
- }
- case SystemZ::BI__builtin_s390_vfnmasb:
- case SystemZ::BI__builtin_s390_vfnmadb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *Z = EmitScalarExpr(E->getArg(2));
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
- return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
- }
- }
- case SystemZ::BI__builtin_s390_vfnmssb:
- case SystemZ::BI__builtin_s390_vfnmsdb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- Value *Z = EmitScalarExpr(E->getArg(2));
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
- Value *NegZ = Builder.CreateFNeg(Z, "sub");
- return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
- } else {
- Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- Value *NegZ = Builder.CreateFNeg(Z, "neg");
- return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
- }
- }
- case SystemZ::BI__builtin_s390_vflpsb:
- case SystemZ::BI__builtin_s390_vflpdb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
- return Builder.CreateCall(F, X);
- }
- case SystemZ::BI__builtin_s390_vflnsb:
- case SystemZ::BI__builtin_s390_vflndb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
- return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
- }
- case SystemZ::BI__builtin_s390_vfisb:
- case SystemZ::BI__builtin_s390_vfidb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- // Constant-fold the M4 and M5 mask arguments.
- llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
- llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
- // Check whether this instance can be represented via an LLVM standard
- // intrinsic. We only support some combinations of M4 and M5.
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- Intrinsic::ID CI;
- switch (M4.getZExtValue()) {
- default: break;
- case 0: // IEEE-inexact exception allowed
- switch (M5.getZExtValue()) {
- default: break;
- case 0: ID = Intrinsic::rint;
- CI = Intrinsic::experimental_constrained_rint; break;
- }
- break;
- case 4: // IEEE-inexact exception suppressed
- switch (M5.getZExtValue()) {
- default: break;
- case 0: ID = Intrinsic::nearbyint;
- CI = Intrinsic::experimental_constrained_nearbyint; break;
- case 1: ID = Intrinsic::round;
- CI = Intrinsic::experimental_constrained_round; break;
- case 5: ID = Intrinsic::trunc;
- CI = Intrinsic::experimental_constrained_trunc; break;
- case 6: ID = Intrinsic::ceil;
- CI = Intrinsic::experimental_constrained_ceil; break;
- case 7: ID = Intrinsic::floor;
- CI = Intrinsic::experimental_constrained_floor; break;
- }
- break;
- }
- if (ID != Intrinsic::not_intrinsic) {
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(CI, ResultType);
- return Builder.CreateConstrainedFPCall(F, X);
- } else {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, X);
- }
- }
- switch (BuiltinID) { // FIXME: constrained version?
- case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
- case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
- default: llvm_unreachable("Unknown BuiltinID");
- }
- Function *F = CGM.getIntrinsic(ID);
- Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
- Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
- return Builder.CreateCall(F, {X, M4Value, M5Value});
- }
- case SystemZ::BI__builtin_s390_vfmaxsb:
- case SystemZ::BI__builtin_s390_vfmaxdb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- // Constant-fold the M4 mask argument.
- llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
- // Check whether this instance can be represented via an LLVM standard
- // intrinsic. We only support some values of M4.
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- Intrinsic::ID CI;
- switch (M4.getZExtValue()) {
- default: break;
- case 4: ID = Intrinsic::maxnum;
- CI = Intrinsic::experimental_constrained_maxnum; break;
- }
- if (ID != Intrinsic::not_intrinsic) {
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(CI, ResultType);
- return Builder.CreateConstrainedFPCall(F, {X, Y});
- } else {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, {X, Y});
- }
- }
- switch (BuiltinID) {
- case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
- case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
- default: llvm_unreachable("Unknown BuiltinID");
- }
- Function *F = CGM.getIntrinsic(ID);
- Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
- return Builder.CreateCall(F, {X, Y, M4Value});
- }
- case SystemZ::BI__builtin_s390_vfminsb:
- case SystemZ::BI__builtin_s390_vfmindb: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Y = EmitScalarExpr(E->getArg(1));
- // Constant-fold the M4 mask argument.
- llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
- // Check whether this instance can be represented via an LLVM standard
- // intrinsic. We only support some values of M4.
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- Intrinsic::ID CI;
- switch (M4.getZExtValue()) {
- default: break;
- case 4: ID = Intrinsic::minnum;
- CI = Intrinsic::experimental_constrained_minnum; break;
- }
- if (ID != Intrinsic::not_intrinsic) {
- if (Builder.getIsFPConstrained()) {
- Function *F = CGM.getIntrinsic(CI, ResultType);
- return Builder.CreateConstrainedFPCall(F, {X, Y});
- } else {
- Function *F = CGM.getIntrinsic(ID, ResultType);
- return Builder.CreateCall(F, {X, Y});
- }
- }
- switch (BuiltinID) {
- case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
- case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
- default: llvm_unreachable("Unknown BuiltinID");
- }
- Function *F = CGM.getIntrinsic(ID);
- Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
- return Builder.CreateCall(F, {X, Y, M4Value});
- }
- case SystemZ::BI__builtin_s390_vlbrh:
- case SystemZ::BI__builtin_s390_vlbrf:
- case SystemZ::BI__builtin_s390_vlbrg: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
- return Builder.CreateCall(F, X);
- }
- // Vector intrinsics that output the post-instruction CC value.
- #define INTRINSIC_WITH_CC(NAME) \
- case SystemZ::BI__builtin_##NAME: \
- return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
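- // For instance, INTRINSIC_WITH_CC(s390_vpkshs) expands to a case for
- // SystemZ::BI__builtin_s390_vpkshs that forwards to Intrinsic::s390_vpkshs.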
- INTRINSIC_WITH_CC(s390_vpkshs);
- INTRINSIC_WITH_CC(s390_vpksfs);
- INTRINSIC_WITH_CC(s390_vpksgs);
- INTRINSIC_WITH_CC(s390_vpklshs);
- INTRINSIC_WITH_CC(s390_vpklsfs);
- INTRINSIC_WITH_CC(s390_vpklsgs);
- INTRINSIC_WITH_CC(s390_vceqbs);
- INTRINSIC_WITH_CC(s390_vceqhs);
- INTRINSIC_WITH_CC(s390_vceqfs);
- INTRINSIC_WITH_CC(s390_vceqgs);
- INTRINSIC_WITH_CC(s390_vchbs);
- INTRINSIC_WITH_CC(s390_vchhs);
- INTRINSIC_WITH_CC(s390_vchfs);
- INTRINSIC_WITH_CC(s390_vchgs);
- INTRINSIC_WITH_CC(s390_vchlbs);
- INTRINSIC_WITH_CC(s390_vchlhs);
- INTRINSIC_WITH_CC(s390_vchlfs);
- INTRINSIC_WITH_CC(s390_vchlgs);
- INTRINSIC_WITH_CC(s390_vfaebs);
- INTRINSIC_WITH_CC(s390_vfaehs);
- INTRINSIC_WITH_CC(s390_vfaefs);
- INTRINSIC_WITH_CC(s390_vfaezbs);
- INTRINSIC_WITH_CC(s390_vfaezhs);
- INTRINSIC_WITH_CC(s390_vfaezfs);
- INTRINSIC_WITH_CC(s390_vfeebs);
- INTRINSIC_WITH_CC(s390_vfeehs);
- INTRINSIC_WITH_CC(s390_vfeefs);
- INTRINSIC_WITH_CC(s390_vfeezbs);
- INTRINSIC_WITH_CC(s390_vfeezhs);
- INTRINSIC_WITH_CC(s390_vfeezfs);
- INTRINSIC_WITH_CC(s390_vfenebs);
- INTRINSIC_WITH_CC(s390_vfenehs);
- INTRINSIC_WITH_CC(s390_vfenefs);
- INTRINSIC_WITH_CC(s390_vfenezbs);
- INTRINSIC_WITH_CC(s390_vfenezhs);
- INTRINSIC_WITH_CC(s390_vfenezfs);
- INTRINSIC_WITH_CC(s390_vistrbs);
- INTRINSIC_WITH_CC(s390_vistrhs);
- INTRINSIC_WITH_CC(s390_vistrfs);
- INTRINSIC_WITH_CC(s390_vstrcbs);
- INTRINSIC_WITH_CC(s390_vstrchs);
- INTRINSIC_WITH_CC(s390_vstrcfs);
- INTRINSIC_WITH_CC(s390_vstrczbs);
- INTRINSIC_WITH_CC(s390_vstrczhs);
- INTRINSIC_WITH_CC(s390_vstrczfs);
- INTRINSIC_WITH_CC(s390_vfcesbs);
- INTRINSIC_WITH_CC(s390_vfcedbs);
- INTRINSIC_WITH_CC(s390_vfchsbs);
- INTRINSIC_WITH_CC(s390_vfchdbs);
- INTRINSIC_WITH_CC(s390_vfchesbs);
- INTRINSIC_WITH_CC(s390_vfchedbs);
- INTRINSIC_WITH_CC(s390_vftcisb);
- INTRINSIC_WITH_CC(s390_vftcidb);
- INTRINSIC_WITH_CC(s390_vstrsb);
- INTRINSIC_WITH_CC(s390_vstrsh);
- INTRINSIC_WITH_CC(s390_vstrsf);
- INTRINSIC_WITH_CC(s390_vstrszb);
- INTRINSIC_WITH_CC(s390_vstrszh);
- INTRINSIC_WITH_CC(s390_vstrszf);
- #undef INTRINSIC_WITH_CC
- default:
- return nullptr;
- }
- }
- namespace {
- // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
- struct NVPTXMmaLdstInfo {
- unsigned NumResults; // Number of elements to load/store
- // Intrinsic IDs for the col/row variants; 0 if a particular layout is unsupported.
- unsigned IID_col;
- unsigned IID_row;
- };
- #define MMA_INTR(geom_op_type, layout) \
- Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
- #define MMA_LDST(n, geom_op_type) \
- { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
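- // For illustration, MMA_LDST(8, m16n16k16_load_a_f16) expands to
- // {8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
- //     Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride}.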
- static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
- switch (BuiltinID) {
- // FP MMA loads
- case NVPTX::BI__hmma_m16n16k16_ld_a:
- return MMA_LDST(8, m16n16k16_load_a_f16);
- case NVPTX::BI__hmma_m16n16k16_ld_b:
- return MMA_LDST(8, m16n16k16_load_b_f16);
- case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- return MMA_LDST(4, m16n16k16_load_c_f16);
- case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- return MMA_LDST(8, m16n16k16_load_c_f32);
- case NVPTX::BI__hmma_m32n8k16_ld_a:
- return MMA_LDST(8, m32n8k16_load_a_f16);
- case NVPTX::BI__hmma_m32n8k16_ld_b:
- return MMA_LDST(8, m32n8k16_load_b_f16);
- case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
- return MMA_LDST(4, m32n8k16_load_c_f16);
- case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
- return MMA_LDST(8, m32n8k16_load_c_f32);
- case NVPTX::BI__hmma_m8n32k16_ld_a:
- return MMA_LDST(8, m8n32k16_load_a_f16);
- case NVPTX::BI__hmma_m8n32k16_ld_b:
- return MMA_LDST(8, m8n32k16_load_b_f16);
- case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
- return MMA_LDST(4, m8n32k16_load_c_f16);
- case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
- return MMA_LDST(8, m8n32k16_load_c_f32);
- // Integer MMA loads
- case NVPTX::BI__imma_m16n16k16_ld_a_s8:
- return MMA_LDST(2, m16n16k16_load_a_s8);
- case NVPTX::BI__imma_m16n16k16_ld_a_u8:
- return MMA_LDST(2, m16n16k16_load_a_u8);
- case NVPTX::BI__imma_m16n16k16_ld_b_s8:
- return MMA_LDST(2, m16n16k16_load_b_s8);
- case NVPTX::BI__imma_m16n16k16_ld_b_u8:
- return MMA_LDST(2, m16n16k16_load_b_u8);
- case NVPTX::BI__imma_m16n16k16_ld_c:
- return MMA_LDST(8, m16n16k16_load_c_s32);
- case NVPTX::BI__imma_m32n8k16_ld_a_s8:
- return MMA_LDST(4, m32n8k16_load_a_s8);
- case NVPTX::BI__imma_m32n8k16_ld_a_u8:
- return MMA_LDST(4, m32n8k16_load_a_u8);
- case NVPTX::BI__imma_m32n8k16_ld_b_s8:
- return MMA_LDST(1, m32n8k16_load_b_s8);
- case NVPTX::BI__imma_m32n8k16_ld_b_u8:
- return MMA_LDST(1, m32n8k16_load_b_u8);
- case NVPTX::BI__imma_m32n8k16_ld_c:
- return MMA_LDST(8, m32n8k16_load_c_s32);
- case NVPTX::BI__imma_m8n32k16_ld_a_s8:
- return MMA_LDST(1, m8n32k16_load_a_s8);
- case NVPTX::BI__imma_m8n32k16_ld_a_u8:
- return MMA_LDST(1, m8n32k16_load_a_u8);
- case NVPTX::BI__imma_m8n32k16_ld_b_s8:
- return MMA_LDST(4, m8n32k16_load_b_s8);
- case NVPTX::BI__imma_m8n32k16_ld_b_u8:
- return MMA_LDST(4, m8n32k16_load_b_u8);
- case NVPTX::BI__imma_m8n32k16_ld_c:
- return MMA_LDST(8, m8n32k16_load_c_s32);
- // Sub-integer MMA loads.
- // Only row/col layout is supported by A/B fragments.
- case NVPTX::BI__imma_m8n8k32_ld_a_s4:
- return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
- case NVPTX::BI__imma_m8n8k32_ld_a_u4:
- return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
- case NVPTX::BI__imma_m8n8k32_ld_b_s4:
- return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
- case NVPTX::BI__imma_m8n8k32_ld_b_u4:
- return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
- case NVPTX::BI__imma_m8n8k32_ld_c:
- return MMA_LDST(2, m8n8k32_load_c_s32);
- case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
- return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
- case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
- return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
- case NVPTX::BI__bmma_m8n8k128_ld_c:
- return MMA_LDST(2, m8n8k128_load_c_s32);
- // Double MMA loads
- case NVPTX::BI__dmma_m8n8k4_ld_a:
- return MMA_LDST(1, m8n8k4_load_a_f64);
- case NVPTX::BI__dmma_m8n8k4_ld_b:
- return MMA_LDST(1, m8n8k4_load_b_f64);
- case NVPTX::BI__dmma_m8n8k4_ld_c:
- return MMA_LDST(2, m8n8k4_load_c_f64);
- // Alternate float MMA loads
- case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
- return MMA_LDST(4, m16n16k16_load_a_bf16);
- case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
- return MMA_LDST(4, m16n16k16_load_b_bf16);
- case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
- return MMA_LDST(2, m8n32k16_load_a_bf16);
- case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
- return MMA_LDST(8, m8n32k16_load_b_bf16);
- case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
- return MMA_LDST(8, m32n8k16_load_a_bf16);
- case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
- return MMA_LDST(2, m32n8k16_load_b_bf16);
- case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
- return MMA_LDST(4, m16n16k8_load_a_tf32);
- case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
- return MMA_LDST(4, m16n16k8_load_b_tf32);
- case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
- return MMA_LDST(8, m16n16k8_load_c_f32);
- // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
- // PTX and LLVM IR, where stores always use fragment D, NVCC builtins always
- // use fragment C for both loads and stores.
- // FP MMA stores.
- case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- return MMA_LDST(4, m16n16k16_store_d_f16);
- case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- return MMA_LDST(8, m16n16k16_store_d_f32);
- case NVPTX::BI__hmma_m32n8k16_st_c_f16:
- return MMA_LDST(4, m32n8k16_store_d_f16);
- case NVPTX::BI__hmma_m32n8k16_st_c_f32:
- return MMA_LDST(8, m32n8k16_store_d_f32);
- case NVPTX::BI__hmma_m8n32k16_st_c_f16:
- return MMA_LDST(4, m8n32k16_store_d_f16);
- case NVPTX::BI__hmma_m8n32k16_st_c_f32:
- return MMA_LDST(8, m8n32k16_store_d_f32);
- // Integer and sub-integer MMA stores.
- // Another naming quirk. Unlike other MMA builtins that use PTX types in the
- // name, integer loads/stores use LLVM's i32.
- case NVPTX::BI__imma_m16n16k16_st_c_i32:
- return MMA_LDST(8, m16n16k16_store_d_s32);
- case NVPTX::BI__imma_m32n8k16_st_c_i32:
- return MMA_LDST(8, m32n8k16_store_d_s32);
- case NVPTX::BI__imma_m8n32k16_st_c_i32:
- return MMA_LDST(8, m8n32k16_store_d_s32);
- case NVPTX::BI__imma_m8n8k32_st_c_i32:
- return MMA_LDST(2, m8n8k32_store_d_s32);
- case NVPTX::BI__bmma_m8n8k128_st_c_i32:
- return MMA_LDST(2, m8n8k128_store_d_s32);
- // Double MMA store
- case NVPTX::BI__dmma_m8n8k4_st_c_f64:
- return MMA_LDST(2, m8n8k4_store_d_f64);
- // Alternate float MMA store
- case NVPTX::BI__mma_m16n16k8_st_c_f32:
- return MMA_LDST(8, m16n16k8_store_d_f32);
- default:
- llvm_unreachable("Unknown MMA builtin");
- }
- }
- #undef MMA_LDST
- #undef MMA_INTR
- struct NVPTXMmaInfo {
- unsigned NumEltsA;
- unsigned NumEltsB;
- unsigned NumEltsC;
- unsigned NumEltsD;
- // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
- // over 'col' for layout. The index of non-satf variants is expected to match
- // the undocumented layout constants used by CUDA's mma.hpp.
- std::array<unsigned, 8> Variants;
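- // For example, Layout == 1 (row-major A, column-major B) with Satf set
- // selects Variants[1 + 4 * 1] == Variants[5], the row_col satfinite variant.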
- unsigned getMMAIntrinsic(int Layout, bool Satf) {
- unsigned Index = Layout + 4 * Satf;
- if (Index >= Variants.size())
- return 0;
- return Variants[Index];
- }
- };
- // Maps an MMA builtin to its NVPTXMmaInfo; getMMAIntrinsic then yields the
- // intrinsic that matches Layout and Satf for valid combinations, or 0
- // otherwise.
- static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
- // clang-format off
- #define MMA_VARIANTS(geom, type) \
- Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
- Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
- Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
- Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
- #define MMA_SATF_VARIANTS(geom, type) \
- MMA_VARIANTS(geom, type), \
- Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
- // Sub-integer MMA only supports row.col layout.
- #define MMA_VARIANTS_I4(geom, type) \
- 0, \
- Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
- 0, \
- 0, \
- 0, \
- Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
- 0, \
- 0
- // b1 MMA does not support .satfinite.
- #define MMA_VARIANTS_B1_XOR(geom, type) \
- 0, \
- Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
- 0, \
- 0, \
- 0, \
- 0, \
- 0, \
- 0
- #define MMA_VARIANTS_B1_AND(geom, type) \
- 0, \
- Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
- 0, \
- 0, \
- 0, \
- 0, \
- 0, \
- 0
- // clang-format on
- switch (BuiltinID) {
- // FP MMA
- // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
- // the NumEltsN fields of the return value are ordered as A, B, C, D.
- case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
- return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
- case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
- return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
- case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
- return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
- case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
- case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
- return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
- case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
- return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
- case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
- return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
- case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
- return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
- case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
- return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
- case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
- return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
- case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
- return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
- case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
- return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
- // Integer MMA
- case NVPTX::BI__imma_m16n16k16_mma_s8:
- return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
- case NVPTX::BI__imma_m16n16k16_mma_u8:
- return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
- case NVPTX::BI__imma_m32n8k16_mma_s8:
- return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
- case NVPTX::BI__imma_m32n8k16_mma_u8:
- return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
- case NVPTX::BI__imma_m8n32k16_mma_s8:
- return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
- case NVPTX::BI__imma_m8n32k16_mma_u8:
- return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
- // Sub-integer MMA
- case NVPTX::BI__imma_m8n8k32_mma_s4:
- return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
- case NVPTX::BI__imma_m8n8k32_mma_u4:
- return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
- case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
- return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
- case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
- return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
- // Double MMA
- case NVPTX::BI__dmma_m8n8k4_mma_f64:
- return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
- // Alternate FP MMA
- case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
- return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
- case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
- return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
- case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
- return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
- case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
- return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
- default:
- llvm_unreachable("Unexpected builtin ID.");
- }
- #undef MMA_VARIANTS
- #undef MMA_SATF_VARIANTS
- #undef MMA_VARIANTS_I4
- #undef MMA_VARIANTS_B1_AND
- #undef MMA_VARIANTS_B1_XOR
- }
- } // namespace
- Value *
- CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
- auto MakeLdg = [&](unsigned IntrinsicID) {
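- // Emit a call to the requested ldg intrinsic, overloaded on the loaded type
- // and the pointer type, with the pointee's natural alignment as the second
- // operand.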
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- clang::CharUnits Align =
- CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
- return Builder.CreateCall(
- CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
- Ptr->getType()}),
- {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
- };
- auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
- Ptr->getType()}),
- {Ptr, EmitScalarExpr(E->getArg(1))});
- };
- switch (BuiltinID) {
- case NVPTX::BI__nvvm_atom_add_gen_i:
- case NVPTX::BI__nvvm_atom_add_gen_l:
- case NVPTX::BI__nvvm_atom_add_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
- case NVPTX::BI__nvvm_atom_sub_gen_i:
- case NVPTX::BI__nvvm_atom_sub_gen_l:
- case NVPTX::BI__nvvm_atom_sub_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
- case NVPTX::BI__nvvm_atom_and_gen_i:
- case NVPTX::BI__nvvm_atom_and_gen_l:
- case NVPTX::BI__nvvm_atom_and_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
- case NVPTX::BI__nvvm_atom_or_gen_i:
- case NVPTX::BI__nvvm_atom_or_gen_l:
- case NVPTX::BI__nvvm_atom_or_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
- case NVPTX::BI__nvvm_atom_xor_gen_i:
- case NVPTX::BI__nvvm_atom_xor_gen_l:
- case NVPTX::BI__nvvm_atom_xor_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
- case NVPTX::BI__nvvm_atom_xchg_gen_i:
- case NVPTX::BI__nvvm_atom_xchg_gen_l:
- case NVPTX::BI__nvvm_atom_xchg_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
- case NVPTX::BI__nvvm_atom_max_gen_i:
- case NVPTX::BI__nvvm_atom_max_gen_l:
- case NVPTX::BI__nvvm_atom_max_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
- case NVPTX::BI__nvvm_atom_max_gen_ui:
- case NVPTX::BI__nvvm_atom_max_gen_ul:
- case NVPTX::BI__nvvm_atom_max_gen_ull:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
- case NVPTX::BI__nvvm_atom_min_gen_i:
- case NVPTX::BI__nvvm_atom_min_gen_l:
- case NVPTX::BI__nvvm_atom_min_gen_ll:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
- case NVPTX::BI__nvvm_atom_min_gen_ui:
- case NVPTX::BI__nvvm_atom_min_gen_ul:
- case NVPTX::BI__nvvm_atom_min_gen_ull:
- return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
- case NVPTX::BI__nvvm_atom_cas_gen_i:
- case NVPTX::BI__nvvm_atom_cas_gen_l:
- case NVPTX::BI__nvvm_atom_cas_gen_ll:
- // __nvvm_atom_cas_gen_* should return the old value rather than the
- // success flag.
- return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
- case NVPTX::BI__nvvm_atom_add_gen_f:
- case NVPTX::BI__nvvm_atom_add_gen_d: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
- AtomicOrdering::SequentiallyConsistent);
- }
- case NVPTX::BI__nvvm_atom_inc_gen_ui: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Function *FnALI32 =
- CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
- return Builder.CreateCall(FnALI32, {Ptr, Val});
- }
- case NVPTX::BI__nvvm_atom_dec_gen_ui: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Function *FnALD32 =
- CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
- return Builder.CreateCall(FnALD32, {Ptr, Val});
- }
- case NVPTX::BI__nvvm_ldg_c:
- case NVPTX::BI__nvvm_ldg_c2:
- case NVPTX::BI__nvvm_ldg_c4:
- case NVPTX::BI__nvvm_ldg_s:
- case NVPTX::BI__nvvm_ldg_s2:
- case NVPTX::BI__nvvm_ldg_s4:
- case NVPTX::BI__nvvm_ldg_i:
- case NVPTX::BI__nvvm_ldg_i2:
- case NVPTX::BI__nvvm_ldg_i4:
- case NVPTX::BI__nvvm_ldg_l:
- case NVPTX::BI__nvvm_ldg_ll:
- case NVPTX::BI__nvvm_ldg_ll2:
- case NVPTX::BI__nvvm_ldg_uc:
- case NVPTX::BI__nvvm_ldg_uc2:
- case NVPTX::BI__nvvm_ldg_uc4:
- case NVPTX::BI__nvvm_ldg_us:
- case NVPTX::BI__nvvm_ldg_us2:
- case NVPTX::BI__nvvm_ldg_us4:
- case NVPTX::BI__nvvm_ldg_ui:
- case NVPTX::BI__nvvm_ldg_ui2:
- case NVPTX::BI__nvvm_ldg_ui4:
- case NVPTX::BI__nvvm_ldg_ul:
- case NVPTX::BI__nvvm_ldg_ull:
- case NVPTX::BI__nvvm_ldg_ull2:
- // PTX Interoperability section 2.2: "For a vector with an even number of
- // elements, its alignment is set to number of elements times the alignment
- // of its member: n*alignof(t)."
- return MakeLdg(Intrinsic::nvvm_ldg_global_i);
- case NVPTX::BI__nvvm_ldg_f:
- case NVPTX::BI__nvvm_ldg_f2:
- case NVPTX::BI__nvvm_ldg_f4:
- case NVPTX::BI__nvvm_ldg_d:
- case NVPTX::BI__nvvm_ldg_d2:
- return MakeLdg(Intrinsic::nvvm_ldg_global_f);
- case NVPTX::BI__nvvm_atom_cta_add_gen_i:
- case NVPTX::BI__nvvm_atom_cta_add_gen_l:
- case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_add_gen_i:
- case NVPTX::BI__nvvm_atom_sys_add_gen_l:
- case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_add_gen_f:
- case NVPTX::BI__nvvm_atom_cta_add_gen_d:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
- case NVPTX::BI__nvvm_atom_sys_add_gen_f:
- case NVPTX::BI__nvvm_atom_sys_add_gen_d:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
- case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
- case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
- case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
- case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
- case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_max_gen_i:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
- case NVPTX::BI__nvvm_atom_cta_max_gen_l:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
- case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_max_gen_i:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
- case NVPTX::BI__nvvm_atom_sys_max_gen_l:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
- case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_min_gen_i:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
- case NVPTX::BI__nvvm_atom_cta_min_gen_l:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
- case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_min_gen_i:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
- case NVPTX::BI__nvvm_atom_sys_min_gen_l:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
- case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
- case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
- case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_and_gen_i:
- case NVPTX::BI__nvvm_atom_cta_and_gen_l:
- case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_and_gen_i:
- case NVPTX::BI__nvvm_atom_sys_and_gen_l:
- case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_or_gen_i:
- case NVPTX::BI__nvvm_atom_cta_or_gen_l:
- case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_or_gen_i:
- case NVPTX::BI__nvvm_atom_sys_or_gen_l:
- case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
- case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
- case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
- case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
- case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
- case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
- case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
- case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
- case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(
- Intrinsic::nvvm_atomic_cas_gen_i_cta,
- {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
- {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
- }
- case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
- case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
- case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- return Builder.CreateCall(
- CGM.getIntrinsic(
- Intrinsic::nvvm_atomic_cas_gen_i_sys,
- {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
- {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
- }
- case NVPTX::BI__nvvm_match_all_sync_i32p:
- case NVPTX::BI__nvvm_match_all_sync_i64p: {
- Value *Mask = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
- Value *ResultPair = Builder.CreateCall(
- CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
- ? Intrinsic::nvvm_match_all_sync_i32p
- : Intrinsic::nvvm_match_all_sync_i64p),
- {Mask, Val});
- Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
- PredOutPtr.getElementType());
- Builder.CreateStore(Pred, PredOutPtr);
- return Builder.CreateExtractValue(ResultPair, 0);
- }
- // FP MMA loads
- case NVPTX::BI__hmma_m16n16k16_ld_a:
- case NVPTX::BI__hmma_m16n16k16_ld_b:
- case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- case NVPTX::BI__hmma_m32n8k16_ld_a:
- case NVPTX::BI__hmma_m32n8k16_ld_b:
- case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
- case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
- case NVPTX::BI__hmma_m8n32k16_ld_a:
- case NVPTX::BI__hmma_m8n32k16_ld_b:
- case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
- case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
- // Integer MMA loads.
- case NVPTX::BI__imma_m16n16k16_ld_a_s8:
- case NVPTX::BI__imma_m16n16k16_ld_a_u8:
- case NVPTX::BI__imma_m16n16k16_ld_b_s8:
- case NVPTX::BI__imma_m16n16k16_ld_b_u8:
- case NVPTX::BI__imma_m16n16k16_ld_c:
- case NVPTX::BI__imma_m32n8k16_ld_a_s8:
- case NVPTX::BI__imma_m32n8k16_ld_a_u8:
- case NVPTX::BI__imma_m32n8k16_ld_b_s8:
- case NVPTX::BI__imma_m32n8k16_ld_b_u8:
- case NVPTX::BI__imma_m32n8k16_ld_c:
- case NVPTX::BI__imma_m8n32k16_ld_a_s8:
- case NVPTX::BI__imma_m8n32k16_ld_a_u8:
- case NVPTX::BI__imma_m8n32k16_ld_b_s8:
- case NVPTX::BI__imma_m8n32k16_ld_b_u8:
- case NVPTX::BI__imma_m8n32k16_ld_c:
- // Sub-integer MMA loads.
- case NVPTX::BI__imma_m8n8k32_ld_a_s4:
- case NVPTX::BI__imma_m8n8k32_ld_a_u4:
- case NVPTX::BI__imma_m8n8k32_ld_b_s4:
- case NVPTX::BI__imma_m8n8k32_ld_b_u4:
- case NVPTX::BI__imma_m8n8k32_ld_c:
- case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
- case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
- case NVPTX::BI__bmma_m8n8k128_ld_c:
- // Double MMA loads.
- case NVPTX::BI__dmma_m8n8k4_ld_a:
- case NVPTX::BI__dmma_m8n8k4_ld_b:
- case NVPTX::BI__dmma_m8n8k4_ld_c:
- // Alternate float MMA loads.
- case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
- case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
- case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
- case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
- case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
- case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
- case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
- case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
- case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
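- // Common lowering for all MMA/WMMA load builtins: pick the row- or
- // column-major intrinsic variant, call it with the source pointer and
- // stride, and scatter the returned fragment elements into the destination,
- // one 4-byte slot per element.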
- Address Dst = EmitPointerWithAlignment(E->getArg(0));
- Value *Src = EmitScalarExpr(E->getArg(1));
- Value *Ldm = EmitScalarExpr(E->getArg(2));
- Optional<llvm::APSInt> isColMajorArg =
- E->getArg(3)->getIntegerConstantExpr(getContext());
- if (!isColMajorArg)
- return nullptr;
- bool isColMajor = isColMajorArg->getSExtValue();
- NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
- unsigned IID = isColMajor ? II.IID_col : II.IID_row;
- if (IID == 0)
- return nullptr;
- Value *Result =
- Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
- // Save returned values.
- assert(II.NumResults);
- if (II.NumResults == 1) {
- Builder.CreateAlignedStore(Result, Dst.getPointer(),
- CharUnits::fromQuantity(4));
- } else {
- for (unsigned i = 0; i < II.NumResults; ++i) {
- Builder.CreateAlignedStore(
- Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
- Dst.getElementType()),
- Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- }
- }
- return Result;
- }
- case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- case NVPTX::BI__hmma_m32n8k16_st_c_f16:
- case NVPTX::BI__hmma_m32n8k16_st_c_f32:
- case NVPTX::BI__hmma_m8n32k16_st_c_f16:
- case NVPTX::BI__hmma_m8n32k16_st_c_f32:
- case NVPTX::BI__imma_m16n16k16_st_c_i32:
- case NVPTX::BI__imma_m32n8k16_st_c_i32:
- case NVPTX::BI__imma_m8n32k16_st_c_i32:
- case NVPTX::BI__imma_m8n8k32_st_c_i32:
- case NVPTX::BI__bmma_m8n8k128_st_c_i32:
- case NVPTX::BI__dmma_m8n8k4_st_c_f64:
- case NVPTX::BI__mma_m16n16k8_st_c_f32: {
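- // Common lowering for the MMA/WMMA store builtins: gather the fragment
- // elements from the source pointer, bitcast them to the intrinsic's operand
- // type, and pass them to the row- or column-major store intrinsic together
- // with the stride.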
- Value *Dst = EmitScalarExpr(E->getArg(0));
- Address Src = EmitPointerWithAlignment(E->getArg(1));
- Value *Ldm = EmitScalarExpr(E->getArg(2));
- Optional<llvm::APSInt> isColMajorArg =
- E->getArg(3)->getIntegerConstantExpr(getContext());
- if (!isColMajorArg)
- return nullptr;
- bool isColMajor = isColMajorArg->getSExtValue();
- NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
- unsigned IID = isColMajor ? II.IID_col : II.IID_row;
- if (IID == 0)
- return nullptr;
- Function *Intrinsic =
- CGM.getIntrinsic(IID, Dst->getType());
- llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
- SmallVector<Value *, 10> Values = {Dst};
- for (unsigned i = 0; i < II.NumResults; ++i) {
- Value *V = Builder.CreateAlignedLoad(
- Src.getElementType(),
- Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- Values.push_back(Builder.CreateBitCast(V, ParamType));
- }
- Values.push_back(Ldm);
- Value *Result = Builder.CreateCall(Intrinsic, Values);
- return Result;
- }
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
- // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
- case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
- case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
- case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
- case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
- case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
- case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
- case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
- case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
- case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
- case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
- case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
- case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
- case NVPTX::BI__imma_m16n16k16_mma_s8:
- case NVPTX::BI__imma_m16n16k16_mma_u8:
- case NVPTX::BI__imma_m32n8k16_mma_s8:
- case NVPTX::BI__imma_m32n8k16_mma_u8:
- case NVPTX::BI__imma_m8n32k16_mma_s8:
- case NVPTX::BI__imma_m8n32k16_mma_u8:
- case NVPTX::BI__imma_m8n8k32_mma_s4:
- case NVPTX::BI__imma_m8n8k32_mma_u4:
- case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
- case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
- case NVPTX::BI__dmma_m8n8k4_mma_f64:
- case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
- case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
- case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
- case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
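- // Common lowering for the MMA/WMMA compute builtins: validate the layout and
- // satf arguments, select the matching intrinsic variant, load the A, B and C
- // fragments element by element, call the intrinsic, and store the resulting
- // D fragment.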
- Address Dst = EmitPointerWithAlignment(E->getArg(0));
- Address SrcA = EmitPointerWithAlignment(E->getArg(1));
- Address SrcB = EmitPointerWithAlignment(E->getArg(2));
- Address SrcC = EmitPointerWithAlignment(E->getArg(3));
- Optional<llvm::APSInt> LayoutArg =
- E->getArg(4)->getIntegerConstantExpr(getContext());
- if (!LayoutArg)
- return nullptr;
- int Layout = LayoutArg->getSExtValue();
- if (Layout < 0 || Layout > 3)
- return nullptr;
- llvm::APSInt SatfArg;
- if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
- BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
- SatfArg = 0; // .b1 does not have satf argument.
- else if (Optional<llvm::APSInt> OptSatfArg =
- E->getArg(5)->getIntegerConstantExpr(getContext()))
- SatfArg = *OptSatfArg;
- else
- return nullptr;
- bool Satf = SatfArg.getSExtValue();
- NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
- unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
- if (IID == 0) // Unsupported combination of Layout/Satf.
- return nullptr;
- SmallVector<Value *, 24> Values;
- Function *Intrinsic = CGM.getIntrinsic(IID);
- llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
- // Load A
- for (unsigned i = 0; i < MI.NumEltsA; ++i) {
- Value *V = Builder.CreateAlignedLoad(
- SrcA.getElementType(),
- Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- Values.push_back(Builder.CreateBitCast(V, AType));
- }
- // Load B
- llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
- for (unsigned i = 0; i < MI.NumEltsB; ++i) {
- Value *V = Builder.CreateAlignedLoad(
- SrcB.getElementType(),
- Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- Values.push_back(Builder.CreateBitCast(V, BType));
- }
- // Load C
- llvm::Type *CType =
- Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
- for (unsigned i = 0; i < MI.NumEltsC; ++i) {
- Value *V = Builder.CreateAlignedLoad(
- SrcC.getElementType(),
- Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- Values.push_back(Builder.CreateBitCast(V, CType));
- }
- Value *Result = Builder.CreateCall(Intrinsic, Values);
- llvm::Type *DType = Dst.getElementType();
- for (unsigned i = 0; i < MI.NumEltsD; ++i)
- Builder.CreateAlignedStore(
- Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
- Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
- llvm::ConstantInt::get(IntTy, i)),
- CharUnits::fromQuantity(4));
- return Result;
- }
- default:
- return nullptr;
- }
- }
- namespace {
- struct BuiltinAlignArgs {
- llvm::Value *Src = nullptr;
- llvm::Type *SrcType = nullptr;
- llvm::Value *Alignment = nullptr;
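- // Alignment - 1, i.e. a mask of the low bits that must be clear in an
- // aligned value (the alignment builtins require a power-of-two alignment).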
- llvm::Value *Mask = nullptr;
- llvm::IntegerType *IntType = nullptr;
- BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
- QualType AstType = E->getArg(0)->getType();
- if (AstType->isArrayType())
- Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
- else
- Src = CGF.EmitScalarExpr(E->getArg(0));
- SrcType = Src->getType();
- if (SrcType->isPointerTy()) {
- IntType = IntegerType::get(
- CGF.getLLVMContext(),
- CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
- } else {
- assert(SrcType->isIntegerTy());
- IntType = cast<llvm::IntegerType>(SrcType);
- }
- Alignment = CGF.EmitScalarExpr(E->getArg(1));
- Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
- auto *One = llvm::ConstantInt::get(IntType, 1);
- Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
- }
- };
- } // namespace
- /// Generate (x & (y-1)) == 0.
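- /// For example, __builtin_is_aligned(p, 16) is emitted as
- /// (ptrtoint(p) & 15) == 0 (with a plain 'and' when the operand is an integer).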
- RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
- BuiltinAlignArgs Args(E, *this);
- llvm::Value *SrcAddress = Args.Src;
- if (Args.SrcType->isPointerTy())
- SrcAddress =
- Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
- return RValue::get(Builder.CreateICmpEQ(
- Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
- llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
- }
- /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
- /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
- /// llvm.ptrmask intrinsic (with a GEP beforehand in the align_up case).
- /// TODO: actually use ptrmask once most optimization passes know about it.
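- /// For example, __builtin_align_up(p, 32) computes ((intptr + 31) & ~31) and,
- /// for pointers, re-derives the result with a GEP from the original pointer
- /// so it keeps pointing into the same allocation.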
- RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
- BuiltinAlignArgs Args(E, *this);
- llvm::Value *SrcAddr = Args.Src;
- if (Args.Src->getType()->isPointerTy())
- SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
- llvm::Value *SrcForMask = SrcAddr;
- if (AlignUp) {
- // When aligning up we have to first add the mask to ensure we go over the
- // next alignment value and then align down to the next valid multiple.
- // By adding the mask, we ensure that align_up on an already aligned
- // value will not change the value.
- SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
- }
- // Invert the mask to only clear the lower bits.
- llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
- llvm::Value *Result =
- Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
- if (Args.Src->getType()->isPointerTy()) {
- /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
- // Result = Builder.CreateIntrinsic(
- //   Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
- //   {SrcForMask, InvertedMask}, nullptr, "aligned_result");
- Result->setName("aligned_intptr");
- llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
- // The result must point to the same underlying allocation. This means we
- // can use an inbounds GEP to enable better optimization.
- Value *Base = EmitCastToVoidPtr(Args.Src);
- if (getLangOpts().isSignedOverflowDefined())
- Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result");
- else
- Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference,
- /*SignedIndices=*/true,
- /*isSubtraction=*/!AlignUp,
- E->getExprLoc(), "aligned_result");
- Result = Builder.CreatePointerCast(Result, Args.SrcType);
- // Emit an alignment assumption to ensure that the new alignment is
- // propagated to loads/stores, etc.
- emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
- }
- assert(Result->getType() == Args.SrcType);
- return RValue::get(Result);
- }
- Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_memory_size: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *I = EmitScalarExpr(E->getArg(0));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
- return Builder.CreateCall(Callee, I);
- }
- case WebAssembly::BI__builtin_wasm_memory_grow: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Args[] = {EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1))};
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
- return Builder.CreateCall(Callee, Args);
- }
- case WebAssembly::BI__builtin_wasm_tls_size: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_tls_align: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_tls_base: {
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_throw: {
- Value *Tag = EmitScalarExpr(E->getArg(0));
- Value *Obj = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
- return Builder.CreateCall(Callee, {Tag, Obj});
- }
- case WebAssembly::BI__builtin_wasm_rethrow: {
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
- Value *Addr = EmitScalarExpr(E->getArg(0));
- Value *Expected = EmitScalarExpr(E->getArg(1));
- Value *Timeout = EmitScalarExpr(E->getArg(2));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
- return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
- }
- case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
- Value *Addr = EmitScalarExpr(E->getArg(0));
- Value *Expected = EmitScalarExpr(E->getArg(1));
- Value *Timeout = EmitScalarExpr(E->getArg(2));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
- return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
- }
- case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
- Value *Addr = EmitScalarExpr(E->getArg(0));
- Value *Count = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
- return Builder.CreateCall(Callee, {Addr, Count});
- }
- case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
- case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
- case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
- case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- llvm::Type *ResT = ConvertType(E->getType());
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
- return Builder.CreateCall(Callee, {Src});
- }
- case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
- case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
- case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
- case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- llvm::Type *ResT = ConvertType(E->getType());
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
- {ResT, Src->getType()});
- return Builder.CreateCall(Callee, {Src});
- }
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- llvm::Type *ResT = ConvertType(E->getType());
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
- return Builder.CreateCall(Callee, {Src});
- }
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
- case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- llvm::Type *ResT = ConvertType(E->getType());
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
- return Builder.CreateCall(Callee, {Src});
- }
- case WebAssembly::BI__builtin_wasm_min_f32:
- case WebAssembly::BI__builtin_wasm_min_f64:
- case WebAssembly::BI__builtin_wasm_min_f32x4:
- case WebAssembly::BI__builtin_wasm_min_f64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_max_f32:
- case WebAssembly::BI__builtin_wasm_max_f64:
- case WebAssembly::BI__builtin_wasm_max_f32x4:
- case WebAssembly::BI__builtin_wasm_max_f64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_pmin_f32x4:
- case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_pmax_f32x4:
- case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_ceil_f32x4:
- case WebAssembly::BI__builtin_wasm_floor_f32x4:
- case WebAssembly::BI__builtin_wasm_trunc_f32x4:
- case WebAssembly::BI__builtin_wasm_nearest_f32x4:
- case WebAssembly::BI__builtin_wasm_ceil_f64x2:
- case WebAssembly::BI__builtin_wasm_floor_f64x2:
- case WebAssembly::BI__builtin_wasm_trunc_f64x2:
- case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_ceil_f32x4:
- case WebAssembly::BI__builtin_wasm_ceil_f64x2:
- IntNo = Intrinsic::ceil;
- break;
- case WebAssembly::BI__builtin_wasm_floor_f32x4:
- case WebAssembly::BI__builtin_wasm_floor_f64x2:
- IntNo = Intrinsic::floor;
- break;
- case WebAssembly::BI__builtin_wasm_trunc_f32x4:
- case WebAssembly::BI__builtin_wasm_trunc_f64x2:
- IntNo = Intrinsic::trunc;
- break;
- case WebAssembly::BI__builtin_wasm_nearest_f32x4:
- case WebAssembly::BI__builtin_wasm_nearest_f64x2:
- IntNo = Intrinsic::nearbyint;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Value *Value = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, Value);
- }
- case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- Value *Indices = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
- return Builder.CreateCall(Callee, {Src, Indices});
- }
- case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
- case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
- case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
- case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
- case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
- case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
- case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
- case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
- case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
- IntNo = Intrinsic::sadd_sat;
- break;
- case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
- case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
- IntNo = Intrinsic::uadd_sat;
- break;
- case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
- case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
- IntNo = Intrinsic::wasm_sub_sat_signed;
- break;
- case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
- case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
- IntNo = Intrinsic::wasm_sub_sat_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_abs_i8x16:
- case WebAssembly::BI__builtin_wasm_abs_i16x8:
- case WebAssembly::BI__builtin_wasm_abs_i32x4:
- case WebAssembly::BI__builtin_wasm_abs_i64x2: {
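- // Lower lane-wise integer abs as select(v < 0, -v, v).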
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Value *Neg = Builder.CreateNeg(Vec, "neg");
- Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
- Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
- return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
- }
- case WebAssembly::BI__builtin_wasm_min_s_i8x16:
- case WebAssembly::BI__builtin_wasm_min_u_i8x16:
- case WebAssembly::BI__builtin_wasm_max_s_i8x16:
- case WebAssembly::BI__builtin_wasm_max_u_i8x16:
- case WebAssembly::BI__builtin_wasm_min_s_i16x8:
- case WebAssembly::BI__builtin_wasm_min_u_i16x8:
- case WebAssembly::BI__builtin_wasm_max_s_i16x8:
- case WebAssembly::BI__builtin_wasm_max_u_i16x8:
- case WebAssembly::BI__builtin_wasm_min_s_i32x4:
- case WebAssembly::BI__builtin_wasm_min_u_i32x4:
- case WebAssembly::BI__builtin_wasm_max_s_i32x4:
- case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Value *ICmp;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_min_s_i8x16:
- case WebAssembly::BI__builtin_wasm_min_s_i16x8:
- case WebAssembly::BI__builtin_wasm_min_s_i32x4:
- ICmp = Builder.CreateICmpSLT(LHS, RHS);
- break;
- case WebAssembly::BI__builtin_wasm_min_u_i8x16:
- case WebAssembly::BI__builtin_wasm_min_u_i16x8:
- case WebAssembly::BI__builtin_wasm_min_u_i32x4:
- ICmp = Builder.CreateICmpULT(LHS, RHS);
- break;
- case WebAssembly::BI__builtin_wasm_max_s_i8x16:
- case WebAssembly::BI__builtin_wasm_max_s_i16x8:
- case WebAssembly::BI__builtin_wasm_max_s_i32x4:
- ICmp = Builder.CreateICmpSGT(LHS, RHS);
- break;
- case WebAssembly::BI__builtin_wasm_max_u_i8x16:
- case WebAssembly::BI__builtin_wasm_max_u_i16x8:
- case WebAssembly::BI__builtin_wasm_max_u_i32x4:
- ICmp = Builder.CreateICmpUGT(LHS, RHS);
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- return Builder.CreateSelect(ICmp, LHS, RHS);
- }
- case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
- case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
- ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
- IntNo = Intrinsic::wasm_extadd_pairwise_signed;
- break;
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
- IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, Vec);
- }
- case WebAssembly::BI__builtin_wasm_bitselect: {
- Value *V1 = EmitScalarExpr(E->getArg(0));
- Value *V2 = EmitScalarExpr(E->getArg(1));
- Value *C = EmitScalarExpr(E->getArg(2));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {V1, V2, C});
- }
- case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {Vec});
- }
- case WebAssembly::BI__builtin_wasm_any_true_v128:
- case WebAssembly::BI__builtin_wasm_all_true_i8x16:
- case WebAssembly::BI__builtin_wasm_all_true_i16x8:
- case WebAssembly::BI__builtin_wasm_all_true_i32x4:
- case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_any_true_v128:
- IntNo = Intrinsic::wasm_anytrue;
- break;
- case WebAssembly::BI__builtin_wasm_all_true_i8x16:
- case WebAssembly::BI__builtin_wasm_all_true_i16x8:
- case WebAssembly::BI__builtin_wasm_all_true_i32x4:
- case WebAssembly::BI__builtin_wasm_all_true_i64x2:
- IntNo = Intrinsic::wasm_alltrue;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
- return Builder.CreateCall(Callee, {Vec});
- }
- case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
- case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
- case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
- case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
- return Builder.CreateCall(Callee, {Vec});
- }
- case WebAssembly::BI__builtin_wasm_abs_f32x4:
- case WebAssembly::BI__builtin_wasm_abs_f64x2: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
- return Builder.CreateCall(Callee, {Vec});
- }
- case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
- case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
- return Builder.CreateCall(Callee, {Vec});
- }
- case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
- case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
- case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
- case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
- Value *Low = EmitScalarExpr(E->getArg(0));
- Value *High = EmitScalarExpr(E->getArg(1));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
- case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
- IntNo = Intrinsic::wasm_narrow_signed;
- break;
- case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
- case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
- IntNo = Intrinsic::wasm_narrow_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee =
- CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
- return Builder.CreateCall(Callee, {Low, High});
- }
- case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
- case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
- IntNo = Intrinsic::fptosi_sat;
- break;
- case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4:
- IntNo = Intrinsic::fptoui_sat;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- llvm::Type *SrcT = Vec->getType();
- llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
- Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
- Value *Trunc = Builder.CreateCall(Callee, Vec);
- Value *Splat = Constant::getNullValue(TruncT);
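- // Widen the truncated <2 x i32> result to <4 x i32> by shuffling in zeros for the upper lanes.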
- return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
- }
- case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
- Value *Ops[18];
- size_t OpIdx = 0;
- Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
- Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
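- // The remaining 16 arguments are the constant lane indices for the shuffle.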
- while (OpIdx < 18) {
- Optional<llvm::APSInt> LaneConst =
- E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
- assert(LaneConst && "Constant arg isn't actually constant?");
- Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
- }
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
- return Builder.CreateCall(Callee, Ops);
- }
- case WebAssembly::BI__builtin_wasm_fma_f32x4:
- case WebAssembly::BI__builtin_wasm_fms_f32x4:
- case WebAssembly::BI__builtin_wasm_fma_f64x2:
- case WebAssembly::BI__builtin_wasm_fms_f64x2: {
- Value *A = EmitScalarExpr(E->getArg(0));
- Value *B = EmitScalarExpr(E->getArg(1));
- Value *C = EmitScalarExpr(E->getArg(2));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_fma_f32x4:
- case WebAssembly::BI__builtin_wasm_fma_f64x2:
- IntNo = Intrinsic::wasm_fma;
- break;
- case WebAssembly::BI__builtin_wasm_fms_f32x4:
- case WebAssembly::BI__builtin_wasm_fms_f64x2:
- IntNo = Intrinsic::wasm_fms;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
- return Builder.CreateCall(Callee, {A, B, C});
- }
- case WebAssembly::BI__builtin_wasm_laneselect_i8x16:
- case WebAssembly::BI__builtin_wasm_laneselect_i16x8:
- case WebAssembly::BI__builtin_wasm_laneselect_i32x4:
- case WebAssembly::BI__builtin_wasm_laneselect_i64x2: {
- Value *A = EmitScalarExpr(E->getArg(0));
- Value *B = EmitScalarExpr(E->getArg(1));
- Value *C = EmitScalarExpr(E->getArg(2));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType());
- return Builder.CreateCall(Callee, {A, B, C});
- }
- case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
- Value *Src = EmitScalarExpr(E->getArg(0));
- Value *Indices = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
- return Builder.CreateCall(Callee, {Src, Indices});
- }
- case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
- case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
- IntNo = Intrinsic::wasm_relaxed_min;
- break;
- case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
- IntNo = Intrinsic::wasm_relaxed_max;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
- IntNo = Intrinsic::wasm_relaxed_trunc_signed;
- break;
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
- IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
- break;
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
- IntNo = Intrinsic::wasm_relaxed_trunc_zero_signed;
- break;
- case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2:
- IntNo = Intrinsic::wasm_relaxed_trunc_zero_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee = CGM.getIntrinsic(IntNo);
- return Builder.CreateCall(Callee, {Vec});
- }
- default:
- return nullptr;
- }
- }
- static std::pair<Intrinsic::ID, unsigned>
- getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
- struct Info {
- unsigned BuiltinID;
- Intrinsic::ID IntrinsicID;
- unsigned VecLen;
- };
- Info Infos[] = {
- #define CUSTOM_BUILTIN_MAPPING(x,s) \
- { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
- CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
- CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
- // Legacy builtins that take a vector in place of a vector predicate.
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
- CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
- #include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
- #undef CUSTOM_BUILTIN_MAPPING
- };
- auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
- static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
- (void)SortOnce;
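- // With the table sorted, the lookup below can use binary search.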
- const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
- Info{BuiltinID, 0, 0}, CmpInfo);
- if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
- return {Intrinsic::not_intrinsic, 0};
- return {F->IntrinsicID, F->VecLen};
- }
- Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- Intrinsic::ID ID;
- unsigned VecLen;
- std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);
- auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
- // The base pointer is passed by address, so it needs to be loaded.
- Address A = EmitPointerWithAlignment(E->getArg(0));
- Address BP = Address(Builder.CreateBitCast(
- A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
- llvm::Value *Base = Builder.CreateLoad(BP);
- // The treatment of both loads and stores is the same: the arguments for
- // the builtin are the same as the arguments for the intrinsic.
- // Load:
- // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
- // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
- // Store:
- // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
- // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
- SmallVector<llvm::Value*,5> Ops = { Base };
- for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
- // The load intrinsics generate two results (Value, NewBase), stores
- // generate one (NewBase). The new base address needs to be stored.
- llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
- : Result;
- llvm::Value *LV = Builder.CreateBitCast(
- EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
- Address Dest = EmitPointerWithAlignment(E->getArg(0));
- llvm::Value *RetVal =
- Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
- if (IsLoad)
- RetVal = Builder.CreateExtractValue(Result, 0);
- return RetVal;
- };
- // Handle the conversion of bit-reverse load intrinsics to bitcode.
- // The intrinsic call emitted below only reads from memory; the write to
- // memory is handled by the store instruction.
- auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
- // The builtin returns the updated base pointer, taken from the intrinsic's
- // result. The loaded value is returned to the caller through the builtin's
- // second argument, a pointer, so it needs to be stored explicitly.
- llvm::Value *BaseAddress =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
- // Expressions like &(*pt++) have side effects and are incremented on every
- // evaluation; EmitPointerWithAlignment and EmitScalarExpr each evaluate
- // the expression once per call.
- Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
- DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
- Int8Ty, DestAddr.getAlignment());
- llvm::Value *DestAddress = DestAddr.getPointer();
- // Operands are Base, Dest, Modifier.
- // The intrinsic format in LLVM IR is defined as
- // { ValueType, i8* } (i8*, i32).
- llvm::Value *Result = Builder.CreateCall(
- CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
- // The value needs to be stored as the variable is passed by reference.
- llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
- // The stored value needs to be truncated to fit the destination type.
- // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
- // to be handled with stores of the respective destination type.
- DestVal = Builder.CreateTrunc(DestVal, DestTy);
- llvm::Value *DestForStore =
- Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
- Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
- // The updated value of the base pointer is returned.
- return Builder.CreateExtractValue(Result, 1);
- };
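- // Helpers for converting between HVX vectors and vector predicates:
- // V2Q uses vandvrt with an all-ones mask, Q2V uses vandqrt.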
- auto V2Q = [this, VecLen] (llvm::Value *Vec) {
- Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
- : Intrinsic::hexagon_V6_vandvrt;
- return Builder.CreateCall(CGM.getIntrinsic(ID),
- {Vec, Builder.getInt32(-1)});
- };
- auto Q2V = [this, VecLen] (llvm::Value *Pred) {
- Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
- : Intrinsic::hexagon_V6_vandqrt;
- return Builder.CreateCall(CGM.getIntrinsic(ID),
- {Pred, Builder.getInt32(-1)});
- };
- switch (BuiltinID) {
- // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
- // and the corresponding C/C++ builtins use loads/stores to update
- // the predicate.
- case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
- case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
- case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
- case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
- // Get the type from the 0-th argument.
- llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
- Address PredAddr = Builder.CreateElementBitCast(
- EmitPointerWithAlignment(E->getArg(2)), VecType);
- llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
- {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
- llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
- Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
- PredAddr.getAlignment());
- return Builder.CreateExtractValue(Result, 0);
- }
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
- case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
- SmallVector<llvm::Value*,4> Ops;
- const Expr *PredOp = E->getArg(0);
- // There will be an implicit cast to a boolean vector. Strip it.
- if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
- if (Cast->getCastKind() == CK_BitCast)
- PredOp = Cast->getSubExpr();
- Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
- }
- for (int i = 1, e = E->getNumArgs(); i != e; ++i)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- }
- case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
- case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
- case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
- case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
- return MakeCircOp(ID, /*IsLoad=*/true);
- case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
- case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
- case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
- case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
- case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
- case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
- case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
- case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
- case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
- case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
- return MakeCircOp(ID, /*IsLoad=*/false);
- case Hexagon::BI__builtin_brev_ldub:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
- case Hexagon::BI__builtin_brev_ldb:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
- case Hexagon::BI__builtin_brev_lduh:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
- case Hexagon::BI__builtin_brev_ldh:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
- case Hexagon::BI__builtin_brev_ldw:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
- case Hexagon::BI__builtin_brev_ldd:
- return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
- } // switch
- return nullptr;
- }
- Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- SmallVector<Value *, 4> Ops;
- llvm::Type *ResultType = ConvertType(E->getType());
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- unsigned NF = 1;
- constexpr unsigned TAIL_UNDISTURBED = 0;
- // Required for overloaded intrinsics.
- llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
- switch (BuiltinID) {
- default: llvm_unreachable("unexpected builtin ID");
- case RISCV::BI__builtin_riscv_orc_b_32:
- case RISCV::BI__builtin_riscv_orc_b_64:
- case RISCV::BI__builtin_riscv_clmul:
- case RISCV::BI__builtin_riscv_clmulh:
- case RISCV::BI__builtin_riscv_clmulr:
- case RISCV::BI__builtin_riscv_bcompress_32:
- case RISCV::BI__builtin_riscv_bcompress_64:
- case RISCV::BI__builtin_riscv_bdecompress_32:
- case RISCV::BI__builtin_riscv_bdecompress_64:
- case RISCV::BI__builtin_riscv_bfp_32:
- case RISCV::BI__builtin_riscv_bfp_64:
- case RISCV::BI__builtin_riscv_grev_32:
- case RISCV::BI__builtin_riscv_grev_64:
- case RISCV::BI__builtin_riscv_gorc_32:
- case RISCV::BI__builtin_riscv_gorc_64:
- case RISCV::BI__builtin_riscv_shfl_32:
- case RISCV::BI__builtin_riscv_shfl_64:
- case RISCV::BI__builtin_riscv_unshfl_32:
- case RISCV::BI__builtin_riscv_unshfl_64:
- case RISCV::BI__builtin_riscv_xperm_n:
- case RISCV::BI__builtin_riscv_xperm_b:
- case RISCV::BI__builtin_riscv_xperm_h:
- case RISCV::BI__builtin_riscv_xperm_w:
- case RISCV::BI__builtin_riscv_crc32_b:
- case RISCV::BI__builtin_riscv_crc32_h:
- case RISCV::BI__builtin_riscv_crc32_w:
- case RISCV::BI__builtin_riscv_crc32_d:
- case RISCV::BI__builtin_riscv_crc32c_b:
- case RISCV::BI__builtin_riscv_crc32c_h:
- case RISCV::BI__builtin_riscv_crc32c_w:
- case RISCV::BI__builtin_riscv_crc32c_d:
- case RISCV::BI__builtin_riscv_fsl_32:
- case RISCV::BI__builtin_riscv_fsr_32:
- case RISCV::BI__builtin_riscv_fsl_64:
- case RISCV::BI__builtin_riscv_fsr_64: {
- switch (BuiltinID) {
- default: llvm_unreachable("unexpected builtin ID");
- // Zbb
- case RISCV::BI__builtin_riscv_orc_b_32:
- case RISCV::BI__builtin_riscv_orc_b_64:
- ID = Intrinsic::riscv_orc_b;
- break;
- // Zbc
- case RISCV::BI__builtin_riscv_clmul:
- ID = Intrinsic::riscv_clmul;
- break;
- case RISCV::BI__builtin_riscv_clmulh:
- ID = Intrinsic::riscv_clmulh;
- break;
- case RISCV::BI__builtin_riscv_clmulr:
- ID = Intrinsic::riscv_clmulr;
- break;
- // Zbe
- case RISCV::BI__builtin_riscv_bcompress_32:
- case RISCV::BI__builtin_riscv_bcompress_64:
- ID = Intrinsic::riscv_bcompress;
- break;
- case RISCV::BI__builtin_riscv_bdecompress_32:
- case RISCV::BI__builtin_riscv_bdecompress_64:
- ID = Intrinsic::riscv_bdecompress;
- break;
- // Zbf
- case RISCV::BI__builtin_riscv_bfp_32:
- case RISCV::BI__builtin_riscv_bfp_64:
- ID = Intrinsic::riscv_bfp;
- break;
- // Zbp
- case RISCV::BI__builtin_riscv_grev_32:
- case RISCV::BI__builtin_riscv_grev_64:
- ID = Intrinsic::riscv_grev;
- break;
- case RISCV::BI__builtin_riscv_gorc_32:
- case RISCV::BI__builtin_riscv_gorc_64:
- ID = Intrinsic::riscv_gorc;
- break;
- case RISCV::BI__builtin_riscv_shfl_32:
- case RISCV::BI__builtin_riscv_shfl_64:
- ID = Intrinsic::riscv_shfl;
- break;
- case RISCV::BI__builtin_riscv_unshfl_32:
- case RISCV::BI__builtin_riscv_unshfl_64:
- ID = Intrinsic::riscv_unshfl;
- break;
- case RISCV::BI__builtin_riscv_xperm_n:
- ID = Intrinsic::riscv_xperm_n;
- break;
- case RISCV::BI__builtin_riscv_xperm_b:
- ID = Intrinsic::riscv_xperm_b;
- break;
- case RISCV::BI__builtin_riscv_xperm_h:
- ID = Intrinsic::riscv_xperm_h;
- break;
- case RISCV::BI__builtin_riscv_xperm_w:
- ID = Intrinsic::riscv_xperm_w;
- break;
- // Zbr
- case RISCV::BI__builtin_riscv_crc32_b:
- ID = Intrinsic::riscv_crc32_b;
- break;
- case RISCV::BI__builtin_riscv_crc32_h:
- ID = Intrinsic::riscv_crc32_h;
- break;
- case RISCV::BI__builtin_riscv_crc32_w:
- ID = Intrinsic::riscv_crc32_w;
- break;
- case RISCV::BI__builtin_riscv_crc32_d:
- ID = Intrinsic::riscv_crc32_d;
- break;
- case RISCV::BI__builtin_riscv_crc32c_b:
- ID = Intrinsic::riscv_crc32c_b;
- break;
- case RISCV::BI__builtin_riscv_crc32c_h:
- ID = Intrinsic::riscv_crc32c_h;
- break;
- case RISCV::BI__builtin_riscv_crc32c_w:
- ID = Intrinsic::riscv_crc32c_w;
- break;
- case RISCV::BI__builtin_riscv_crc32c_d:
- ID = Intrinsic::riscv_crc32c_d;
- break;
- // Zbt
- case RISCV::BI__builtin_riscv_fsl_32:
- case RISCV::BI__builtin_riscv_fsl_64:
- ID = Intrinsic::riscv_fsl;
- break;
- case RISCV::BI__builtin_riscv_fsr_32:
- case RISCV::BI__builtin_riscv_fsr_64:
- ID = Intrinsic::riscv_fsr;
- break;
- }
- IntrinsicTypes = {ResultType};
- break;
- }
- // Vector builtins are handled from here.
- #include "clang/Basic/riscv_vector_builtin_cg.inc"
- }
- assert(ID != Intrinsic::not_intrinsic);
- llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
- return Builder.CreateCall(F, Ops, "");
- }