12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
7677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960 |
- //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- /// \file
- /// This file implements the targeting of the InstructionSelector class for
- /// AArch64.
- /// \todo This should be generated by TableGen.
- //===----------------------------------------------------------------------===//
- #include "AArch64GlobalISelUtils.h"
- #include "AArch64InstrInfo.h"
- #include "AArch64MachineFunctionInfo.h"
- #include "AArch64RegisterBankInfo.h"
- #include "AArch64RegisterInfo.h"
- #include "AArch64Subtarget.h"
- #include "AArch64TargetMachine.h"
- #include "MCTargetDesc/AArch64AddressingModes.h"
- #include "MCTargetDesc/AArch64MCTargetDesc.h"
- #include "llvm/BinaryFormat/Dwarf.h"
- #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
- #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
- #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
- #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
- #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
- #include "llvm/CodeGen/GlobalISel/Utils.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineConstantPool.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineFunction.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineMemOperand.h"
- #include "llvm/CodeGen/MachineOperand.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/TargetOpcodes.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/IntrinsicsAArch64.h"
- #include "llvm/IR/PatternMatch.h"
- #include "llvm/IR/Type.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- #include <optional>
- #define DEBUG_TYPE "aarch64-isel"
- using namespace llvm;
- using namespace MIPatternMatch;
- using namespace AArch64GISelUtils;
- namespace llvm {
- class BlockFrequencyInfo; // Forward declaration; taken by pointer in setupMF().
- class ProfileSummaryInfo; // Forward declaration; taken by pointer in setupMF().
- } // namespace llvm
- namespace {
- #define GET_GLOBALISEL_PREDICATE_BITSET
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_PREDICATE_BITSET
- class AArch64InstructionSelector : public InstructionSelector {
- public:
- AArch64InstructionSelector(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI,
- const AArch64RegisterBankInfo &RBI);
- bool select(MachineInstr &I) override;
- /// \returns the name of this selector, i.e. the DEBUG_TYPE string.
- static const char *getName() {
- return DEBUG_TYPE;
- }
- /// Per-function setup, run once for each function before selection starts.
- ///
- /// Forwards all arguments to InstructionSelector::setupMF(), points the
- /// member builder at \p MF, refreshes the per-function cached state and
- /// finally preprocesses the G_PHIs of \p MF.
- void setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) override {
- InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
- MIB.setMF(MF);
- // Querying the function attribute on every BRCOND selection would be
- // expensive, so look it up a single time per selector run and keep the
- // answer around.
- const bool HasSpeculativeLoadHardening =
- MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
- ProduceNonFlagSettingCondBr = !HasSpeculativeLoadHardening;
- // Start each function with a default-constructed return-address register.
- MFReturnAddr = Register();
- // Preprocess the G_PHIs before any instruction is selected.
- processPHIs(MF);
- }
- private:
- /// tblgen-erated 'select' implementation, used as the initial selector for
- /// the patterns that don't require complex C++.
- bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
- // A lowering phase that runs before any selection attempts.
- // Returns true if the instruction was modified.
- bool preISelLower(MachineInstr &I);
- // An early selection function that runs before the selectImpl() call.
- bool earlySelect(MachineInstr &I);
- // Do some preprocessing of G_PHIs before we begin selection.
- void processPHIs(MachineFunction &MF);
- bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
- /// Eliminate same-sized cross-bank copies into stores before selectImpl().
- bool contractCrossBankCopyIntoStore(MachineInstr &I,
- MachineRegisterInfo &MRI);
- bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
- MachineRegisterInfo &MRI) const;
- bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
- MachineRegisterInfo &MRI) const;
- ///@{
- /// Helper functions for selectCompareBranch.
- bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
- MachineIRBuilder &MIB) const;
- bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
- MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const;
- ///@}
- bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
- MachineRegisterInfo &MRI);
- bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
- // Helper to generate an equivalent of scalar_to_vector into a new register.
- // NOTE(review): there is no 'Dst' out-parameter here; the result is the returned MachineInstr * (presumably the instruction defining the new register - confirm against the definition).
- MachineInstr *emitScalarToVector(unsigned EltSize,
- const TargetRegisterClass *DstRC,
- Register Scalar,
- MachineIRBuilder &MIRBuilder) const;
- /// Emit a lane insert into \p DstReg, or a new vector register if
- /// std::nullopt is provided.
- ///
- /// The lane inserted into is defined by \p LaneIdx. The vector source
- /// register is given by \p SrcReg. The register containing the element is
- /// given by \p EltReg.
- MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
- Register EltReg, unsigned LaneIdx,
- const RegisterBank &RB,
- MachineIRBuilder &MIRBuilder) const;
- /// Emit a sequence of instructions representing a constant \p CV for a
- /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
- ///
- /// \returns the last instruction in the sequence on success, and nullptr
- /// otherwise.
- MachineInstr *emitConstantVector(Register Dst, Constant *CV,
- MachineIRBuilder &MIRBuilder,
- MachineRegisterInfo &MRI);
- bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
- bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
- MachineRegisterInfo &MRI);
- /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
- /// SUBREG_TO_REG.
- bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
- bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
- /// Helper function to select vector load intrinsics like
- /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
- /// \p Opc is the opcode that the selected instruction should use.
- /// \p NumVecs is the number of vector destinations for the instruction.
- /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
- bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
- MachineInstr &I);
- bool selectIntrinsicWithSideEffects(MachineInstr &I,
- MachineRegisterInfo &MRI);
- bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
- unsigned emitConstantPoolEntry(const Constant *CPVal,
- MachineFunction &MF) const;
- MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
- MachineIRBuilder &MIRBuilder) const;
- // Emit a vector concat operation.
- MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
- Register Op2,
- MachineIRBuilder &MIRBuilder) const;
- // Emit an integer compare between LHS and RHS, which checks for Predicate.
- MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
- /// Emit a floating point comparison between \p LHS and \p RHS.
- /// \p Pred if given is the intended predicate to use.
- MachineInstr *
- emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
- std::optional<CmpInst::Predicate> = std::nullopt) const;
- MachineInstr *
- emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
- std::initializer_list<llvm::SrcOp> SrcOps,
- MachineIRBuilder &MIRBuilder,
- const ComplexRendererFns &RenderFns = std::nullopt) const;
- /// Helper function to emit an add or sub instruction.
- ///
- /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
- /// in a specific order.
- ///
- /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
- ///
- /// \code
- /// const std::array<std::array<unsigned, 2>, 5> Table {
- /// {{AArch64::ADDXri, AArch64::ADDWri},
- /// {AArch64::ADDXrs, AArch64::ADDWrs},
- /// {AArch64::ADDXrr, AArch64::ADDWrr},
- /// {AArch64::SUBXri, AArch64::SUBWri},
- /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
- /// \endcode
- ///
- /// Each row in the table corresponds to a different addressing mode. Each
- /// column corresponds to a different register size.
- ///
- /// \attention Rows must be structured as follows:
- /// - Row 0: The ri opcode variants
- /// - Row 1: The rs opcode variants
- /// - Row 2: The rr opcode variants
- /// - Row 3: The ri opcode variants for negative immediates
- /// - Row 4: The rx opcode variants
- ///
- /// \attention Columns must be structured as follows:
- /// - Column 0: The 64-bit opcode variants
- /// - Column 1: The 32-bit opcode variants
- ///
- /// \p Dst is the destination register of the binop to emit.
- /// \p LHS is the left-hand operand of the binop to emit.
- /// \p RHS is the right-hand operand of the binop to emit.
- MachineInstr *emitAddSub(
- const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
- Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
- AArch64CC::CondCode CC,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
- const RegisterBank &DstRB, LLT ScalarTy,
- Register VecReg, unsigned LaneIdx,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
- AArch64CC::CondCode Pred,
- MachineIRBuilder &MIRBuilder) const;
- /// Emit a CSet for a FP compare.
- ///
- /// \p Dst is expected to be a 32-bit scalar register.
- MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
- MachineIRBuilder &MIRBuilder) const;
- /// Emit the overflow op for \p Opcode.
- ///
- /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
- /// G_USUBO, etc.
- std::pair<MachineInstr *, AArch64CC::CondCode>
- emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
- MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
- /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
- /// In some cases this is even possible with OR operations in the expression.
- MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
- MachineIRBuilder &MIB) const;
- MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
- CmpInst::Predicate CC,
- AArch64CC::CondCode Predicate,
- AArch64CC::CondCode OutCC,
- MachineIRBuilder &MIB) const;
- MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
- bool Negate, Register CCOp,
- AArch64CC::CondCode Predicate,
- MachineIRBuilder &MIB) const;
- /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
- /// \p IsNegative is true if the test should be "not zero".
- /// This will also optimize the test bit instruction when possible.
- MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
- MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const;
- /// Emit a CB(N)Z instruction which branches to \p DestMBB.
- MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const;
- // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
- // We use these manually instead of using the importer since it doesn't
- // support SDNodeXForm.
- ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
- ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
- ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
- ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
- ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
- ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
- ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
- ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
- unsigned Size) const;
- ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { // "8" variant of selectAddrModeUnscaled.
- return selectAddrModeUnscaled(Root, 1); // Forwards Size = 1, i.e. 8 / 8 (presumably a byte count -- confirm).
- }
- ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { // "16" variant of selectAddrModeUnscaled.
- return selectAddrModeUnscaled(Root, 2); // Forwards Size = 2, i.e. 16 / 8 (presumably a byte count -- confirm).
- }
- ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { // "32" variant of selectAddrModeUnscaled.
- return selectAddrModeUnscaled(Root, 4); // Forwards Size = 4, i.e. 32 / 8 (presumably a byte count -- confirm).
- }
- ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { // "64" variant of selectAddrModeUnscaled.
- return selectAddrModeUnscaled(Root, 8); // Forwards Size = 8, i.e. 64 / 8 (presumably a byte count -- confirm).
- }
- ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { // "128" variant of selectAddrModeUnscaled.
- return selectAddrModeUnscaled(Root, 16); // Forwards Size = 16, i.e. 128 / 8 (presumably a byte count -- confirm).
- }
- /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
- /// from complex pattern matchers like selectAddrModeIndexed().
- ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
- MachineRegisterInfo &MRI) const;
- ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
- unsigned Size) const;
- template <int Width> // Compile-time width; it is divided by 8 before being forwarded (presumably bits -> bytes).
- ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { // Template front end for the two-argument overload.
- return selectAddrModeIndexed(Root, Width / 8); // Integer division; passes Width / 8 as the Size argument.
- }
- bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
- const MachineRegisterInfo &MRI) const;
- ComplexRendererFns
- selectAddrModeShiftedExtendXReg(MachineOperand &Root,
- unsigned SizeInBytes) const;
- /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
- /// or not a shift + extend should be folded into an addressing mode. Returns
- /// std::nullopt when this is not profitable or possible.
- ComplexRendererFns
- selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
- MachineOperand &Offset, unsigned SizeInBytes,
- bool WantsExt) const;
- ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
- ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
- unsigned SizeInBytes) const;
- template <int Width> // Compile-time width; converted to SizeInBytes by dividing by 8.
- ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { // Template front end for the two-argument overload.
- return selectAddrModeXRO(Root, Width / 8); // Integer division; passes Width / 8 as SizeInBytes.
- }
- ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
- unsigned SizeInBytes) const;
- template <int Width> // Compile-time width; converted to SizeInBytes by dividing by 8.
- ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { // Template front end for the two-argument overload.
- return selectAddrModeWRO(Root, Width / 8); // Integer division; passes Width / 8 as SizeInBytes.
- }
- ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
- bool AllowROR = false) const;
- ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { // Shifted-register matcher with AllowROR left at its default.
- return selectShiftedRegister(Root); // AllowROR defaults to false in the declaration of selectShiftedRegister.
- }
- ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { // Shifted-register matcher with AllowROR enabled.
- return selectShiftedRegister(Root, true); // Passes AllowROR = true.
- }
- /// Given an extend instruction, determine the correct shift-extend type for
- /// that instruction.
- ///
- /// If the instruction is going to be used in a load or store, pass
- /// \p IsLoadStore = true.
- AArch64_AM::ShiftExtendType
- getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
- bool IsLoadStore = false) const;
- /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
- ///
- /// \returns Either \p Reg if no change was necessary, or the new register
- /// created by moving \p Reg.
- ///
- /// Note: This uses emitCopy right now.
- Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
- MachineIRBuilder &MIB) const;
- ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
- void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx = -1) const;
- void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
- int OpIdx = -1) const;
- void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
- int OpIdx = -1) const;
- void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx = -1) const;
- void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx = -1) const;
- void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx = -1) const;
- void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx = -1) const;
- // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
- void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
- // Optimization methods.
- bool tryOptSelect(GSelect &Sel);
- bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
- MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
- /// Return true if \p MI is a load or store of \p NumBytes bytes.
- bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
- /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
- /// register zeroed out. In other words, the result of MI has been explicitly
- /// zero extended.
- bool isDef32(const MachineInstr &MI) const;
- const AArch64TargetMachine &TM;
- const AArch64Subtarget &STI;
- const AArch64InstrInfo &TII;
- const AArch64RegisterInfo &TRI;
- const AArch64RegisterBankInfo &RBI;
- bool ProduceNonFlagSettingCondBr = false;
- // Some cached values used during selection.
- // We use LR as a live-in register, and we keep track of it here as it can be
- // clobbered by calls.
- Register MFReturnAddr;
- MachineIRBuilder MIB;
- #define GET_GLOBALISEL_PREDICATES_DECL
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_PREDICATES_DECL
- // We declare the temporaries used by selectImpl() in the class to minimize the
- // cost of constructing placeholder values.
- #define GET_GLOBALISEL_TEMPORARIES_DECL
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_TEMPORARIES_DECL
- };
- } // end anonymous namespace
- #define GET_GLOBALISEL_IMPL
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_IMPL
- AArch64InstructionSelector::AArch64InstructionSelector( // Constructor: keeps references to the target machine, subtarget and register-bank info.
- const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
- const AArch64RegisterBankInfo &RBI)
- : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), // TII and TRI are obtained from the subtarget.
- RBI(RBI), // Trailing comma: the initializer list continues inside the generated .inc sections included below.
- #define GET_GLOBALISEL_PREDICATES_INIT
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_PREDICATES_INIT
- #define GET_GLOBALISEL_TEMPORARIES_INIT
- #include "AArch64GenGlobalISel.inc"
- #undef GET_GLOBALISEL_TEMPORARIES_INIT
- { // Empty body: all work happens in the member initializers.
- }
- // FIXME: This should be target-independent, inferred from the types declared
- // for each class in the bank.
- //
- /// Map a (type, register bank) pair to the smallest register class that can
- /// hold a value of type \p Ty on bank \p RB.
- ///
- /// On the GPR bank, \p GetAllRegSet picks the GPR32all / GPR64all classes
- /// instead of GPR32 / GPR64; it has no effect on 128-bit values or on the FPR
- /// bank.
- ///
- /// \returns nullptr if the bank is neither GPR nor FPR, or if the size of
- /// \p Ty has no class on that bank.
- static const TargetRegisterClass *
- getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
- bool GetAllRegSet = false) {
- const unsigned BankID = RB.getID();
- if (BankID == AArch64::GPRRegBankID) {
- const uint64_t Bits = Ty.getSizeInBits();
- if (Bits == 128)
- return &AArch64::XSeqPairsClassRegClass;
- if (Bits == 64)
- return GetAllRegSet ? &AArch64::GPR64allRegClass
- : &AArch64::GPR64RegClass;
- // Everything up to 32 bits shares the 32-bit GPR classes.
- if (Bits <= 32)
- return GetAllRegSet ? &AArch64::GPR32allRegClass
- : &AArch64::GPR32RegClass;
- return nullptr;
- }
- if (BankID != AArch64::FPRRegBankID)
- return nullptr;
- // The FPR bank only has classes for these exact sizes.
- switch (Ty.getSizeInBits()) {
- case 128:
- return &AArch64::FPR128RegClass;
- case 64:
- return &AArch64::FPR64RegClass;
- case 32:
- return &AArch64::FPR32RegClass;
- case 16:
- return &AArch64::FPR16RegClass;
- case 8:
- return &AArch64::FPR8RegClass;
- default:
- return nullptr;
- }
- }
- /// Map a (register bank, size in bits) pair to the smallest register class
- /// that can represent it.
- ///
- /// On the GPR bank, \p GetAllRegSet picks the GPR32all / GPR64all classes
- /// instead of GPR32 / GPR64; it has no effect on 128-bit values or on the FPR
- /// bank.
- ///
- /// \returns nullptr if \p RB is neither the GPR nor the FPR bank, or if
- /// \p SizeInBits has no class on that bank.
- static const TargetRegisterClass *
- getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
- bool GetAllRegSet = false) {
- switch (RB.getID()) {
- case AArch64::GPRRegBankID:
- // Anything that fits in 32 bits uses a 32-bit GPR class.
- if (SizeInBits <= 32)
- return GetAllRegSet ? &AArch64::GPR32allRegClass
- : &AArch64::GPR32RegClass;
- if (SizeInBits == 64)
- return GetAllRegSet ? &AArch64::GPR64allRegClass
- : &AArch64::GPR64RegClass;
- if (SizeInBits == 128)
- return &AArch64::XSeqPairsClassRegClass;
- return nullptr;
- case AArch64::FPRRegBankID:
- // Only exact FPR widths are supported.
- if (SizeInBits == 8)
- return &AArch64::FPR8RegClass;
- if (SizeInBits == 16)
- return &AArch64::FPR16RegClass;
- if (SizeInBits == 32)
- return &AArch64::FPR32RegClass;
- if (SizeInBits == 64)
- return &AArch64::FPR64RegClass;
- if (SizeInBits == 128)
- return &AArch64::FPR128RegClass;
- return nullptr;
- default:
- return nullptr;
- }
- }
- /// Look up the subregister index that corresponds to the register size of
- /// \p RC.
- ///
- /// \p RC is the register class to inspect; it is dereferenced, so it must not
- /// be null. \p TRI supplies the register size of \p RC.
- /// \p SubReg receives the index on success (bsub, hsub, ssub / sub_32 or
- /// dsub) and is left untouched on failure.
- ///
- /// \returns true if an index was stored in \p SubReg, false if the size of
- /// \p RC is not 8, 16, 32 or 64 bits.
- static bool getSubRegForClass(const TargetRegisterClass *RC,
- const TargetRegisterInfo &TRI, unsigned &SubReg) {
- switch (TRI.getRegSizeInBits(*RC)) {
- case 8:
- SubReg = AArch64::bsub;
- break;
- case 16:
- SubReg = AArch64::hsub;
- break;
- case 32:
- // FPR32 uses ssub; every other 32-bit class uses sub_32.
- if (RC != &AArch64::FPR32RegClass)
- SubReg = AArch64::sub_32;
- else
- SubReg = AArch64::ssub;
- break;
- case 64:
- SubReg = AArch64::dsub;
- break;
- default:
- // Terminate the line so the next debug message does not run into this one.
- LLVM_DEBUG(
- dbgs() << "Couldn't find appropriate subregister for register class.\n");
- return false;
- }
- return true;
- }
- /// Smallest value size, in bits, that register bank \p RB can hold: 32 for the
- /// GPR bank and 8 for the FPR bank. Any other bank is treated as unreachable.
- static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
- const unsigned BankID = RB.getID();
- if (BankID == AArch64::GPRRegBankID)
- return 32;
- if (BankID == AArch64::FPRRegBankID)
- return 8;
- llvm_unreachable("Tried to get minimum size for unknown register bank.");
- }
- /// Build a REG_SEQUENCE that packs the registers in \p Regs into one tuple
- /// register. Shared implementation behind createDTuple and createQTuple.
- ///
- /// \p RegClassIDs - Register class IDs of the tuple classes for some scalar
- /// class, e.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. It is indexed by
- /// (number of registers - 2), so entry 0 is the 2-tuple class.
- ///
- /// \p SubRegs - Subregister indices, one per tuple element; element I of
- /// \p Regs is placed at SubRegs[I] (e.g. qsub0 for the first Q register).
- ///
- /// \returns The destination register of the new REG_SEQUENCE, or the 0th
- /// element of \p Regs unchanged if \p Regs holds a single register.
- static Register createTuple(ArrayRef<Register> Regs,
- const unsigned RegClassIDs[],
- const unsigned SubRegs[], MachineIRBuilder &MIB) {
- const unsigned NumRegs = Regs.size();
- // A lone register needs no tuple; hand it back as-is.
- if (NumRegs == 1)
- return Regs[0];
- assert(NumRegs >= 2 && NumRegs <= 4 &&
- "Only support between two and 4 registers in a tuple!");
- const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
- const unsigned TupleClassID = RegClassIDs[NumRegs - 2];
- auto *DesiredClass = TRI->getRegClass(TupleClassID);
- auto RegSequence =
- MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
- // Operands come in (register, subregister index) pairs, in order.
- unsigned Slot = 0;
- for (Register Elt : Regs) {
- RegSequence.addUse(Elt);
- RegSequence.addImm(SubRegs[Slot]);
- ++Slot;
- }
- return RegSequence.getReg(0);
- }
- /// Pack the D-registers in \p Regs into a DD / DDD / DDDD tuple register via
- /// createTuple, using dsub0..dsub3 as the element subregister indices.
- static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
- // Entry K is the class for a tuple of K + 2 registers.
- static const unsigned DTupleClassIDs[] = {
- AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static const unsigned DSubRegIdxs[] = {AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3};
- return createTuple(Regs, DTupleClassIDs, DSubRegIdxs, MIB);
- }
- /// Pack the Q-registers in \p Regs into a QQ / QQQ / QQQQ tuple register via
- /// createTuple, using qsub0..qsub3 as the element subregister indices.
- static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
- // Entry K is the class for a tuple of K + 2 registers.
- static const unsigned QTupleClassIDs[] = {
- AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static const unsigned QSubRegIdxs[] = {AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3};
- return createTuple(Regs, QTupleClassIDs, QSubRegIdxs, MIB);
- }
- /// Try to read operand \p Root as an immediate value.
- ///
- /// - An Imm operand yields its value.
- /// - A CImm operand yields its zero-extended value.
- /// - A register operand yields the sign-extended value of the constant found
- /// by getIConstantVRegValWithLookThrough, if there is one.
- ///
- /// \returns std::nullopt for any other operand kind, or for a register with
- /// no constant behind it.
- static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
- if (Root.isImm())
- return static_cast<uint64_t>(Root.getImm());
- if (Root.isCImm())
- return static_cast<uint64_t>(Root.getCImm()->getZExtValue());
- if (!Root.isReg())
- return std::nullopt;
- // Walk operand -> instruction -> block -> function to reach the register info.
- const auto &MRI = Root.getParent()->getParent()->getParent()->getRegInfo();
- auto ConstAndVReg =
- getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
- if (!ConstAndVReg)
- return std::nullopt;
- return static_cast<uint64_t>(ConstAndVReg->Value.getSExtValue());
- }
- /// Report whether \p I is a binary operation this selector cannot handle yet.
- /// That is the case when:
- /// - its result register has no valid type,
- /// - any operand is not a register, or is not a virtual register,
- /// - any operand has no register bank, or
- /// - the operands are not all on the same bank.
- /// These are checks that should someday live in the verifier, but right now,
- /// these are mostly limitations of the aarch64 selector.
- ///
- /// \returns true if \p I is unsupported, false if every check passed.
- static bool unsupportedBinOp(const MachineInstr &I,
- const AArch64RegisterBankInfo &RBI,
- const MachineRegisterInfo &MRI,
- const AArch64RegisterInfo &TRI) {
- const LLT ResultTy = MRI.getType(I.getOperand(0).getReg());
- if (!ResultTy.isValid()) {
- LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
- return true;
- }
- // Bank of the first operand; every later operand must match it.
- const RegisterBank *CommonBank = nullptr;
- for (const MachineOperand &Op : I.operands()) {
- // FIXME: Support non-register operands.
- if (!Op.isReg()) {
- LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
- return true;
- }
- // FIXME: Can generic operations have physical registers operands? If
- // so, this will need to be taught about that, and we'll need to get the
- // bank out of the minimal class for the register.
- // Either way, this needs to be documented (and possibly verified).
- const Register OpReg = Op.getReg();
- if (!OpReg.isVirtual()) {
- LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
- return true;
- }
- const RegisterBank *Bank = RBI.getRegBank(OpReg, MRI, TRI);
- if (!Bank) {
- LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
- return true;
- }
- if (!CommonBank) {
- CommonBank = Bank;
- } else if (Bank != CommonBank) {
- LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
- return true;
- }
- }
- return false;
- }
- /// Map the generic binary opcode \p GenericOpc (such as G_OR or G_SDIV) to
- /// the AArch64 opcode that implements it on register bank \p RegBankID at
- /// operand size \p OpSize.
- ///
- /// Only 32- and 64-bit operands are mapped. On the GPR bank these are the
- /// variable shifts, plus G_PTR_ADD at 64 bits. On the FPR bank these are
- /// G_FADD / G_FSUB / G_FMUL / G_FDIV, plus G_OR at 64 bits.
- ///
- /// \returns \p GenericOpc if the combination is unsupported.
- static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
- unsigned OpSize) {
- const bool Is32 = OpSize == 32;
- // Neither bank maps anything at sizes other than 32 and 64 bits.
- if (!Is32 && OpSize != 64)
- return GenericOpc;
- if (RegBankID == AArch64::GPRRegBankID) {
- switch (GenericOpc) {
- case TargetOpcode::G_SHL:
- return Is32 ? AArch64::LSLVWr : AArch64::LSLVXr;
- case TargetOpcode::G_LSHR:
- return Is32 ? AArch64::LSRVWr : AArch64::LSRVXr;
- case TargetOpcode::G_ASHR:
- return Is32 ? AArch64::ASRVWr : AArch64::ASRVXr;
- case TargetOpcode::G_PTR_ADD:
- // Pointer adds are only mapped at 64 bits.
- if (Is32)
- return GenericOpc;
- return AArch64::ADDXrr;
- default:
- return GenericOpc;
- }
- }
- if (RegBankID == AArch64::FPRRegBankID) {
- switch (GenericOpc) {
- case TargetOpcode::G_FADD:
- return Is32 ? AArch64::FADDSrr : AArch64::FADDDrr;
- case TargetOpcode::G_FSUB:
- return Is32 ? AArch64::FSUBSrr : AArch64::FSUBDrr;
- case TargetOpcode::G_FMUL:
- return Is32 ? AArch64::FMULSrr : AArch64::FMULDrr;
- case TargetOpcode::G_FDIV:
- return Is32 ? AArch64::FDIVSrr : AArch64::FDIVDrr;
- case TargetOpcode::G_OR:
- // A 64-bit OR on the FPR bank uses the 8x8-bit vector ORR.
- if (Is32)
- return GenericOpc;
- return AArch64::ORRv8i8;
- default:
- return GenericOpc;
- }
- }
- return GenericOpc;
- }
- /// Pick the AArch64 load or store opcode for \p GenericOpc, given the bank of
- /// the value register (\p RegBankID) and the memory access size \p OpSize.
- /// The result is the base + unsigned-immediate form (e.g., LDRXui).
- ///
- /// A G_STORE yields the store opcode; any other \p GenericOpc yields the load
- /// opcode. The GPR bank supports 8/16/32/64-bit accesses and the FPR bank
- /// additionally supports 128-bit accesses.
- ///
- /// \returns \p GenericOpc if the combination is unsupported.
- static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
- unsigned OpSize) {
- // Chooses between the store and load member of an opcode pair.
- const auto Pick = [GenericOpc](unsigned StoreOpc, unsigned LoadOpc) {
- return GenericOpc == TargetOpcode::G_STORE ? StoreOpc : LoadOpc;
- };
- if (RegBankID == AArch64::GPRRegBankID) {
- switch (OpSize) {
- case 8:
- return Pick(AArch64::STRBBui, AArch64::LDRBBui);
- case 16:
- return Pick(AArch64::STRHHui, AArch64::LDRHHui);
- case 32:
- return Pick(AArch64::STRWui, AArch64::LDRWui);
- case 64:
- return Pick(AArch64::STRXui, AArch64::LDRXui);
- default:
- return GenericOpc;
- }
- }
- if (RegBankID == AArch64::FPRRegBankID) {
- switch (OpSize) {
- case 8:
- return Pick(AArch64::STRBui, AArch64::LDRBui);
- case 16:
- return Pick(AArch64::STRHui, AArch64::LDRHui);
- case 32:
- return Pick(AArch64::STRSui, AArch64::LDRSui);
- case 64:
- return Pick(AArch64::STRDui, AArch64::LDRDui);
- case 128:
- return Pick(AArch64::STRQui, AArch64::LDRQui);
- default:
- return GenericOpc;
- }
- }
- return GenericOpc;
- }
- /// Helper for selectCopy. Emits a subregister copy of \p SrcReg into a new
- /// register of class \p To, placed before \p I, and rewires operand 1 of
- /// \p I to read that new register.
- ///
- /// E.g "NewReg = COPY SrcReg:SubReg"
- ///
- /// If operand 0 of \p I is not a physical register, it is also constrained to
- /// \p To. \returns true unconditionally.
- static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI, Register SrcReg,
- const TargetRegisterClass *To, unsigned SubReg) {
- assert(SrcReg.isValid() && "Expected a valid source register?");
- assert(To && "Destination register class cannot be null");
- assert(SubReg && "Expected a valid subregister");
- MachineIRBuilder Builder(I);
- auto NarrowCopy = Builder.buildInstr(TargetOpcode::COPY, {To}, {})
- .addReg(SrcReg, 0, SubReg);
- I.getOperand(1).setReg(NarrowCopy.getReg(0));
- // It's possible that the destination register won't be constrained. Make
- // sure that happens.
- const Register DstReg = I.getOperand(0).getReg();
- if (DstReg.isPhysical())
- return true;
- RBI.constrainGenericRegister(DstReg, *To, MRI);
- return true;
- }
- /// Work out the register classes on both sides of the copy \p I.
- ///
- /// \returns a pair of (source class, destination class), where operand 1 of
- /// \p I is the source and operand 0 the destination. Either entry is nullptr
- /// if no class could be determined for that side.
- static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
- getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- const Register Dst = I.getOperand(0).getReg();
- const Register Src = I.getOperand(1).getReg();
- const RegisterBank &DstBank = *RBI.getRegBank(Dst, MRI, TRI);
- const RegisterBank &SrcBank = *RBI.getRegBank(Src, MRI, TRI);
- unsigned DstBits = RBI.getSizeInBits(Dst, MRI, TRI);
- unsigned SrcBits = RBI.getSizeInBits(Src, MRI, TRI);
- // Special casing for cross-bank copies of s1s. We can technically represent
- // a 1-bit value with any size of register. The minimum size for a GPR is 32
- // bits. So, we need to put the FPR on 32 bits as well.
- //
- // FIXME: I'm not sure if this case holds true outside of copies. If it does,
- // then we can pull it into the helpers that get the appropriate class for a
- // register bank. Or make a new helper that carries along some constraint
- // information.
- const bool IsCrossBankS1 = SrcBank != DstBank && DstBits == 1 && SrcBits == 1;
- if (IsCrossBankS1) {
- SrcBits = 32;
- DstBits = 32;
- }
- const TargetRegisterClass *SrcRC =
- getMinClassForRegBank(SrcBank, SrcBits, /* GetAllRegSet */ true);
- const TargetRegisterClass *DstRC =
- getMinClassForRegBank(DstBank, DstBits, /* GetAllRegSet */ true);
- return {SrcRC, DstRC};
- }
- // FIXME: We need some sort of API in RBI/TRI to allow generic code to
- // constrain operands of simple instructions given a TargetRegisterClass
- // and LLT
- /// Constrain the virtual register operands of debug instruction \p I to a
- /// register class.
- ///
- /// An operand that already has a class keeps it; one that only has a bank gets
- /// the class returned by getRegClassForTypeOnBank for its type. Processing
- /// stops at the first operand for which no class can be found.
- /// \returns true unconditionally.
- static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI) {
- for (MachineOperand &Op : I.operands()) {
- if (!Op.isReg())
- continue;
- const Register Reg = Op.getReg();
- // Skip empty and physical registers; only virtual ones are constrained.
- if (!Reg || Reg.isPhysical())
- continue;
- const RegClassOrRegBank &ClassOrBank = MRI.getRegClassOrRegBank(Reg);
- const TargetRegisterClass *RC =
- ClassOrBank.dyn_cast<const TargetRegisterClass *>();
- if (!RC) {
- // Only a bank is assigned so far: derive the class from type and bank.
- const RegisterBank &Bank = *ClassOrBank.get<const RegisterBank *>();
- RC = getRegClassForTypeOnBank(MRI.getType(Reg), Bank);
- }
- if (!RC) {
- LLVM_DEBUG(
- dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
- // Give up on the remaining operands, but still report success.
- return true;
- }
- RBI.constrainGenericRegister(Reg, *RC, MRI);
- }
- return true;
- }
- /// Select \p I as a target COPY, fixing up size mismatches between its source
- /// (operand 1) and destination (operand 0) first.
- ///
- /// When \p I is already a COPY, one of three fix-ups may be inserted before it:
- /// - the source bank cannot hold a value as small as the destination: copy
- /// to the destination bank first, then take a subregister of that copy;
- /// - the source is wider than the destination: take a subregister copy;
- /// - the destination is wider than the source: promote with SUBREG_TO_REG.
- /// A G_ZEXT is turned into a COPY and run through this function once more.
- ///
- /// \returns false if a register class could not be determined or the
- /// destination could not be constrained, true otherwise.
- static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- const Register DstReg = I.getOperand(0).getReg();
- const Register SrcReg = I.getOperand(1).getReg();
- const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- // Find the correct register classes for the source and destination registers.
- const auto [SrcRC, DstRC] = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
- if (!DstRC) {
- LLVM_DEBUG(dbgs() << "Unexpected dest size "
- << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
- return false;
- }
- // A real copy may need a fix-up instruction inserted in front of it.
- if (I.isCopy()) {
- if (!SrcRC) {
- LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
- return false;
- }
- const unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
- const unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
- unsigned SubReg;
- if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
- // If the source bank doesn't support a subregister copy small enough,
- // then we first need to copy to the destination bank.
- const TargetRegisterClass *DstTempRC =
- getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
- getSubRegForClass(DstRC, TRI, SubReg);
- MachineIRBuilder MIB(I);
- auto CrossBankCopy = MIB.buildCopy({DstTempRC}, {SrcReg});
- copySubReg(I, MRI, RBI, CrossBankCopy.getReg(0), DstRC, SubReg);
- } else if (SrcSize > DstSize) {
- // If the source register is bigger than the destination we need to
- // perform a subregister copy.
- const TargetRegisterClass *SubRegRC =
- getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
- getSubRegForClass(SubRegRC, TRI, SubReg);
- copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
- } else if (DstSize > SrcSize) {
- // If the destination register is bigger than the source we need to do
- // a promotion using SUBREG_TO_REG.
- const TargetRegisterClass *PromotionRC =
- getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
- getSubRegForClass(SrcRC, TRI, SubReg);
- const Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
- .addImm(0)
- .addUse(SrcReg)
- .addImm(SubReg);
- I.getOperand(1).setReg(PromoteReg);
- }
- // If the destination is a physical register, then there's nothing to
- // change, so we're done.
- if (DstReg.isPhysical())
- return true;
- }
- // No need to constrain SrcReg. It will get constrained when we hit another
- // of its use or its defs. Copies do not have constraints.
- if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
- << " operand\n");
- return false;
- }
- // Remember the opcode before it is overwritten with COPY below.
- const bool WasZExt = I.getOpcode() == TargetOpcode::G_ZEXT;
- I.setDesc(TII.get(AArch64::COPY));
- if (!WasZExt)
- return true;
- // If this is a GPR ZEXT, we just reduce it down into a copy and select that.
- // The sizes will be mismatched with the source < 32b but that's ok.
- assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
- return selectCopy(I, TII, MRI, TRI, RBI);
- }
- static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
- if (!DstTy.isScalar() || !SrcTy.isScalar())
- return GenericOpc;
- const unsigned DstSize = DstTy.getSizeInBits();
- const unsigned SrcSize = SrcTy.getSizeInBits();
- switch (DstSize) {
- case 32:
- switch (SrcSize) {
- case 32:
- switch (GenericOpc) {
- case TargetOpcode::G_SITOFP:
- return AArch64::SCVTFUWSri;
- case TargetOpcode::G_UITOFP:
- return AArch64::UCVTFUWSri;
- case TargetOpcode::G_FPTOSI:
- return AArch64::FCVTZSUWSr;
- case TargetOpcode::G_FPTOUI:
- return AArch64::FCVTZUUWSr;
- default:
- return GenericOpc;
- }
- case 64:
- switch (GenericOpc) {
- case TargetOpcode::G_SITOFP:
- return AArch64::SCVTFUXSri;
- case TargetOpcode::G_UITOFP:
- return AArch64::UCVTFUXSri;
- case TargetOpcode::G_FPTOSI:
- return AArch64::FCVTZSUWDr;
- case TargetOpcode::G_FPTOUI:
- return AArch64::FCVTZUUWDr;
- default:
- return GenericOpc;
- }
- default:
- return GenericOpc;
- }
- case 64:
- switch (SrcSize) {
- case 32:
- switch (GenericOpc) {
- case TargetOpcode::G_SITOFP:
- return AArch64::SCVTFUWDri;
- case TargetOpcode::G_UITOFP:
- return AArch64::UCVTFUWDri;
- case TargetOpcode::G_FPTOSI:
- return AArch64::FCVTZSUXSr;
- case TargetOpcode::G_FPTOUI:
- return AArch64::FCVTZUUXSr;
- default:
- return GenericOpc;
- }
- case 64:
- switch (GenericOpc) {
- case TargetOpcode::G_SITOFP:
- return AArch64::SCVTFUXDri;
- case TargetOpcode::G_UITOFP:
- return AArch64::UCVTFUXDri;
- case TargetOpcode::G_FPTOSI:
- return AArch64::FCVTZSUXDr;
- case TargetOpcode::G_FPTOUI:
- return AArch64::FCVTZUUXDr;
- default:
- return GenericOpc;
- }
- default:
- return GenericOpc;
- }
- default:
- return GenericOpc;
- };
- return GenericOpc;
- }
- MachineInstr *
- AArch64InstructionSelector::emitSelect(Register Dst, Register True,
- Register False, AArch64CC::CondCode CC,
- MachineIRBuilder &MIB) const {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
- RBI.getRegBank(True, MRI, TRI)->getID() &&
- "Expected both select operands to have the same regbank?");
- LLT Ty = MRI.getType(True);
- if (Ty.isVector())
- return nullptr;
- const unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) &&
- "Expected 32 bit or 64 bit select only?");
- const bool Is32Bit = Size == 32;
- if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
- unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
- auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
- return &*FCSel;
- }
- // By default, we'll try and emit a CSEL.
- unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
- bool Optimized = false;
- auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
- &Optimized](Register &Reg, Register &OtherReg,
- bool Invert) {
- if (Optimized)
- return false;
- // Attempt to fold:
- //
- // %sub = G_SUB 0, %x
- // %select = G_SELECT cc, %reg, %sub
- //
- // Into:
- // %select = CSNEG %reg, %x, cc
- Register MatchReg;
- if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
- // Attempt to fold:
- //
- // %xor = G_XOR %x, -1
- // %select = G_SELECT cc, %reg, %xor
- //
- // Into:
- // %select = CSINV %reg, %x, cc
- if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
- // Attempt to fold:
- //
- // %add = G_ADD %x, 1
- // %select = G_SELECT cc, %reg, %add
- //
- // Into:
- // %select = CSINC %reg, %x, cc
- if (mi_match(Reg, MRI,
- m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
- m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
- return false;
- };
- // Helper lambda which tries to use CSINC/CSINV for the instruction when its
- // true/false values are constants.
- // FIXME: All of these patterns already exist in tablegen. We should be
- // able to import these.
- auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
- &Optimized]() {
- if (Optimized)
- return false;
- auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
- if (!TrueCst && !FalseCst)
- return false;
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
- if (TrueCst && FalseCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- int64_t F = FalseCst->Value.getSExtValue();
- if (T == 0 && F == 1) {
- // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
- if (T == 0 && F == -1) {
- // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
- }
- if (TrueCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- if (T == 1) {
- // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
- if (T == -1) {
- // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
- }
- if (FalseCst) {
- int64_t F = FalseCst->Value.getSExtValue();
- if (F == 1) {
- // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- False = ZReg;
- return true;
- }
- if (F == -1) {
- // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- False = ZReg;
- return true;
- }
- }
- return false;
- };
- Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
- Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
- Optimized |= TryOptSelectCst();
- auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
- return &*SelectInst;
- }
- static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
- switch (P) {
- default:
- llvm_unreachable("Unknown condition code!");
- case CmpInst::ICMP_NE:
- return AArch64CC::NE;
- case CmpInst::ICMP_EQ:
- return AArch64CC::EQ;
- case CmpInst::ICMP_SGT:
- return AArch64CC::GT;
- case CmpInst::ICMP_SGE:
- return AArch64CC::GE;
- case CmpInst::ICMP_SLT:
- return AArch64CC::LT;
- case CmpInst::ICMP_SLE:
- return AArch64CC::LE;
- case CmpInst::ICMP_UGT:
- return AArch64CC::HI;
- case CmpInst::ICMP_UGE:
- return AArch64CC::HS;
- case CmpInst::ICMP_ULT:
- return AArch64CC::LO;
- case CmpInst::ICMP_ULE:
- return AArch64CC::LS;
- }
- }
- /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
- static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
- AArch64CC::CondCode &CondCode,
- AArch64CC::CondCode &CondCode2) {
- CondCode2 = AArch64CC::AL;
- switch (CC) {
- default:
- llvm_unreachable("Unknown FP condition!");
- case CmpInst::FCMP_OEQ:
- CondCode = AArch64CC::EQ;
- break;
- case CmpInst::FCMP_OGT:
- CondCode = AArch64CC::GT;
- break;
- case CmpInst::FCMP_OGE:
- CondCode = AArch64CC::GE;
- break;
- case CmpInst::FCMP_OLT:
- CondCode = AArch64CC::MI;
- break;
- case CmpInst::FCMP_OLE:
- CondCode = AArch64CC::LS;
- break;
- case CmpInst::FCMP_ONE:
- CondCode = AArch64CC::MI;
- CondCode2 = AArch64CC::GT;
- break;
- case CmpInst::FCMP_ORD:
- CondCode = AArch64CC::VC;
- break;
- case CmpInst::FCMP_UNO:
- CondCode = AArch64CC::VS;
- break;
- case CmpInst::FCMP_UEQ:
- CondCode = AArch64CC::EQ;
- CondCode2 = AArch64CC::VS;
- break;
- case CmpInst::FCMP_UGT:
- CondCode = AArch64CC::HI;
- break;
- case CmpInst::FCMP_UGE:
- CondCode = AArch64CC::PL;
- break;
- case CmpInst::FCMP_ULT:
- CondCode = AArch64CC::LT;
- break;
- case CmpInst::FCMP_ULE:
- CondCode = AArch64CC::LE;
- break;
- case CmpInst::FCMP_UNE:
- CondCode = AArch64CC::NE;
- break;
- }
- }
- /// Convert an IR fp condition code to an AArch64 CC.
- /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
- /// should be AND'ed instead of OR'ed.
- static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
- AArch64CC::CondCode &CondCode,
- AArch64CC::CondCode &CondCode2) {
- CondCode2 = AArch64CC::AL;
- switch (CC) {
- default:
- changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
- assert(CondCode2 == AArch64CC::AL);
- break;
- case CmpInst::FCMP_ONE:
- // (a one b)
- // == ((a olt b) || (a ogt b))
- // == ((a ord b) && (a une b))
- CondCode = AArch64CC::VC;
- CondCode2 = AArch64CC::NE;
- break;
- case CmpInst::FCMP_UEQ:
- // (a ueq b)
- // == ((a uno b) || (a oeq b))
- // == ((a ule b) && (a uge b))
- CondCode = AArch64CC::PL;
- CondCode2 = AArch64CC::LE;
- break;
- }
- }
- /// Return a register which can be used as a bit to test in a TB(N)Z.
- static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
- MachineRegisterInfo &MRI) {
- assert(Reg.isValid() && "Expected valid register!");
- bool HasZext = false;
- while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
- unsigned Opc = MI->getOpcode();
- if (!MI->getOperand(0).isReg() ||
- !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
- break;
- // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
- //
- // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
- // on the truncated x is the same as the bit number on x.
- if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
- Opc == TargetOpcode::G_TRUNC) {
- if (Opc == TargetOpcode::G_ZEXT)
- HasZext = true;
- Register NextReg = MI->getOperand(1).getReg();
- // Did we find something worth folding?
- if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
- break;
- // NextReg is worth folding. Keep looking.
- Reg = NextReg;
- continue;
- }
- // Attempt to find a suitable operation with a constant on one side.
- std::optional<uint64_t> C;
- Register TestReg;
- switch (Opc) {
- default:
- break;
- case TargetOpcode::G_AND:
- case TargetOpcode::G_XOR: {
- TestReg = MI->getOperand(1).getReg();
- Register ConstantReg = MI->getOperand(2).getReg();
- auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
- if (!VRegAndVal) {
- // AND commutes, check the other side for a constant.
- // FIXME: Can we canonicalize the constant so that it's always on the
- // same side at some point earlier?
- std::swap(ConstantReg, TestReg);
- VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
- }
- if (VRegAndVal) {
- if (HasZext)
- C = VRegAndVal->Value.getZExtValue();
- else
- C = VRegAndVal->Value.getSExtValue();
- }
- break;
- }
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_SHL: {
- TestReg = MI->getOperand(1).getReg();
- auto VRegAndVal =
- getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
- if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
- break;
- }
- }
- // Didn't find a constant or viable register. Bail out of the loop.
- if (!C || !TestReg.isValid())
- break;
- // We found a suitable instruction with a constant. Check to see if we can
- // walk through the instruction.
- Register NextReg;
- unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
- switch (Opc) {
- default:
- break;
- case TargetOpcode::G_AND:
- // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
- if ((*C >> Bit) & 1)
- NextReg = TestReg;
- break;
- case TargetOpcode::G_SHL:
- // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
- // the type of the register.
- if (*C <= Bit && (Bit - *C) < TestRegSize) {
- NextReg = TestReg;
- Bit = Bit - *C;
- }
- break;
- case TargetOpcode::G_ASHR:
- // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
- // in x
- NextReg = TestReg;
- Bit = Bit + *C;
- if (Bit >= TestRegSize)
- Bit = TestRegSize - 1;
- break;
- case TargetOpcode::G_LSHR:
- // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
- if ((Bit + *C) < TestRegSize) {
- NextReg = TestReg;
- Bit = Bit + *C;
- }
- break;
- case TargetOpcode::G_XOR:
- // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
- // appropriate.
- //
- // e.g. If x' = xor x, c, and the b-th bit is set in c then
- //
- // tbz x', b -> tbnz x, b
- //
- // Because x' only has the b-th bit set if x does not.
- if ((*C >> Bit) & 1)
- Invert = !Invert;
- NextReg = TestReg;
- break;
- }
- // Check if we found anything worth folding.
- if (!NextReg.isValid())
- return Reg;
- Reg = NextReg;
- }
- return Reg;
- }
- MachineInstr *AArch64InstructionSelector::emitTestBit(
- Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const {
- assert(TestReg.isValid());
- assert(ProduceNonFlagSettingCondBr &&
- "Cannot emit TB(N)Z with speculation tracking!");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- // Attempt to optimize the test bit by walking over instructions.
- TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
- LLT Ty = MRI.getType(TestReg);
- unsigned Size = Ty.getSizeInBits();
- assert(!Ty.isVector() && "Expected a scalar!");
- assert(Bit < 64 && "Bit is too large!");
- // When the test register is a 64-bit register, we have to narrow to make
- // TBNZW work.
- bool UseWReg = Bit < 32;
- unsigned NecessarySize = UseWReg ? 32 : 64;
- if (Size != NecessarySize)
- TestReg = moveScalarRegClass(
- TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
- MIB);
- static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
- {AArch64::TBZW, AArch64::TBNZW}};
- unsigned Opc = OpcTable[UseWReg][IsNegative];
- auto TestBitMI =
- MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
- constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
- return &*TestBitMI;
- }
- bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
- MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const {
- assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
- // Given something like this:
- //
- // %x = ...Something...
- // %one = G_CONSTANT i64 1
- // %zero = G_CONSTANT i64 0
- // %and = G_AND %x, %one
- // %cmp = G_ICMP intpred(ne), %and, %zero
- // %cmp_trunc = G_TRUNC %cmp
- // G_BRCOND %cmp_trunc, %bb.3
- //
- // We want to try and fold the AND into the G_BRCOND and produce either a
- // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
- //
- // In this case, we'd get
- //
- // TBNZ %x %bb.3
- //
- // Check if the AND has a constant on its RHS which we can use as a mask.
- // If it's a power of 2, then it's the same as checking a specific bit.
- // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getIConstantVRegValWithLookThrough(
- AndInst.getOperand(2).getReg(), *MIB.getMRI());
- if (!MaybeBit)
- return false;
- int32_t Bit = MaybeBit->Value.exactLogBase2();
- if (Bit < 0)
- return false;
- Register TestReg = AndInst.getOperand(1).getReg();
- // Emit a TB(N)Z.
- emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
- return true;
- }
- MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
- bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const {
- assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
- AArch64::GPRRegBankID &&
- "Expected GPRs only?");
- auto Ty = MRI.getType(CompareReg);
- unsigned Width = Ty.getSizeInBits();
- assert(!Ty.isVector() && "Expected scalar only?");
- assert(Width <= 64 && "Expected width to be at most 64?");
- static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
- {AArch64::CBNZW, AArch64::CBNZX}};
- unsigned Opc = OpcTable[IsNegative][Width == 64];
- auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
- constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
- return &*BranchMI;
- }
- bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
- MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
- assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
- // totally clean. Some of them require two branches to implement.
- auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
- emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
- Pred);
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
- if (CC2 != AArch64CC::AL)
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
- //
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (!ProduceNonFlagSettingCondBr)
- return false;
- MachineRegisterInfo &MRI = *MIB.getMRI();
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto Pred =
- static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
- Register LHS = ICmp.getOperand(2).getReg();
- Register RHS = ICmp.getOperand(3).getReg();
- // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
- auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
- MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
- // When we can emit a TB(N)Z, prefer that.
- //
- // Handle non-commutative condition codes first.
- // Note that we don't want to do this when we have a G_AND because it can
- // become a tst. The tst will make the test bit in the TB(N)Z redundant.
- if (VRegAndVal && !AndInst) {
- int64_t C = VRegAndVal->Value.getSExtValue();
- // When we have a greater-than comparison, we can just test if the msb is
- // zero.
- if (C == -1 && Pred == CmpInst::ICMP_SGT) {
- uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
- emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- // When we have a less than comparison, we can just test if the msb is not
- // zero.
- if (C == 0 && Pred == CmpInst::ICMP_SLT) {
- uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
- emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- // Inversely, if we have a signed greater-than-or-equal comparison to zero,
- // we can test if the msb is zero.
- if (C == 0 && Pred == CmpInst::ICMP_SGE) {
- uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
- emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- }
- // Attempt to handle commutative condition codes. Right now, that's only
- // eq/ne.
- if (ICmpInst::isEquality(Pred)) {
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
- AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
- }
- if (VRegAndVal && VRegAndVal->Value == 0) {
- // If there's a G_AND feeding into this branch, try to fold it away by
- // emitting a TB(N)Z instead.
- //
- // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
- // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
- // would be redundant.
- if (AndInst &&
- tryOptAndIntoCompareBranch(
- *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
- I.eraseFromParent();
- return true;
- }
- // Otherwise, try to emit a CB(N)Z instead.
- auto LHSTy = MRI.getType(LHS);
- if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
- emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- }
- }
- return false;
- }
- bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
- return true;
- // Couldn't optimize. Emit a compare + a Bcc.
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto PredOp = ICmp.getOperand(1);
- emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
- Register CondReg = I.getOperand(0).getReg();
- MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- // Try to select the G_BRCOND using whatever is feeding the condition if
- // possible.
- unsigned CCMIOpc = CCMI->getOpcode();
- if (CCMIOpc == TargetOpcode::G_FCMP)
- return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
- if (CCMIOpc == TargetOpcode::G_ICMP)
- return selectCompareBranchFedByICmp(I, *CCMI, MIB);
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (ProduceNonFlagSettingCondBr) {
- emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
- I.getOperand(1).getMBB(), MIB);
- I.eraseFromParent();
- return true;
- }
- // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
- auto TstMI =
- MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- auto Bcc = MIB.buildInstr(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
- .addMBB(I.getOperand(1).getMBB());
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
- }
- /// Returns the element immediate value of a vector shift operand if found.
- /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
- static std::optional<int64_t> getVectorShiftImm(Register Reg,
- MachineRegisterInfo &MRI) {
- assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
- MachineInstr *OpMI = MRI.getVRegDef(Reg);
- return getAArch64VectorSplatScalar(*OpMI, MRI);
- }
- /// Matches and returns the shift immediate value for a SHL instruction given
- /// a shift operand.
- static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
- MachineRegisterInfo &MRI) {
- std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
- if (!ShiftImm)
- return std::nullopt;
- // Check the immediate is in range for a SHL.
- int64_t Imm = *ShiftImm;
- if (Imm < 0)
- return std::nullopt;
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
- return std::nullopt;
- case 8:
- if (Imm > 7)
- return std::nullopt;
- break;
- case 16:
- if (Imm > 15)
- return std::nullopt;
- break;
- case 32:
- if (Imm > 31)
- return std::nullopt;
- break;
- case 64:
- if (Imm > 63)
- return std::nullopt;
- break;
- }
- return Imm;
- }
- bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_SHL);
- Register DstReg = I.getOperand(0).getReg();
- const LLT Ty = MRI.getType(DstReg);
- Register Src1Reg = I.getOperand(1).getReg();
- Register Src2Reg = I.getOperand(2).getReg();
- if (!Ty.isVector())
- return false;
- // Check if we have a vector of constants on RHS that we can select as the
- // immediate form.
- std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
- unsigned Opc = 0;
- if (Ty == LLT::fixed_vector(2, 64)) {
- Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
- } else if (Ty == LLT::fixed_vector(4, 32)) {
- Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
- } else if (Ty == LLT::fixed_vector(2, 32)) {
- Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
- } else if (Ty == LLT::fixed_vector(4, 16)) {
- Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
- } else if (Ty == LLT::fixed_vector(8, 16)) {
- Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
- } else if (Ty == LLT::fixed_vector(16, 8)) {
- Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
- } else if (Ty == LLT::fixed_vector(8, 8)) {
- Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
- } else {
- LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
- return false;
- }
- auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
- if (ImmVal)
- Shl.addImm(*ImmVal);
- else
- Shl.addUse(Src2Reg);
- constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectVectorAshrLshr(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_ASHR ||
- I.getOpcode() == TargetOpcode::G_LSHR);
- Register DstReg = I.getOperand(0).getReg();
- const LLT Ty = MRI.getType(DstReg);
- Register Src1Reg = I.getOperand(1).getReg();
- Register Src2Reg = I.getOperand(2).getReg();
- if (!Ty.isVector())
- return false;
- bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
- // We expect the immediate case to be lowered in the PostLegalCombiner to
- // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
- // There is not a shift right register instruction, but the shift left
- // register instruction takes a signed value, where negative numbers specify a
- // right shift.
- unsigned Opc = 0;
- unsigned NegOpc = 0;
- const TargetRegisterClass *RC =
- getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
- if (Ty == LLT::fixed_vector(2, 64)) {
- Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
- NegOpc = AArch64::NEGv2i64;
- } else if (Ty == LLT::fixed_vector(4, 32)) {
- Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
- NegOpc = AArch64::NEGv4i32;
- } else if (Ty == LLT::fixed_vector(2, 32)) {
- Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
- NegOpc = AArch64::NEGv2i32;
- } else if (Ty == LLT::fixed_vector(4, 16)) {
- Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
- NegOpc = AArch64::NEGv4i16;
- } else if (Ty == LLT::fixed_vector(8, 16)) {
- Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
- NegOpc = AArch64::NEGv8i16;
- } else if (Ty == LLT::fixed_vector(16, 8)) {
- Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv16i8;
- } else if (Ty == LLT::fixed_vector(8, 8)) {
- Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
- NegOpc = AArch64::NEGv8i8;
- } else {
- LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
- return false;
- }
- auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
- constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
- auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
- constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectVaStartAAPCS(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- return false;
- }
- bool AArch64InstructionSelector::selectVaStartDarwin(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- Register ListReg = I.getOperand(0).getReg();
- Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto MIB =
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
- .addDef(ArgsAddrReg)
- .addFrameIndex(FuncInfo->getVarArgsStackIndex())
- .addImm(0)
- .addImm(0);
- constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
- MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
- .addUse(ArgsAddrReg)
- .addUse(ListReg)
- .addImm(0)
- .addMemOperand(*I.memoperands_begin());
- constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
- }
- void AArch64InstructionSelector::materializeLargeCMVal(
- MachineInstr &I, const Value *V, unsigned OpFlags) {
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
- MovZ->addOperand(MF, I.getOperand(1));
- MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
- AArch64II::MO_NC);
- MovZ->addOperand(MF, MachineOperand::CreateImm(0));
- constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
- auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
- Register ForceDstReg) {
- Register DstReg = ForceDstReg
- ? ForceDstReg
- : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
- if (auto *GV = dyn_cast<GlobalValue>(V)) {
- MovI->addOperand(MF, MachineOperand::CreateGA(
- GV, MovZ->getOperand(1).getOffset(), Flags));
- } else {
- MovI->addOperand(
- MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
- MovZ->getOperand(1).getOffset(), Flags));
- }
- MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
- constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
- return DstReg;
- };
- Register DstReg = BuildMovK(MovZ.getReg(0),
- AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
- DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
- BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
- }
- bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (I.getOpcode()) {
- case TargetOpcode::G_STORE: {
- bool Changed = contractCrossBankCopyIntoStore(I, MRI);
- MachineOperand &SrcOp = I.getOperand(0);
- if (MRI.getType(SrcOp.getReg()).isPointer()) {
- // Allow matching with imported patterns for stores of pointers. Unlike
- // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
- // and constrain.
- auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
- Register NewSrc = Copy.getReg(0);
- SrcOp.setReg(NewSrc);
- RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
- Changed = true;
- }
- return Changed;
- }
- case TargetOpcode::G_PTR_ADD:
- return convertPtrAddToAdd(I, MRI);
- case TargetOpcode::G_LOAD: {
- // For scalar loads of pointers, we try to convert the dest type from p0
- // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
- // conversion, this should be ok because all users should have been
- // selected already, so the type doesn't matter for them.
- Register DstReg = I.getOperand(0).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- if (!DstTy.isPointer())
- return false;
- MRI.setType(DstReg, LLT::scalar(64));
- return true;
- }
- case AArch64::G_DUP: {
- // Convert the type from p0 to s64 to help selection.
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (!DstTy.getElementType().isPointer())
- return false;
- auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
- MRI.setType(I.getOperand(0).getReg(),
- DstTy.changeElementType(LLT::scalar(64)));
- MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
- I.getOperand(1).setReg(NewSrc.getReg(0));
- return true;
- }
- case TargetOpcode::G_UITOFP:
- case TargetOpcode::G_SITOFP: {
- // If both source and destination regbanks are FPR, then convert the opcode
- // to G_SITOF so that the importer can select it to an fpr variant.
- // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
- // copy.
- Register SrcReg = I.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
- return false;
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
- if (I.getOpcode() == TargetOpcode::G_SITOFP)
- I.setDesc(TII.get(AArch64::G_SITOF));
- else
- I.setDesc(TII.get(AArch64::G_UITOF));
- return true;
- }
- return false;
- }
- default:
- return false;
- }
- }
- /// This lowering tries to look for G_PTR_ADD instructions and then converts
- /// them to a standard G_ADD with a COPY on the source.
- ///
- /// The motivation behind this is to expose the add semantics to the imported
- /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
- /// because the selector works bottom up, uses before defs. By the time we
- /// end up trying to select a G_PTR_ADD, we should have already attempted to
- /// fold this into addressing modes and were therefore unsuccessful.
- bool AArch64InstructionSelector::convertPtrAddToAdd(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
- Register DstReg = I.getOperand(0).getReg();
- Register AddOp1Reg = I.getOperand(1).getReg();
- const LLT PtrTy = MRI.getType(DstReg);
- if (PtrTy.getAddressSpace() != 0)
- return false;
- const LLT CastPtrTy =
- PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
- auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
- // Set regbanks on the registers.
- if (PtrTy.isVector())
- MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
- else
- MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
- // %dst(intty) = G_ADD %intbase, off
- I.setDesc(TII.get(TargetOpcode::G_ADD));
- MRI.setType(DstReg, CastPtrTy);
- I.getOperand(1).setReg(PtrToInt.getReg(0));
- if (!select(*PtrToInt)) {
- LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
- return false;
- }
- // Also take the opportunity here to try to do some optimization.
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
- Register NegatedReg;
- if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
- return true;
- I.getOperand(2).setReg(NegatedReg);
- I.setDesc(TII.get(TargetOpcode::G_SUB));
- return true;
- }
- bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- // We try to match the immediate variant of LSL, which is actually an alias
- // for a special case of UBFM. Otherwise, we fall back to the imported
- // selector which will match the register variant.
- assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
- const auto &MO = I.getOperand(2);
- auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
- if (!VRegAndVal)
- return false;
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (DstTy.isVector())
- return false;
- bool Is64Bit = DstTy.getSizeInBits() == 64;
- auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
- auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
- if (!Imm1Fn || !Imm2Fn)
- return false;
- auto NewI =
- MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
- {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
- for (auto &RenderFn : *Imm1Fn)
- RenderFn(NewI);
- for (auto &RenderFn : *Imm2Fn)
- RenderFn(NewI);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
- }
- bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
- // If we're storing a scalar, it doesn't matter what register bank that
- // scalar is on. All that matters is the size.
- //
- // So, if we see something like this (with a 32-bit scalar as an example):
- //
- // %x:gpr(s32) = ... something ...
- // %y:fpr(s32) = COPY %x:gpr(s32)
- // G_STORE %y:fpr(s32)
- //
- // We can fix this up into something like this:
- //
- // G_STORE %x:gpr(s32)
- //
- // And then continue the selection process normally.
- Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
- if (!DefDstReg.isValid())
- return false;
- LLT DefDstTy = MRI.getType(DefDstReg);
- Register StoreSrcReg = I.getOperand(0).getReg();
- LLT StoreSrcTy = MRI.getType(StoreSrcReg);
- // If we get something strange like a physical register, then we shouldn't
- // go any further.
- if (!DefDstTy.isValid())
- return false;
- // Are the source and dst types the same size?
- if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
- return false;
- if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
- RBI.getRegBank(DefDstReg, MRI, TRI))
- return false;
- // We have a cross-bank copy, which is entering a store. Let's fold it.
- I.getOperand(0).setReg(DefDstReg);
- return true;
- }
- bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
- assert(I.getParent() && "Instruction should be in a basic block!");
- assert(I.getParent()->getParent() && "Instruction should be in a function!");
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (I.getOpcode()) {
- case AArch64::G_DUP: {
- // Before selecting a DUP instruction, check if it is better selected as a
- // MOV or load from a constant pool.
- Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
- if (!ValAndVReg)
- return false;
- LLVMContext &Ctx = MF.getFunction().getContext();
- Register Dst = I.getOperand(0).getReg();
- auto *CV = ConstantDataVector::getSplat(
- MRI.getType(Dst).getNumElements(),
- ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
- ValAndVReg->Value));
- if (!emitConstantVector(Dst, CV, MIB, MRI))
- return false;
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_SEXT:
- // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
- // over a normal extend.
- if (selectUSMovFromExtend(I, MRI))
- return true;
- return false;
- case TargetOpcode::G_BR:
- return false;
- case TargetOpcode::G_SHL:
- return earlySelectSHL(I, MRI);
- case TargetOpcode::G_CONSTANT: {
- bool IsZero = false;
- if (I.getOperand(1).isCImm())
- IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
- else if (I.getOperand(1).isImm())
- IsZero = I.getOperand(1).getImm() == 0;
- if (!IsZero)
- return false;
- Register DefReg = I.getOperand(0).getReg();
- LLT Ty = MRI.getType(DefReg);
- if (Ty.getSizeInBits() == 64) {
- I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
- RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
- } else if (Ty.getSizeInBits() == 32) {
- I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
- RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
- } else
- return false;
- I.setDesc(TII.get(TargetOpcode::COPY));
- return true;
- }
- case TargetOpcode::G_ADD: {
- // Check if this is being fed by a G_ICMP on either side.
- //
- // (cmp pred, x, y) + z
- //
- // In the above case, when the cmp is true, we increment z by 1. So, we can
- // fold the add into the cset for the cmp by using cinc.
- //
- // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
- Register AddDst = I.getOperand(0).getReg();
- Register AddLHS = I.getOperand(1).getReg();
- Register AddRHS = I.getOperand(2).getReg();
- // Only handle scalars.
- LLT Ty = MRI.getType(AddLHS);
- if (Ty.isVector())
- return false;
- // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
- // bits.
- unsigned Size = Ty.getSizeInBits();
- if (Size != 32 && Size != 64)
- return false;
- auto MatchCmp = [&](Register Reg) -> MachineInstr * {
- if (!MRI.hasOneNonDBGUse(Reg))
- return nullptr;
- // If the LHS of the add is 32 bits, then we want to fold a 32-bit
- // compare.
- if (Size == 32)
- return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
- // We model scalar compares using 32-bit destinations right now.
- // If it's a 64-bit compare, it'll have 64-bit sources.
- Register ZExt;
- if (!mi_match(Reg, MRI,
- m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
- return nullptr;
- auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
- if (!Cmp ||
- MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
- return nullptr;
- return Cmp;
- };
- // Try to match
- // z + (cmp pred, x, y)
- MachineInstr *Cmp = MatchCmp(AddRHS);
- if (!Cmp) {
- // (cmp pred, x, y) + z
- std::swap(AddLHS, AddRHS);
- Cmp = MatchCmp(AddRHS);
- if (!Cmp)
- return false;
- }
- auto &PredOp = Cmp->getOperand(1);
- auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
- const AArch64CC::CondCode InvCC =
- changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
- MIB.setInstrAndDebugLoc(I);
- emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
- /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
- emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_OR: {
- // Look for operations that take the lower `Width=Size-ShiftImm` bits of
- // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
- // shifting and masking that we can replace with a BFI (encoded as a BFM).
- Register Dst = I.getOperand(0).getReg();
- LLT Ty = MRI.getType(Dst);
- if (!Ty.isScalar())
- return false;
- unsigned Size = Ty.getSizeInBits();
- if (Size != 32 && Size != 64)
- return false;
- Register ShiftSrc;
- int64_t ShiftImm;
- Register MaskSrc;
- int64_t MaskImm;
- if (!mi_match(
- Dst, MRI,
- m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
- m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
- return false;
- if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
- return false;
- int64_t Immr = Size - ShiftImm;
- int64_t Imms = Size - ShiftImm - 1;
- unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
- emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_FENCE: {
- if (I.getOperand(1).getImm() == 0)
- BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
- else
- BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
- .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
- I.eraseFromParent();
- return true;
- }
- default:
- return false;
- }
- }
- bool AArch64InstructionSelector::select(MachineInstr &I) {
- assert(I.getParent() && "Instruction should be in a basic block!");
- assert(I.getParent()->getParent() && "Instruction should be in a function!");
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
- if (Subtarget->requiresStrictAlign()) {
- // We don't support this feature yet.
- LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
- return false;
- }
- MIB.setInstrAndDebugLoc(I);
- unsigned Opcode = I.getOpcode();
- // G_PHI requires same handling as PHI
- if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
- // Certain non-generic instructions also need some special handling.
- if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
- const Register DefReg = I.getOperand(0).getReg();
- const LLT DefTy = MRI.getType(DefReg);
- const RegClassOrRegBank &RegClassOrBank =
- MRI.getRegClassOrRegBank(DefReg);
- const TargetRegisterClass *DefRC
- = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
- if (!DefRC) {
- if (!DefTy.isValid()) {
- LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
- return false;
- }
- const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
- DefRC = getRegClassForTypeOnBank(DefTy, RB);
- if (!DefRC) {
- LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
- return false;
- }
- }
- I.setDesc(TII.get(TargetOpcode::PHI));
- return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
- }
- if (I.isCopy())
- return selectCopy(I, TII, MRI, TRI, RBI);
- if (I.isDebugInstr())
- return selectDebugInstr(I, MRI, RBI);
- return true;
- }
- if (I.getNumOperands() != I.getNumExplicitOperands()) {
- LLVM_DEBUG(
- dbgs() << "Generic instruction has unexpected implicit operands\n");
- return false;
- }
- // Try to do some lowering before we start instruction selecting. These
- // lowerings are purely transformations on the input G_MIR and so selection
- // must continue after any modification of the instruction.
- if (preISelLower(I)) {
- Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
- }
- // There may be patterns where the importer can't deal with them optimally,
- // but does select it to a suboptimal sequence so our custom C++ selection
- // code later never has a chance to work on it. Therefore, we have an early
- // selection attempt here to give priority to certain selection routines
- // over the imported ones.
- if (earlySelect(I))
- return true;
- if (selectImpl(I, *CoverageInfo))
- return true;
- LLT Ty =
- I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
- switch (Opcode) {
- case TargetOpcode::G_SBFX:
- case TargetOpcode::G_UBFX: {
- static const unsigned OpcTable[2][2] = {
- {AArch64::UBFMWri, AArch64::UBFMXri},
- {AArch64::SBFMWri, AArch64::SBFMXri}};
- bool IsSigned = Opcode == TargetOpcode::G_SBFX;
- unsigned Size = Ty.getSizeInBits();
- unsigned Opc = OpcTable[IsSigned][Size == 64];
- auto Cst1 =
- getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
- assert(Cst1 && "Should have gotten a constant for src 1?");
- auto Cst2 =
- getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
- assert(Cst2 && "Should have gotten a constant for src 2?");
- auto LSB = Cst1->Value.getZExtValue();
- auto Width = Cst2->Value.getZExtValue();
- auto BitfieldInst =
- MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
- .addImm(LSB)
- .addImm(LSB + Width - 1);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
- }
- case TargetOpcode::G_BRCOND:
- return selectCompareBranch(I, MF, MRI);
- case TargetOpcode::G_BRINDIRECT: {
- I.setDesc(TII.get(AArch64::BR));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_BRJT:
- return selectBrJT(I, MRI);
- case AArch64::G_ADD_LOW: {
- // This op may have been separated from its ADRP companion by the localizer
- // or some other code motion pass. Given that many CPUs will try to
- // macro fuse these operations anyway, select this into a MOVaddr pseudo
- // which will later be expanded into an ADRP+ADD pair after scheduling.
- MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
- if (BaseMI->getOpcode() != AArch64::ADRP) {
- I.setDesc(TII.get(AArch64::ADDXri));
- I.addOperand(MachineOperand::CreateImm(0));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- assert(TM.getCodeModel() == CodeModel::Small &&
- "Expected small code model");
- auto Op1 = BaseMI->getOperand(1);
- auto Op2 = I.getOperand(2);
- auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
- .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
- Op1.getTargetFlags())
- .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
- Op2.getTargetFlags());
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
- }
- case TargetOpcode::G_BSWAP: {
- // Handle vector types for G_BSWAP directly.
- Register DstReg = I.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- // We should only get vector types here; everything else is handled by the
- // importer right now.
- if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
- LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
- return false;
- }
- // Only handle 4 and 2 element vectors for now.
- // TODO: 16-bit elements.
- unsigned NumElts = DstTy.getNumElements();
- if (NumElts != 4 && NumElts != 2) {
- LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
- return false;
- }
- // Choose the correct opcode for the supported types. Right now, that's
- // v2s32, v4s32, and v2s64.
- unsigned Opc = 0;
- unsigned EltSize = DstTy.getElementType().getSizeInBits();
- if (EltSize == 32)
- Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
- : AArch64::REV32v16i8;
- else if (EltSize == 64)
- Opc = AArch64::REV64v16i8;
- // We should always get something by the time we get here...
- assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_CONSTANT: {
- const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
- const LLT s8 = LLT::scalar(8);
- const LLT s16 = LLT::scalar(16);
- const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
- const LLT s128 = LLT::scalar(128);
- const LLT p0 = LLT::pointer(0, 64);
- const Register DefReg = I.getOperand(0).getReg();
- const LLT DefTy = MRI.getType(DefReg);
- const unsigned DefSize = DefTy.getSizeInBits();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- // FIXME: Redundant check, but even less readable when factored out.
- if (isFP) {
- if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
- LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant, expected: " << s16 << " or " << s32
- << " or " << s64 << " or " << s128 << '\n');
- return false;
- }
- if (RB.getID() != AArch64::FPRRegBankID) {
- LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant on bank: " << RB
- << ", expected: FPR\n");
- return false;
- }
- // The case when we have 0.0 is covered by tablegen. Reject it here so we
- // can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
- // scenario in the code here.
- if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
- return false;
- } else {
- // s32 and s64 are covered by tablegen.
- if (Ty != p0 && Ty != s8 && Ty != s16) {
- LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- << " constant, expected: " << s32 << ", " << s64
- << ", or " << p0 << '\n');
- return false;
- }
- if (RB.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- << " constant on bank: " << RB
- << ", expected: GPR\n");
- return false;
- }
- }
- if (isFP) {
- const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
- // For 16, 64, and 128b values, emit a constant pool load.
- switch (DefSize) {
- default:
- llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
- case 32:
- // For s32, use a cp load if we have optsize/minsize.
- if (!shouldOptForSize(&MF))
- break;
- [[fallthrough]];
- case 16:
- case 64:
- case 128: {
- auto *FPImm = I.getOperand(1).getFPImm();
- auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
- if (!LoadMI) {
- LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
- return false;
- }
- MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
- }
- }
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- assert(DefSize == 32 &&
- "Expected constant pool loads for all sizes other than 32!");
- const Register DefGPRReg =
- MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MachineOperand &RegOp = I.getOperand(0);
- RegOp.setReg(DefGPRReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
- MIB.buildCopy({DefReg}, {DefGPRReg});
- if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
- return false;
- }
- MachineOperand &ImmOp = I.getOperand(1);
- // FIXME: Is going through int64_t always correct?
- ImmOp.ChangeToImmediate(
- ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- } else if (I.getOperand(1).isCImm()) {
- uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
- I.getOperand(1).ChangeToImmediate(Val);
- } else if (I.getOperand(1).isImm()) {
- uint64_t Val = I.getOperand(1).getImm();
- I.getOperand(1).ChangeToImmediate(Val);
- }
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
- I.setDesc(TII.get(MovOpc));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return true;
- }
- case TargetOpcode::G_EXTRACT: {
- Register DstReg = I.getOperand(0).getReg();
- Register SrcReg = I.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(DstReg);
- (void)DstTy;
- unsigned SrcSize = SrcTy.getSizeInBits();
- if (SrcTy.getSizeInBits() > 64) {
- // This should be an extract of an s128, which is like a vector extract.
- if (SrcTy.getSizeInBits() != 128)
- return false;
- // Only support extracting 64 bits from an s128 at the moment.
- if (DstTy.getSizeInBits() != 64)
- return false;
- unsigned Offset = I.getOperand(2).getImm();
- if (Offset % 64 != 0)
- return false;
- // Check we have the right regbank always.
- const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
- const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
- if (SrcRB.getID() == AArch64::GPRRegBankID) {
- auto NewI =
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addUse(SrcReg, 0,
- Offset == 0 ? AArch64::sube64 : AArch64::subo64);
- constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
- AArch64::GPR64RegClass, NewI->getOperand(0));
- I.eraseFromParent();
- return true;
- }
- // Emit the same code as a vector extract.
- // Offset must be a multiple of 64.
- unsigned LaneIdx = Offset / 64;
- MachineInstr *Extract = emitExtractVectorElt(
- DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
- if (!Extract)
- return false;
- I.eraseFromParent();
- return true;
- }
- I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
- MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
- Ty.getSizeInBits() - 1);
- if (SrcSize < 64) {
- assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
- "unexpected G_EXTRACT types");
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
- MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
- .addReg(DstReg, 0, AArch64::sub_32);
- RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- AArch64::GPR32RegClass, MRI);
- I.getOperand(0).setReg(DstReg);
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_INSERT: {
- LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- unsigned DstSize = DstTy.getSizeInBits();
- // Larger inserts are vectors, same-size ones should be something else by
- // now (split up or turned into COPYs).
- if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
- return false;
- I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
- unsigned LSB = I.getOperand(3).getImm();
- unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
- I.getOperand(3).setImm((DstSize - LSB) % DstSize);
- MachineInstrBuilder(MF, I).addImm(Width - 1);
- if (DstSize < 64) {
- assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
- "unexpected G_INSERT types");
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
- BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
- TII.get(AArch64::SUBREG_TO_REG))
- .addDef(SrcReg)
- .addImm(0)
- .addUse(I.getOperand(2).getReg())
- .addImm(AArch64::sub_32);
- RBI.constrainGenericRegister(I.getOperand(2).getReg(),
- AArch64::GPR32RegClass, MRI);
- I.getOperand(2).setReg(SrcReg);
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_FRAME_INDEX: {
- // allocas and G_FRAME_INDEX are only supported in addrspace(0).
- if (Ty != LLT::pointer(0, 64)) {
- LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
- << ", expected: " << LLT::pointer(0, 64) << '\n');
- return false;
- }
- I.setDesc(TII.get(AArch64::ADDXri));
- // MOs for a #0 shifted immediate.
- I.addOperand(MachineOperand::CreateImm(0));
- I.addOperand(MachineOperand::CreateImm(0));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_GLOBAL_VALUE: {
- auto GV = I.getOperand(1).getGlobal();
- if (GV->isThreadLocal())
- return selectTLSGlobalValue(I, MRI);
- unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
- if (OpFlags & AArch64II::MO_GOT) {
- I.setDesc(TII.get(AArch64::LOADgot));
- I.getOperand(1).setTargetFlags(OpFlags);
- } else if (TM.getCodeModel() == CodeModel::Large) {
- // Materialize the global using movz/movk instructions.
- materializeLargeCMVal(I, GV, OpFlags);
- I.eraseFromParent();
- return true;
- } else if (TM.getCodeModel() == CodeModel::Tiny) {
- I.setDesc(TII.get(AArch64::ADR));
- I.getOperand(1).setTargetFlags(OpFlags);
- } else {
- I.setDesc(TII.get(AArch64::MOVaddr));
- I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
- MachineInstrBuilder MIB(MF, I);
- MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
- OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- }
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_ZEXTLOAD:
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_STORE: {
- GLoadStore &LdSt = cast<GLoadStore>(I);
- bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- LLT PtrTy = MRI.getType(LdSt.getPointerReg());
- if (PtrTy != LLT::pointer(0, 64)) {
- LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
- << ", expected: " << LLT::pointer(0, 64) << '\n');
- return false;
- }
- uint64_t MemSizeInBytes = LdSt.getMemSize();
- unsigned MemSizeInBits = LdSt.getMemSizeInBits();
- AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
- // Need special instructions for atomics that affect ordering.
- if (Order != AtomicOrdering::NotAtomic &&
- Order != AtomicOrdering::Unordered &&
- Order != AtomicOrdering::Monotonic) {
- assert(!isa<GZExtLoad>(LdSt));
- if (MemSizeInBytes > 64)
- return false;
- if (isa<GLoad>(LdSt)) {
- static constexpr unsigned LDAPROpcodes[] = {
- AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
- static constexpr unsigned LDAROpcodes[] = {
- AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
- ArrayRef<unsigned> Opcodes =
- STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
- ? LDAPROpcodes
- : LDAROpcodes;
- I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
- } else {
- static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
- AArch64::STLRW, AArch64::STLRX};
- Register ValReg = LdSt.getReg(0);
- if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
- // Emit a subreg copy of 32 bits.
- Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
- .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
- I.getOperand(0).setReg(NewVal);
- }
- I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
- }
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return true;
- }
- #ifndef NDEBUG
- const Register PtrReg = LdSt.getPointerReg();
- const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
- // Check that the pointer register is valid.
- assert(PtrRB.getID() == AArch64::GPRRegBankID &&
- "Load/Store pointer operand isn't a GPR");
- assert(MRI.getType(PtrReg).isPointer() &&
- "Load/Store pointer operand isn't a pointer");
- #endif
- const Register ValReg = LdSt.getReg(0);
- const LLT ValTy = MRI.getType(ValReg);
- const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- // The code below doesn't support truncating stores, so we need to split it
- // again.
- if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
- unsigned SubReg;
- LLT MemTy = LdSt.getMMO().getMemoryType();
- auto *RC = getRegClassForTypeOnBank(MemTy, RB);
- if (!getSubRegForClass(RC, TRI, SubReg))
- return false;
- // Generate a subreg copy.
- auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
- .addReg(ValReg, 0, SubReg)
- .getReg(0);
- RBI.constrainGenericRegister(Copy, *RC, MRI);
- LdSt.getOperand(0).setReg(Copy);
- } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
- // If this is an any-extending load from the FPR bank, split it into a regular
- // load + extend.
- if (RB.getID() == AArch64::FPRRegBankID) {
- unsigned SubReg;
- LLT MemTy = LdSt.getMMO().getMemoryType();
- auto *RC = getRegClassForTypeOnBank(MemTy, RB);
- if (!getSubRegForClass(RC, TRI, SubReg))
- return false;
- Register OldDst = LdSt.getReg(0);
- Register NewDst =
- MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
- LdSt.getOperand(0).setReg(NewDst);
- MRI.setRegBank(NewDst, RB);
- // Generate a SUBREG_TO_REG to extend it.
- MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
- .addImm(0)
- .addUse(NewDst)
- .addImm(SubReg);
- auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
- RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
- MIB.setInstr(LdSt);
- }
- }
- // Helper lambda for partially selecting I. Either returns the original
- // instruction with an updated opcode, or a new instruction.
- auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = isa<GStore>(I);
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return nullptr;
- // Check if we can fold anything into the addressing mode.
- auto AddrModeFns =
- selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
- if (!AddrModeFns) {
- // Can't fold anything. Use the original instruction.
- I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
- return &I;
- }
- // Folded something. Create a new instruction and return it.
- auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- Register CurValReg = I.getOperand(0).getReg();
- IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
- NewInst.cloneMemRefs(I);
- for (auto &Fn : *AddrModeFns)
- Fn(NewInst);
- I.eraseFromParent();
- return &*NewInst;
- };
- MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
- if (!LoadStore)
- return false;
- // If we're storing a 0, use WZR/XZR.
- if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getIConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI);
- if (CVal && CVal->Value == 0) {
- switch (LoadStore->getOpcode()) {
- case AArch64::STRWui:
- case AArch64::STRHHui:
- case AArch64::STRBBui:
- LoadStore->getOperand(0).setReg(AArch64::WZR);
- break;
- case AArch64::STRXui:
- LoadStore->getOperand(0).setReg(AArch64::XZR);
- break;
- }
- }
- }
- if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the
- // importer.
- if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
- return false;
- // If we have a ZEXTLOAD then change the load's type to be a narrower reg
- // and zero_extend with SUBREG_TO_REG.
- Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = LoadStore->getOperand(0).getReg();
- LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
- .addImm(0)
- .addUse(LdReg)
- .addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
- return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
- MRI);
- }
- return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
- }
- case TargetOpcode::G_SMULH:
- case TargetOpcode::G_UMULH: {
- // Reject the various things we don't support yet.
- if (unsupportedBinOp(I, RBI, MRI, TRI))
- return false;
- const Register DefReg = I.getOperand(0).getReg();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- if (RB.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
- return false;
- }
- if (Ty != LLT::scalar(64)) {
- LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
- << ", expected: " << LLT::scalar(64) << '\n');
- return false;
- }
- unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
- : AArch64::UMULHrr;
- I.setDesc(TII.get(NewOpc));
- // Now that we selected an opcode, we need to constrain the register
- // operands to use appropriate classes.
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
- if (MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorAshrLshr(I, MRI);
- [[fallthrough]];
- case TargetOpcode::G_SHL:
- if (Opcode == TargetOpcode::G_SHL &&
- MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorSHL(I, MRI);
- // These shifts were legalized to have 64 bit shift amounts because we
- // want to take advantage of the selection patterns that assume the
- // immediates are s64s, however, selectBinaryOp will assume both operands
- // will have the same bit size.
- {
- Register SrcReg = I.getOperand(1).getReg();
- Register ShiftReg = I.getOperand(2).getReg();
- const LLT ShiftTy = MRI.getType(ShiftReg);
- const LLT SrcTy = MRI.getType(SrcReg);
- if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
- ShiftTy.getSizeInBits() == 64) {
- assert(!ShiftTy.isVector() && "unexpected vector shift ty");
- // Insert a subregister copy to implement a 64->32 trunc
- auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
- .addReg(ShiftReg, 0, AArch64::sub_32);
- MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- I.getOperand(2).setReg(Trunc.getReg(0));
- }
- }
- [[fallthrough]];
- case TargetOpcode::G_OR: {
- // Reject the various things we don't support yet.
- if (unsupportedBinOp(I, RBI, MRI, TRI))
- return false;
- const unsigned OpSize = Ty.getSizeInBits();
- const Register DefReg = I.getOperand(0).getReg();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
- if (NewOpc == I.getOpcode())
- return false;
- I.setDesc(TII.get(NewOpc));
- // FIXME: Should the type be always reset in setDesc?
- // Now that we selected an opcode, we need to constrain the register
- // operands to use appropriate classes.
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_PTR_ADD: {
- emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_USUBO: {
- // Emit the operation and get the correct condition code.
- auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIB);
- // Now, put the overflow result in the register given by the first operand
- // to the overflow op. CSINC increments the result when the predicate is
- // false, so to get the increment when it's true, we need to use the
- // inverse. In this case, we want to increment when carry is set.
- Register ZReg = AArch64::WZR;
- emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
- getInvertedCondCode(OpAndCC.second), MIB);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_PTRMASK: {
- Register MaskReg = I.getOperand(2).getReg();
- std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
- // TODO: Implement arbitrary cases
- if (!MaskVal || !isShiftedMask_64(*MaskVal))
- return false;
- uint64_t Mask = *MaskVal;
- I.setDesc(TII.get(AArch64::ANDXri));
- I.getOperand(2).ChangeToImmediate(
- AArch64_AM::encodeLogicalImmediate(Mask, 64));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_PTRTOINT:
- case TargetOpcode::G_TRUNC: {
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
- const Register DstReg = I.getOperand(0).getReg();
- const Register SrcReg = I.getOperand(1).getReg();
- const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
- if (DstRB.getID() != SrcRB.getID()) {
- LLVM_DEBUG(
- dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
- return false;
- }
- if (DstRB.getID() == AArch64::GPRRegBankID) {
- const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
- if (!DstRC)
- return false;
- const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
- if (!SrcRC)
- return false;
- if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
- !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
- return false;
- }
- if (DstRC == SrcRC) {
- // Nothing to be done
- } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
- SrcTy == LLT::scalar(64)) {
- llvm_unreachable("TableGen can import this case");
- return false;
- } else if (DstRC == &AArch64::GPR32RegClass &&
- SrcRC == &AArch64::GPR64RegClass) {
- I.getOperand(1).setSubReg(AArch64::sub_32);
- } else {
- LLVM_DEBUG(
- dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
- return false;
- }
- I.setDesc(TII.get(TargetOpcode::COPY));
- return true;
- } else if (DstRB.getID() == AArch64::FPRRegBankID) {
- if (DstTy == LLT::fixed_vector(4, 16) &&
- SrcTy == LLT::fixed_vector(4, 32)) {
- I.setDesc(TII.get(AArch64::XTNv4i16));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return true;
- }
- if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
- MachineInstr *Extract = emitExtractVectorElt(
- DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
- if (!Extract)
- return false;
- I.eraseFromParent();
- return true;
- }
- // We might have a vector G_PTRTOINT, in which case just emit a COPY.
- if (Opcode == TargetOpcode::G_PTRTOINT) {
- assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
- I.setDesc(TII.get(TargetOpcode::COPY));
- return selectCopy(I, TII, MRI, TRI, RBI);
- }
- }
- return false;
- }
- case TargetOpcode::G_ANYEXT: {
- if (selectUSMovFromExtend(I, MRI))
- return true;
- const Register DstReg = I.getOperand(0).getReg();
- const Register SrcReg = I.getOperand(1).getReg();
- const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
- if (RBDst.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
- << ", expected: GPR\n");
- return false;
- }
- const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
- if (RBSrc.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
- << ", expected: GPR\n");
- return false;
- }
- const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- if (DstSize == 0) {
- LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
- return false;
- }
- if (DstSize != 64 && DstSize > 32) {
- LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
- << ", expected: 32 or 64\n");
- return false;
- }
- // At this point G_ANYEXT is just like a plain COPY, but we need
- // to explicitly form the 64-bit value if any.
- if (DstSize > 32) {
- Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
- BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
- .addDef(ExtSrc)
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::sub_32);
- I.getOperand(1).setReg(ExtSrc);
- }
- return selectCopy(I, TII, MRI, TRI, RBI);
- }
- case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_SEXT_INREG:
- case TargetOpcode::G_SEXT: {
- if (selectUSMovFromExtend(I, MRI))
- return true;
- unsigned Opcode = I.getOpcode();
- const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
- const Register DefReg = I.getOperand(0).getReg();
- Register SrcReg = I.getOperand(1).getReg();
- const LLT DstTy = MRI.getType(DefReg);
- const LLT SrcTy = MRI.getType(SrcReg);
- unsigned DstSize = DstTy.getSizeInBits();
- unsigned SrcSize = SrcTy.getSizeInBits();
- // SEXT_INREG has the same src reg size as dst, the size of the value to be
- // extended is encoded in the imm.
- if (Opcode == TargetOpcode::G_SEXT_INREG)
- SrcSize = I.getOperand(2).getImm();
- if (DstTy.isVector())
- return false; // Should be handled by imported patterns.
- assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
- AArch64::GPRRegBankID &&
- "Unexpected ext regbank");
- MachineInstr *ExtI;
- // First check if we're extending the result of a load which has a dest type
- // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
- // GPR register on AArch64 and all loads which are smaller automatically
- // zero-extend the upper bits. E.g.
- // %v(s8) = G_LOAD %p, :: (load 1)
- // %v2(s32) = G_ZEXT %v(s8)
- if (!IsSigned) {
- auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
- bool IsGPR =
- RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
- if (LoadMI && IsGPR) {
- const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
- unsigned BytesLoaded = MemOp->getSize();
- if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
- return selectCopy(I, TII, MRI, TRI, RBI);
- }
- // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
- // + SUBREG_TO_REG.
- if (IsGPR && SrcSize == 32 && DstSize == 64) {
- Register SubregToRegSrc =
- MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- const Register ZReg = AArch64::WZR;
- MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
- .addImm(0);
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
- .addImm(0)
- .addUse(SubregToRegSrc)
- .addImm(AArch64::sub_32);
- if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
- MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
- return false;
- }
- if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
- MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
- return false;
- }
- I.eraseFromParent();
- return true;
- }
- }
- if (DstSize == 64) {
- if (Opcode != TargetOpcode::G_SEXT_INREG) {
- // FIXME: Can we avoid manually doing this?
- if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
- MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
- << " operand\n");
- return false;
- }
- SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
- {&AArch64::GPR64RegClass}, {})
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::sub_32)
- .getReg(0);
- }
- ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
- {DefReg}, {SrcReg})
- .addImm(0)
- .addImm(SrcSize - 1);
- } else if (DstSize <= 32) {
- ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
- {DefReg}, {SrcReg})
- .addImm(0)
- .addImm(SrcSize - 1);
- } else {
- return false;
- }
- constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP:
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
- SrcTy = MRI.getType(I.getOperand(1).getReg());
- const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
- if (NewOpc == Opcode)
- return false;
- I.setDesc(TII.get(NewOpc));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- I.setFlags(MachineInstr::NoFPExcept);
- return true;
- }
- case TargetOpcode::G_FREEZE:
- return selectCopy(I, TII, MRI, TRI, RBI);
- case TargetOpcode::G_INTTOPTR:
- // The importer is currently unable to import pointer types since they
- // didn't exist in SelectionDAG.
- return selectCopy(I, TII, MRI, TRI, RBI);
- case TargetOpcode::G_BITCAST:
- // Imported SelectionDAG rules can handle every bitcast except those that
- // bitcast from a type to the same type. Ideally, these shouldn't occur
- // but we might not run an optimizer that deletes them. The other exception
- // is bitcasts involving pointer types, as SelectionDAG has no knowledge
- // of them.
- return selectCopy(I, TII, MRI, TRI, RBI);
- case TargetOpcode::G_SELECT: {
- auto &Sel = cast<GSelect>(I);
- const Register CondReg = Sel.getCondReg();
- const Register TReg = Sel.getTrueReg();
- const Register FReg = Sel.getFalseReg();
- if (tryOptSelect(Sel))
- return true;
- // Make sure to use an unused vreg instead of wzr, so that the peephole
- // optimizations will be able to optimize these.
- Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
- return false;
- Sel.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_ICMP: {
- if (Ty.isVector())
- return selectVectorICmp(I, MRI);
- if (Ty != LLT::scalar(32)) {
- LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
- << ", expected: " << LLT::scalar(32) << '\n');
- return false;
- }
- auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- const AArch64CC::CondCode InvCC =
- changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
- emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
- /*Src2=*/AArch64::WZR, InvCC, MIB);
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_FCMP: {
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
- Pred) ||
- !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
- return false;
- I.eraseFromParent();
- return true;
- }
- case TargetOpcode::G_VASTART:
- return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
- : selectVaStartAAPCS(I, MF, MRI);
- case TargetOpcode::G_INTRINSIC:
- return selectIntrinsic(I, MRI);
- case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
- return selectIntrinsicWithSideEffects(I, MRI);
- case TargetOpcode::G_IMPLICIT_DEF: {
- I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- const Register DstReg = I.getOperand(0).getReg();
- const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
- RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
- return true;
- }
- case TargetOpcode::G_BLOCK_ADDR: {
- if (TM.getCodeModel() == CodeModel::Large) {
- materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
- I.eraseFromParent();
- return true;
- } else {
- I.setDesc(TII.get(AArch64::MOVaddrBA));
- auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
- I.getOperand(0).getReg())
- .addBlockAddress(I.getOperand(1).getBlockAddress(),
- /* Offset */ 0, AArch64II::MO_PAGE)
- .addBlockAddress(
- I.getOperand(1).getBlockAddress(), /* Offset */ 0,
- AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
- }
- }
- case AArch64::G_DUP: {
- // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
- // imported patterns. Do it manually here. Avoiding generating s16 gpr is
- // difficult because at RBS we may end up pessimizing the fpr case if we
- // decided to add an anyextend to fix this. Manual selection is the most
- // robust solution for now.
- if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
- AArch64::GPRRegBankID)
- return false; // We expect the fpr regbank case to be imported.
- LLT VecTy = MRI.getType(I.getOperand(0).getReg());
- if (VecTy == LLT::fixed_vector(8, 8))
- I.setDesc(TII.get(AArch64::DUPv8i8gpr));
- else if (VecTy == LLT::fixed_vector(16, 8))
- I.setDesc(TII.get(AArch64::DUPv16i8gpr));
- else if (VecTy == LLT::fixed_vector(4, 16))
- I.setDesc(TII.get(AArch64::DUPv4i16gpr));
- else if (VecTy == LLT::fixed_vector(8, 16))
- I.setDesc(TII.get(AArch64::DUPv8i16gpr));
- else
- return false;
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- case TargetOpcode::G_INTRINSIC_TRUNC:
- return selectIntrinsicTrunc(I, MRI);
- case TargetOpcode::G_INTRINSIC_ROUND:
- return selectIntrinsicRound(I, MRI);
- case TargetOpcode::G_BUILD_VECTOR:
- return selectBuildVector(I, MRI);
- case TargetOpcode::G_MERGE_VALUES:
- return selectMergeValues(I, MRI);
- case TargetOpcode::G_UNMERGE_VALUES:
- return selectUnmergeValues(I, MRI);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return selectShuffleVector(I, MRI);
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
- return selectExtractElt(I, MRI);
- case TargetOpcode::G_INSERT_VECTOR_ELT:
- return selectInsertElt(I, MRI);
- case TargetOpcode::G_CONCAT_VECTORS:
- return selectConcatVectors(I, MRI);
- case TargetOpcode::G_JUMP_TABLE:
- return selectJumpTable(I, MRI);
- case TargetOpcode::G_VECREDUCE_FADD:
- case TargetOpcode::G_VECREDUCE_ADD:
- return selectReduction(I, MRI);
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMCPY_INLINE:
- case TargetOpcode::G_MEMMOVE:
- case TargetOpcode::G_MEMSET:
- assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
- return selectMOPS(I, MRI);
- }
- return false;
- }
- bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- Register VecReg = I.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
- // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
- // a subregister copy afterwards.
- if (VecTy == LLT::fixed_vector(2, 32)) {
- Register DstReg = I.getOperand(0).getReg();
- auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
- {VecReg, VecReg});
- auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(AddP.getReg(0), 0, AArch64::ssub)
- .getReg(0);
- RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
- }
- unsigned Opc = 0;
- if (VecTy == LLT::fixed_vector(16, 8))
- Opc = AArch64::ADDVv16i8v;
- else if (VecTy == LLT::fixed_vector(8, 16))
- Opc = AArch64::ADDVv8i16v;
- else if (VecTy == LLT::fixed_vector(4, 32))
- Opc = AArch64::ADDVv4i32v;
- else if (VecTy == LLT::fixed_vector(2, 64))
- Opc = AArch64::ADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::fixed_vector(2, 32))
- Opc = AArch64::FADDPv2i32p;
- else if (VecTy == LLT::fixed_vector(2, 64))
- Opc = AArch64::FADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- return false;
- }
- bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
- MachineRegisterInfo &MRI) {
- unsigned Mopcode;
- switch (GI.getOpcode()) {
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMCPY_INLINE:
- Mopcode = AArch64::MOPSMemoryCopyPseudo;
- break;
- case TargetOpcode::G_MEMMOVE:
- Mopcode = AArch64::MOPSMemoryMovePseudo;
- break;
- case TargetOpcode::G_MEMSET:
- // For tagged memset see llvm.aarch64.mops.memset.tag
- Mopcode = AArch64::MOPSMemorySetPseudo;
- break;
- }
- auto &DstPtr = GI.getOperand(0);
- auto &SrcOrVal = GI.getOperand(1);
- auto &Size = GI.getOperand(2);
- // Create copies of the registers that can be clobbered.
- const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
- const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
- const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
- const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
- const auto &SrcValRegClass =
- IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
- // Constrain to specific registers
- RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
- RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
- RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
- MIB.buildCopy(DstPtrCopy, DstPtr);
- MIB.buildCopy(SrcValCopy, SrcOrVal);
- MIB.buildCopy(SizeCopy, Size);
- // New instruction uses the copied registers because it must update them.
- // The defs are not used since they don't exist in G_MEM*. They are still
- // tied.
- // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
- Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
- Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- if (IsSet) {
- MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
- {DstPtrCopy, SizeCopy, SrcValCopy});
- } else {
- Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
- MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
- {DstPtrCopy, SrcValCopy, SizeCopy});
- }
- GI.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
- Register JTAddr = I.getOperand(0).getReg();
- unsigned JTI = I.getOperand(1).getIndex();
- Register Index = I.getOperand(2).getReg();
- Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
- MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
- auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
- {TargetReg, ScratchReg}, {JTAddr, Index})
- .addJumpTableIndex(JTI);
- // Build the indirect branch.
- MIB.buildInstr(AArch64::BR, {}, {TargetReg});
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
- }
- bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
- assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
- Register DstReg = I.getOperand(0).getReg();
- unsigned JTI = I.getOperand(1).getIndex();
- // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
- auto MovMI =
- MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
- .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
- .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
- }
- bool AArch64InstructionSelector::selectTLSGlobalValue(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- if (!STI.isTargetMachO())
- return false;
- MachineFunction &MF = *I.getParent()->getParent();
- MF.getFrameInfo().setAdjustsStack(true);
- const auto &GlobalOp = I.getOperand(1);
- assert(GlobalOp.getOffset() == 0 &&
- "Shouldn't have an offset on TLS globals!");
- const GlobalValue &GV = *GlobalOp.getGlobal();
- auto LoadGOT =
- MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
- .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
- auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
- {LoadGOT.getReg(0)})
- .addImm(0);
- MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
- .addUse(AArch64::X0, RegState::Implicit)
- .addDef(AArch64::X0, RegState::Implicit)
- .addRegMask(TRI.getTLSCallPreservedMask());
- MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
- RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
- MRI);
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectIntrinsicTrunc(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTZHr;
- break;
- case 32:
- Opc = AArch64::FRINTZSr;
- break;
- case 64:
- Opc = AArch64::FRINTZDr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTZv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTZv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTZv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f64;
- break;
- }
- }
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
- return false;
- }
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- bool AArch64InstructionSelector::selectIntrinsicRound(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTAHr;
- break;
- case 32:
- Opc = AArch64::FRINTASr;
- break;
- case 64:
- Opc = AArch64::FRINTADr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTAv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTAv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTAv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f64;
- break;
- }
- }
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
- return false;
- }
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
- bool AArch64InstructionSelector::selectVectorICmp(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- Register DstReg = I.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Register SrcReg = I.getOperand(2).getReg();
- Register Src2Reg = I.getOperand(3).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
- unsigned NumElts = DstTy.getNumElements();
- // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
- // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
- // Third index is cc opcode:
- // 0 == eq
- // 1 == ugt
- // 2 == uge
- // 3 == ult
- // 4 == ule
- // 5 == sgt
- // 6 == sge
- // 7 == slt
- // 8 == sle
- // ne is done by negating 'eq' result.
- // This table below assumes that for some comparisons the operands will be
- // commuted.
- // ult op == commute + ugt op
- // ule op == commute + uge op
- // slt op == commute + sgt op
- // sle op == commute + sge op
- unsigned PredIdx = 0;
- bool SwapOperands = false;
- CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
- switch (Pred) {
- case CmpInst::ICMP_NE:
- case CmpInst::ICMP_EQ:
- PredIdx = 0;
- break;
- case CmpInst::ICMP_UGT:
- PredIdx = 1;
- break;
- case CmpInst::ICMP_UGE:
- PredIdx = 2;
- break;
- case CmpInst::ICMP_ULT:
- PredIdx = 3;
- SwapOperands = true;
- break;
- case CmpInst::ICMP_ULE:
- PredIdx = 4;
- SwapOperands = true;
- break;
- case CmpInst::ICMP_SGT:
- PredIdx = 5;
- break;
- case CmpInst::ICMP_SGE:
- PredIdx = 6;
- break;
- case CmpInst::ICMP_SLT:
- PredIdx = 7;
- SwapOperands = true;
- break;
- case CmpInst::ICMP_SLE:
- PredIdx = 8;
- SwapOperands = true;
- break;
- default:
- llvm_unreachable("Unhandled icmp predicate");
- return false;
- }
- // This table obviously should be tablegen'd when we have our GISel native
- // tablegen selector.
- static const unsigned OpcTable[4][4][9] = {
- {
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
- AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
- AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
- {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
- AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
- AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
- },
- {
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
- AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
- AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
- {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
- AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
- AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */}
- },
- {
- {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
- AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
- AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
- {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
- AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
- AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */}
- },
- {
- {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
- AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
- AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */},
- {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
- 0 /* invalid */}
- },
- };
- unsigned EltIdx = Log2_32(SrcEltSize / 8);
- unsigned NumEltsIdx = Log2_32(NumElts / 2);
- unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
- if (!Opc) {
- LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
- return false;
- }
- const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *SrcRC =
- getRegClassForTypeOnBank(SrcTy, VecRB, true);
- if (!SrcRC) {
- LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
- return false;
- }
- unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
- if (SrcTy.getSizeInBits() == 128)
- NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
- if (SwapOperands)
- std::swap(SrcReg, Src2Reg);
- auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
- constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
- // Invert if we had a 'ne' cc.
- if (NotOpc) {
- Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
- constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
- } else {
- MIB.buildCopy(DstReg, Cmp.getReg(0));
- }
- RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
- I.eraseFromParent();
- return true;
- }
- /// Place \p Scalar into a fresh register of class \p DstRC by inserting it
- /// into the subregister of an IMPLICIT_DEF that matches \p EltSize bits.
- ///
- /// \returns the INSERT_SUBREG instruction, or nullptr when \p EltSize is not
- /// 16, 32 or 64. The IMPLICIT_DEF is emitted before the size is validated, so
- /// it is still present (and unconstrained) on the nullptr path.
- MachineInstr *AArch64InstructionSelector::emitScalarToVector(
-     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
-     MachineIRBuilder &MIRBuilder) const {
-   auto ImpDef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
-   // Map the element width onto the subregister index to insert at.
-   unsigned SubIdx = 0;
-   switch (EltSize) {
-   case 16:
-     SubIdx = AArch64::hsub;
-     break;
-   case 32:
-     SubIdx = AArch64::ssub;
-     break;
-   case 64:
-     SubIdx = AArch64::dsub;
-     break;
-   default:
-     return nullptr;
-   }
-   auto Insert =
-       MIRBuilder
-           .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {ImpDef, Scalar})
-           .addImm(SubIdx);
-   constrainSelectedInstRegOperands(*ImpDef, TII, TRI, RBI);
-   constrainSelectedInstRegOperands(*Insert, TII, TRI, RBI);
-   return &*Insert;
- }
- /// Select a scalar G_MERGE_VALUES that has exactly two sources.
- ///
- /// Two shapes are handled:
- ///   * 2 x 64-bit -> s128, built from two lane inserts into an IMPLICIT_DEF;
- ///   * 2 x 32-bit -> 64-bit on the GPR bank, built from two SUBREG_TO_REG
- ///     widenings combined with BFMXri.
- /// Every other shape returns false without erasing \p I.
- bool AArch64InstructionSelector::selectMergeValues(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
-   const Register DstReg = I.getOperand(0).getReg();
-   const Register FirstSrc = I.getOperand(1).getReg();
-   const LLT DstTy = MRI.getType(DstReg);
-   const LLT SrcTy = MRI.getType(FirstSrc);
-   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
-   const RegisterBank &RB = *RBI.getRegBank(FirstSrc, MRI, TRI);
-   // Only the def + two sources form is supported.
-   if (I.getNumOperands() != 3)
-     return false;
-   const Register SecondSrc = I.getOperand(2).getReg();
-   // Merging 2 s64s into an s128.
-   if (DstTy == LLT::scalar(128)) {
-     if (SrcTy.getSizeInBits() != 64)
-       return false;
-     auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
-     MachineInstr *FirstIns = emitLaneInsert(std::nullopt, Undef.getReg(0),
-                                             FirstSrc, /* LaneIdx */ 0, RB, MIB);
-     if (!FirstIns)
-       return false;
-     MachineInstr *SecondIns =
-         emitLaneInsert(DstReg, FirstIns->getOperand(0).getReg(), SecondSrc,
-                        /* LaneIdx */ 1, RB, MIB);
-     if (!SecondIns)
-       return false;
-     constrainSelectedInstRegOperands(*FirstIns, TII, TRI, RBI);
-     constrainSelectedInstRegOperands(*SecondIns, TII, TRI, RBI);
-     I.eraseFromParent();
-     return true;
-   }
-   // The remaining path only knows how to build an s64 from two GPR s32s.
-   if (RB.getID() != AArch64::GPRRegBankID)
-     return false;
-   const bool IsTwoS32ToS64 =
-       DstTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32;
-   if (!IsTwoS32ToS64)
-     return false;
-   MachineBasicBlock &MBB = *I.getParent();
-   const TargetRegisterClass *WideRC = &AArch64::GPR64RegClass;
-   // Creates a new 64-bit vreg and a SUBREG_TO_REG that places Src in sub_32.
-   auto WidenTo64 = [&](Register Src) -> MachineInstr * {
-     Register Wide = MRI.createVirtualRegister(WideRC);
-     return BuildMI(MBB, I, I.getDebugLoc(),
-                    TII.get(TargetOpcode::SUBREG_TO_REG))
-         .addDef(Wide)
-         .addImm(0)
-         .addUse(Src)
-         .addImm(AArch64::sub_32)
-         .getInstr();
-   };
-   MachineInstr *Widened1 = WidenTo64(FirstSrc);
-   // Need to anyext the second scalar before we can use bfm
-   MachineInstr *Widened2 = WidenTo64(SecondSrc);
-   MachineInstr *BFM =
-       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
-           .addDef(DstReg)
-           .addUse(Widened1->getOperand(0).getReg())
-           .addUse(Widened2->getOperand(0).getReg())
-           .addImm(32)
-           .addImm(31)
-           .getInstr();
-   constrainSelectedInstRegOperands(*Widened1, TII, TRI, RBI);
-   constrainSelectedInstRegOperands(*Widened2, TII, TRI, RBI);
-   constrainSelectedInstRegOperands(*BFM, TII, TRI, RBI);
-   I.eraseFromParent();
-   return true;
- }
- /// Pick the lane-copy (DUP) opcode and the matching extract subregister index
- /// for vector elements of \p EltSize bits.
- ///
- /// \returns true and fills \p CopyOpc / \p ExtractSubReg for 8, 16, 32 and
- /// 64-bit elements; returns false and leaves both untouched otherwise.
- static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
-                               const unsigned EltSize) {
-   if (EltSize == 8) {
-     CopyOpc = AArch64::DUPi8;
-     ExtractSubReg = AArch64::bsub;
-     return true;
-   }
-   if (EltSize == 16) {
-     CopyOpc = AArch64::DUPi16;
-     ExtractSubReg = AArch64::hsub;
-     return true;
-   }
-   if (EltSize == 32) {
-     CopyOpc = AArch64::DUPi32;
-     ExtractSubReg = AArch64::ssub;
-     return true;
-   }
-   if (EltSize == 64) {
-     CopyOpc = AArch64::DUPi64;
-     ExtractSubReg = AArch64::dsub;
-     return true;
-   }
-   // Unknown size, bail out.
-   LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
-   return false;
- }
- /// Emit the instructions that copy lane \p LaneIdx of \p VecReg into a scalar
- /// of type \p ScalarTy on bank \p DstRB.
- ///
- /// When \p DstReg is empty a new virtual register of the destination class is
- /// created. Lane 0 is lowered to a subregister COPY; every other lane uses the
- /// lane-copy opcode chosen by getLaneCopyOpcode, widening \p VecReg to a
- /// 128-bit register first if it is narrower.
- ///
- /// \returns the COPY / lane-copy instruction, or nullptr if an opcode or a
- /// register class could not be determined.
- MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
-     std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
-     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
-   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-   unsigned LaneCopyOpc = 0;
-   unsigned SubRegIdx = 0;
-   if (!getLaneCopyOpcode(LaneCopyOpc, SubRegIdx, ScalarTy.getSizeInBits())) {
-     LLVM_DEBUG(
-         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
-     return nullptr;
-   }
-   const TargetRegisterClass *DstRC =
-       getRegClassForTypeOnBank(ScalarTy, DstRB, true);
-   if (!DstRC) {
-     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
-     return nullptr;
-   }
-   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
-   const LLT VecTy = MRI.getType(VecReg);
-   if (!getRegClassForTypeOnBank(VecTy, VecRB, true)) {
-     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
-     return nullptr;
-   }
-   if (!DstReg)
-     DstReg = MRI.createVirtualRegister(DstRC);
-   // If the lane index is 0, we just use a subregister COPY.
-   if (LaneIdx == 0) {
-     auto SubCopy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
-                        .addReg(VecReg, 0, SubRegIdx);
-     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
-     return &*SubCopy;
-   }
-   // Lane copies require 128-bit wide registers. If we're dealing with an
-   // unpacked vector, then we need to move up to that width. Insert an implicit
-   // def and a subregister insert to get us there.
-   Register LaneSrc = VecReg;
-   if (VecTy.getSizeInBits() != 128) {
-     MachineInstr *Widened = emitScalarToVector(
-         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
-     if (!Widened)
-       return nullptr;
-     LaneSrc = Widened->getOperand(0).getReg();
-   }
-   MachineInstr *LaneCopyMI =
-       MIRBuilder.buildInstr(LaneCopyOpc, {*DstReg}, {LaneSrc}).addImm(LaneIdx);
-   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
-   // Make sure that we actually constrain the initial copy.
-   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
-   return LaneCopyMI;
- }
- /// Select G_EXTRACT_VECTOR_ELT.
- ///
- /// Succeeds only when the destination lives on the FPR bank and the lane index
- /// operand resolves to a constant; the actual lowering is delegated to
- /// emitExtractVectorElt. On success \p I is erased.
- bool AArch64InstructionSelector::selectExtractElt(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
-          "unexpected opcode!");
-   Register DstReg = I.getOperand(0).getReg();
-   const Register SrcReg = I.getOperand(1).getReg();
-   const LLT NarrowTy = MRI.getType(DstReg);
-   const LLT WideTy = MRI.getType(SrcReg);
-   (void)WideTy;
-   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
-          "source register size too small!");
-   assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
-   // Need the lane index to determine the correct copy opcode.
-   MachineOperand &LaneIdxOp = I.getOperand(2);
-   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
-   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
-   if (DstRB.getID() != AArch64::FPRRegBankID) {
-     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
-     return false;
-   }
-   // Find the index to extract from.
-   auto LaneCst = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
-   if (!LaneCst)
-     return false;
-   const unsigned LaneIdx = LaneCst->Value.getSExtValue();
-   if (!emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, LaneIdx, MIB))
-     return false;
-   I.eraseFromParent();
-   return true;
- }
- /// Select an unmerge whose results are themselves vectors.
- ///
- /// Each destination sub-vector is treated as one "element" of the source and
- /// extracted with emitExtractVectorElt. Sources wider than 128 bits are
- /// rejected. On success \p I is erased.
- bool AArch64InstructionSelector::selectSplitVectorUnmerge(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   // The last operand is the source; all operands before it are results.
-   const unsigned NumDefs = I.getNumOperands() - 1;
-   const Register SrcReg = I.getOperand(NumDefs).getReg();
-   const Register FirstDst = I.getOperand(0).getReg();
-   const LLT NarrowTy = MRI.getType(FirstDst);
-   const LLT SrcTy = MRI.getType(SrcReg);
-   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
-   if (SrcTy.getSizeInBits() > 128) {
-     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
-     return false;
-   }
-   // We implement a split vector operation by treating the sub-vectors as
-   // scalars and extracting them.
-   const RegisterBank &DstRB = *RBI.getRegBank(FirstDst, MRI, TRI);
-   for (unsigned Lane = 0; Lane != NumDefs; ++Lane) {
-     const Register Dst = I.getOperand(Lane).getReg();
-     if (!emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, Lane, MIB))
-       return false;
-   }
-   I.eraseFromParent();
-   return true;
- }
- /// Select G_UNMERGE_VALUES.
- ///
- /// Only handled when both operand 0 and operand 1 are on the FPR bank. Vector
- /// results are forwarded to selectSplitVectorUnmerge; scalar results are
- /// produced with one subregister COPY (first result) followed by lane copies
- /// (remaining results). Returns false if the instruction could not be
- /// selected; on success \p I is erased.
- bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "unexpected opcode");
- // TODO: Handle unmerging into GPRs and from scalars to scalars.
- if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
- AArch64::FPRRegBankID ||
- RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
- AArch64::FPRRegBankID) {
- LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
- "currently unsupported.\n");
- return false;
- }
- // The last operand is the vector source register, and every other operand is
- // a register to unpack into.
- unsigned NumElts = I.getNumOperands() - 1;
- Register SrcReg = I.getOperand(NumElts).getReg();
- const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
- const LLT WideTy = MRI.getType(SrcReg);
- (void)WideTy;
- assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
- "can only unmerge from vector or s128 types!");
- assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
- "source register size too small!");
- // Non-scalar results mean we are splitting a vector into sub-vectors.
- if (!NarrowTy.isScalar())
- return selectSplitVectorUnmerge(I, MRI);
- // Choose a lane copy opcode and subregister based off of the size of the
- // vector's elements.
- unsigned CopyOpc = 0;
- unsigned ExtractSubReg = 0;
- if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
- return false;
- // Set up for the lane copies.
- MachineBasicBlock &MBB = *I.getParent();
- // Stores the registers we'll be copying from.
- SmallVector<Register, 4> InsertRegs;
- // We'll use the first register twice, so we only need NumElts-1 registers.
- unsigned NumInsertRegs = NumElts - 1;
- // If our elements fit into exactly 128 bits, then we can copy from the source
- // directly. Otherwise, we need to do a bit of setup with some subregister
- // inserts.
- if (NarrowTy.getSizeInBits() * NumElts == 128) {
- InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
- } else {
- // No. We have to perform subregister inserts. For each insert, create an
- // implicit def and a subregister insert, and save the register we create.
- const TargetRegisterClass *RC = getRegClassForTypeOnBank(
- LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
- *RBI.getRegBank(SrcReg, MRI, TRI));
- // Subregister index at which SrcReg is inserted into each 128-bit register.
- unsigned SubReg = 0;
- bool Found = getSubRegForClass(RC, TRI, SubReg);
- (void)Found;
- assert(Found && "expected to find last operand's subeg idx");
- for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
- Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
- MachineInstr &ImpDefMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
- ImpDefReg);
- // Now, create the subregister insert from SrcReg.
- Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
- MachineInstr &InsMI =
- *BuildMI(MBB, I, I.getDebugLoc(),
- TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
- .addUse(ImpDefReg)
- .addUse(SrcReg)
- .addImm(SubReg);
- constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
- // Save the register so that we can copy from it after.
- InsertRegs.push_back(InsertReg);
- }
- }
- // Now that we've created any necessary subregister inserts, we can
- // create the copies.
- //
- // Perform the first copy separately as a subregister copy.
- // It reads from InsertRegs[0], the same register the lane-1 copy below uses.
- Register CopyTo = I.getOperand(0).getReg();
- auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
- .addReg(InsertRegs[0], 0, ExtractSubReg);
- constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
- // Now, perform the remaining copies as vector lane copies.
- // InsertRegs holds NumElts-1 entries, so LaneIdx runs from 1 to NumElts-1.
- unsigned LaneIdx = 1;
- for (Register InsReg : InsertRegs) {
- // NOTE: this CopyTo shadows the outer CopyTo (operand 0) declared above.
- Register CopyTo = I.getOperand(LaneIdx).getReg();
- MachineInstr &CopyInst =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
- .addUse(InsReg)
- .addImm(LaneIdx);
- constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
- ++LaneIdx;
- }
- // Separately constrain the first copy's destination. Because of the
- // limitation in constrainOperandRegClass, we can't guarantee that this will
- // actually be constrained. So, do it ourselves using the second operand.
- // Operand 1 is the destination of the lane-1 copy constrained in the loop.
- const TargetRegisterClass *RC =
- MRI.getRegClassOrNull(I.getOperand(1).getReg());
- if (!RC) {
- // The copies emitted above stay in the block and I is not erased here.
- LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
- return false;
- }
- // Here CopyTo is the outer variable again, i.e. operand 0.
- RBI.constrainGenericRegister(CopyTo, *RC, MRI);
- I.eraseFromParent();
- return true;
- }
- /// Select G_CONCAT_VECTORS by concatenating source operands 1 and 2 via
- /// emitVectorConcat. Erases \p I and returns true on success; returns false
- /// if no concat instruction could be emitted.
- bool AArch64InstructionSelector::selectConcatVectors(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
-          "Unexpected opcode");
-   const Register Dst = I.getOperand(0).getReg();
-   const Register FirstVec = I.getOperand(1).getReg();
-   const Register SecondVec = I.getOperand(2).getReg();
-   if (!emitVectorConcat(Dst, FirstVec, SecondVec, MIB))
-     return false;
-   I.eraseFromParent();
-   return true;
- }
- /// Register \p CPVal in the constant pool of \p MF, using the data layout's
- /// preferred alignment for the constant's type, and return its pool index.
- unsigned
- AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
-                                                   MachineFunction &MF) const {
-   const Align PrefAlign =
-       MF.getDataLayout().getPrefTypeAlign(CPVal->getType());
-   return MF.getConstantPool()->getConstantPoolIndex(CPVal, PrefAlign);
- }
- /// Emit an ADRP + load pair that reads \p CPVal from the constant pool.
- ///
- /// The load opcode and destination class are chosen from the store size of
- /// the constant's type: 16, 8, 4 and 2 bytes use LDRQui, LDRDui, LDRSui and
- /// LDRHui respectively. For any other size nullptr is returned; the ADRP has
- /// already been emitted at that point.
- ///
- /// \returns the load instruction, or nullptr on an unsupported size.
- MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
-     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
-   auto &MF = MIRBuilder.getMF();
-   const unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
-   auto Adrp =
-       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
-           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
-   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
-   unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
-   // Select the load flavour for this size; everything else about the load is
-   // identical across sizes.
-   unsigned LoadOpc = 0;
-   const TargetRegisterClass *LoadRC = nullptr;
-   switch (Size) {
-   case 16:
-     LoadOpc = AArch64::LDRQui;
-     LoadRC = &AArch64::FPR128RegClass;
-     break;
-   case 8:
-     LoadOpc = AArch64::LDRDui;
-     LoadRC = &AArch64::FPR64RegClass;
-     break;
-   case 4:
-     LoadOpc = AArch64::LDRSui;
-     LoadRC = &AArch64::FPR32RegClass;
-     break;
-   case 2:
-     LoadOpc = AArch64::LDRHui;
-     LoadRC = &AArch64::FPR16RegClass;
-     break;
-   default:
-     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
-                       << *CPVal->getType());
-     return nullptr;
-   }
-   MachineInstr *LoadMI =
-       &*MIRBuilder.buildInstr(LoadOpc, {LoadRC}, {Adrp})
-             .addConstantPoolIndex(CPIdx, 0,
-                                   AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-   LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
-                                                     MachineMemOperand::MOLoad,
-                                                     Size, Align(Size)));
-   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
-   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
-   return LoadMI;
- }
- /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
- /// size and RB.
- ///
- /// Sources on the GPR bank map to the INSvi*gpr opcodes; sources on any other
- /// bank map to the INSvi*lane opcodes. Element sizes of 8, 16, 32 and 64 bits
- /// are accepted on both paths; any other size hits llvm_unreachable.
- ///
- /// \param RB register bank of the element being inserted.
- /// \param EltSize element size in bits.
- static std::pair<unsigned, unsigned>
- getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
-   unsigned Opc, SubregIdx;
-   if (RB.getID() == AArch64::GPRRegBankID) {
-     switch (EltSize) {
-     case 8:
-       // 8-bit GPR inserts used to fall through to llvm_unreachable even
-       // though the non-GPR path below accepts 8-bit elements.
-       Opc = AArch64::INSvi8gpr;
-       SubregIdx = AArch64::bsub;
-       break;
-     case 16:
-       Opc = AArch64::INSvi16gpr;
-       SubregIdx = AArch64::ssub;
-       break;
-     case 32:
-       Opc = AArch64::INSvi32gpr;
-       SubregIdx = AArch64::ssub;
-       break;
-     case 64:
-       Opc = AArch64::INSvi64gpr;
-       SubregIdx = AArch64::dsub;
-       break;
-     default:
-       llvm_unreachable("invalid elt size!");
-     }
-   } else {
-     switch (EltSize) {
-     case 8:
-       Opc = AArch64::INSvi8lane;
-       SubregIdx = AArch64::bsub;
-       break;
-     case 16:
-       Opc = AArch64::INSvi16lane;
-       SubregIdx = AArch64::hsub;
-       break;
-     case 32:
-       Opc = AArch64::INSvi32lane;
-       SubregIdx = AArch64::ssub;
-       break;
-     case 64:
-       Opc = AArch64::INSvi64lane;
-       SubregIdx = AArch64::dsub;
-       break;
-     default:
-       llvm_unreachable("invalid elt size!");
-     }
-   }
-   return std::make_pair(Opc, SubregIdx);
- }
- /// Build an already-selected instruction with opcode \p Opcode, apply any
- /// complex-pattern renderers in \p RenderFns to add further operands, and
- /// constrain its register operands.
- ///
- /// \returns the instruction that was built.
- MachineInstr *AArch64InstructionSelector::emitInstr(
-     unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
-     std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
-     const ComplexRendererFns &RenderFns) const {
-   assert(Opcode && "Expected an opcode?");
-   assert(!isPreISelGenericOpcode(Opcode) &&
-          "Function should only be used to produce selected instructions!");
-   auto NewMI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
-   // Renderers are optional; when present each one appends operands in turn.
-   if (RenderFns) {
-     for (auto &Render : *RenderFns)
-       Render(NewMI);
-   }
-   constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI);
-   return &*NewMI;
- }
- /// Emit an add/sub-style instruction computing \p Dst from \p LHS and \p RHS,
- /// picking the first operand form that \p RHS can be folded into.
- ///
- /// \p AddrModeAndSizeToOpcode is indexed as [form][Is32Bit], where the form
- /// rows are: 0 = ri, 1 = rs, 2 = rr, 3 = ri used with a negated immediate,
- /// 4 = rx. The forms are tried in the order ri, negated ri, rx, rs, rr.
- MachineInstr *AArch64InstructionSelector::emitAddSub(
-     const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
-     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
-     MachineIRBuilder &MIRBuilder) const {
-   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
-   auto Ty = MRI.getType(LHS.getReg());
-   assert(!Ty.isVector() && "Expected a scalar or pointer?");
-   unsigned Size = Ty.getSizeInBits();
-   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
-   // Row indices into the opcode table.
-   constexpr unsigned ImmRow = 0, ShiftedRow = 1, RegRow = 2, NegImmRow = 3,
-                      ExtendedRow = 4;
-   // Column index: 0 selects the 64-bit opcode, 1 the 32-bit one.
-   const unsigned Col = Size == 32 ? 1 : 0;
-   // INSTRri form with positive arithmetic immediate.
-   if (auto ImmFns = selectArithImmed(RHS))
-     return emitInstr(AddrModeAndSizeToOpcode[ImmRow][Col], {Dst}, {LHS},
-                      MIRBuilder, ImmFns);
-   // INSTRri form with negative arithmetic immediate.
-   if (auto NegImmFns = selectNegArithImmed(RHS))
-     return emitInstr(AddrModeAndSizeToOpcode[NegImmRow][Col], {Dst}, {LHS},
-                      MIRBuilder, NegImmFns);
-   // INSTRrx form.
-   if (auto ExtFns = selectArithExtendedRegister(RHS))
-     return emitInstr(AddrModeAndSizeToOpcode[ExtendedRow][Col], {Dst}, {LHS},
-                      MIRBuilder, ExtFns);
-   // INSTRrs form.
-   if (auto ShiftFns = selectShiftedRegister(RHS))
-     return emitInstr(AddrModeAndSizeToOpcode[ShiftedRow][Col], {Dst}, {LHS},
-                      MIRBuilder, ShiftFns);
-   // Nothing could be folded: plain register-register form.
-   return emitInstr(AddrModeAndSizeToOpcode[RegRow][Col], {Dst}, {LHS, RHS},
-                    MIRBuilder);
- }
- /// Emit an ADD of \p LHS and \p RHS into \p DefReg through emitAddSub.
- /// The negated-immediate row uses SUB, so "add -imm" becomes "sub imm".
- MachineInstr *
- AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
-                                     MachineOperand &RHS,
-                                     MachineIRBuilder &MIRBuilder) const {
-   // Rows: ri, rs, rr, negated ri, rx. Columns: 64-bit, 32-bit.
-   const std::array<std::array<unsigned, 2>, 5> AddOpcodes{
-       {{AArch64::ADDXri, AArch64::ADDWri},
-        {AArch64::ADDXrs, AArch64::ADDWrs},
-        {AArch64::ADDXrr, AArch64::ADDWrr},
-        {AArch64::SUBXri, AArch64::SUBWri},
-        {AArch64::ADDXrx, AArch64::ADDWrx}}};
-   MachineInstr *AddMI = emitAddSub(AddOpcodes, DefReg, LHS, RHS, MIRBuilder);
-   return AddMI;
- }
- /// Emit a flag-setting ADDS of \p LHS and \p RHS into \p Dst through
- /// emitAddSub. The negated-immediate row uses SUBS.
- MachineInstr *
- AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
-                                      MachineOperand &RHS,
-                                      MachineIRBuilder &MIRBuilder) const {
-   // Rows: ri, rs, rr, negated ri, rx. Columns: 64-bit, 32-bit.
-   const std::array<std::array<unsigned, 2>, 5> AddsOpcodes{
-       {{AArch64::ADDSXri, AArch64::ADDSWri},
-        {AArch64::ADDSXrs, AArch64::ADDSWrs},
-        {AArch64::ADDSXrr, AArch64::ADDSWrr},
-        {AArch64::SUBSXri, AArch64::SUBSWri},
-        {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
-   MachineInstr *AddsMI = emitAddSub(AddsOpcodes, Dst, LHS, RHS, MIRBuilder);
-   return AddsMI;
- }
- /// Emit a flag-setting SUBS of \p LHS and \p RHS into \p Dst through
- /// emitAddSub. The negated-immediate row uses ADDS.
- MachineInstr *
- AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
-                                      MachineOperand &RHS,
-                                      MachineIRBuilder &MIRBuilder) const {
-   // Rows: ri, rs, rr, negated ri, rx. Columns: 64-bit, 32-bit.
-   const std::array<std::array<unsigned, 2>, 5> SubsOpcodes{
-       {{AArch64::SUBSXri, AArch64::SUBSWri},
-        {AArch64::SUBSXrs, AArch64::SUBSWrs},
-        {AArch64::SUBSXrr, AArch64::SUBSWrr},
-        {AArch64::ADDSXri, AArch64::ADDSWri},
-        {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
-   MachineInstr *SubsMI = emitAddSub(SubsOpcodes, Dst, LHS, RHS, MIRBuilder);
-   return SubsMI;
- }
- /// Emit a CMN of \p LHS and \p RHS: an ADDS whose result goes to a fresh
- /// virtual register of the GPR class matching the width of \p LHS.
- MachineInstr *
- AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
-                                     MachineIRBuilder &MIRBuilder) const {
-   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-   const TargetRegisterClass *ResultRC = &AArch64::GPR64RegClass;
-   if (MRI.getType(LHS.getReg()).getSizeInBits() == 32)
-     ResultRC = &AArch64::GPR32RegClass;
-   const Register Scratch = MRI.createVirtualRegister(ResultRC);
-   return emitADDS(Scratch, LHS, RHS, MIRBuilder);
- }
- /// Emit a TST (ANDS) of \p LHS and \p RHS.
- ///
- /// Forms are tried in order: logical-immediate (when \p RHS is a constant
- /// encodable as a logical immediate for the register width), shifted
- /// register, then plain register-register.
- MachineInstr *
- AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
-                                     MachineIRBuilder &MIRBuilder) const {
-   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
-   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-   const LLT Ty = MRI.getType(LHS.getReg());
-   const unsigned RegSize = Ty.getSizeInBits();
-   const bool Is32Bit = RegSize == 32;
-   // ANDS needs a logical immediate for its immediate form. Check if we can
-   // fold one in.
-   if (auto Cst = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
-     const int64_t Imm = Cst->Value.getSExtValue();
-     if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
-       unsigned ImmOpc = AArch64::ANDSXri;
-       if (Is32Bit)
-         ImmOpc = AArch64::ANDSWri;
-       auto TstMI = MIRBuilder.buildInstr(ImmOpc, {Ty}, {LHS});
-       TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
-       constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
-       return &*TstMI;
-     }
-   }
-   // Shifted-register form, if RHS can be folded as a logical shift.
-   if (auto ShiftFns = selectLogicalShiftedRegister(RHS)) {
-     unsigned ShiftOpc = AArch64::ANDSXrs;
-     if (Is32Bit)
-       ShiftOpc = AArch64::ANDSWrs;
-     return emitInstr(ShiftOpc, {Ty}, {LHS}, MIRBuilder, ShiftFns);
-   }
-   // Fallback: register-register form.
-   unsigned RegOpc = AArch64::ANDSXrr;
-   if (Is32Bit)
-     RegOpc = AArch64::ANDSWrr;
-   return emitInstr(RegOpc, {Ty}, {LHS, RHS}, MIRBuilder);
- }
- /// Emit a scalar integer compare of \p LHS against \p RHS.
- ///
- /// First gives tryFoldIntegerCompare a chance to produce a folded compare;
- /// if it does not, emits a SUBS into a clone of the LHS virtual register.
- MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
-     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
-     MachineIRBuilder &MIRBuilder) const {
-   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
-   assert(Predicate.isPredicate() && "Expected predicate?");
-   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-   const LLT CmpTy = MRI.getType(LHS.getReg());
-   assert(!CmpTy.isVector() && "Expected scalar or pointer");
-   const unsigned Size = CmpTy.getSizeInBits();
-   (void)Size;
-   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
-   // Fold the compare into a cmn or tst if possible.
-   MachineInstr *Folded = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
-   if (Folded)
-     return Folded;
-   // No fold: SUBS with a throwaway result register.
-   const Register Scratch = MRI.cloneVirtualRegister(LHS.getReg());
-   return emitSUBS(Scratch, LHS, RHS, MIRBuilder);
- }
- /// Materialize the result of a floating point compare with predicate \p Pred
- /// into the 32-bit register \p Dst.
- ///
- /// The predicate is translated into up to two AArch64 condition codes. With a
- /// single code one CSINC on WZR is emitted; with two codes, two CSINCs into
- /// fresh registers are ORed together.
- MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
-     Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
-   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- #ifndef NDEBUG
-   LLT Ty = MRI.getType(Dst);
-   assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
-          "Expected a 32-bit scalar register?");
- #endif
-   const Register Zero = AArch64::WZR;
-   AArch64CC::CondCode FirstCC, SecondCC;
-   changeFCMPPredToAArch64CC(Pred, FirstCC, SecondCC);
-   const auto InvFirstCC = AArch64CC::getInvertedCondCode(FirstCC);
-   // AL as the second code means a single condition is enough.
-   if (SecondCC == AArch64CC::AL)
-     return emitCSINC(/*Dst=*/Dst, /*Src1=*/Zero, /*Src2=*/Zero, InvFirstCC,
-                      MIRBuilder);
-   // Two conditions: compute each one separately and OR the results.
-   const TargetRegisterClass *TmpRC = &AArch64::GPR32RegClass;
-   const Register FirstTmp = MRI.createVirtualRegister(TmpRC);
-   const Register SecondTmp = MRI.createVirtualRegister(TmpRC);
-   const auto InvSecondCC = AArch64CC::getInvertedCondCode(SecondCC);
-   emitCSINC(/*Dst=*/FirstTmp, /*Src1=*/Zero, /*Src2=*/Zero, InvFirstCC,
-             MIRBuilder);
-   emitCSINC(/*Dst=*/SecondTmp, /*Src1=*/Zero, /*Src2=*/Zero, InvSecondCC,
-             MIRBuilder);
-   auto Combined =
-       MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {FirstTmp, SecondTmp});
-   constrainSelectedInstRegOperands(*Combined, TII, TRI, RBI);
-   return &*Combined;
- }
- /// Emit a scalar floating point compare of \p LHS against \p RHS.
- ///
- /// Only 32-bit and 64-bit scalars are handled; vectors and other sizes yield
- /// nullptr. A compare against +0.0 uses the immediate form, and for equality
- /// predicates the operands are swapped if only \p LHS is +0.0.
- MachineInstr *AArch64InstructionSelector::emitFPCompare(
-     Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
-     std::optional<CmpInst::Predicate> Pred) const {
-   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-   const LLT Ty = MRI.getType(LHS);
-   if (Ty.isVector())
-     return nullptr;
-   const unsigned OpSize = Ty.getSizeInBits();
-   if (OpSize != 32 && OpSize != 64)
-     return nullptr;
-   auto IsPositiveZero = [](const ConstantFP *C) {
-     return C && C->isZero() && !C->isNegative();
-   };
-   auto IsEqualityPred = [](CmpInst::Predicate P) {
-     switch (P) {
-     case CmpInst::FCMP_OEQ:
-     case CmpInst::FCMP_ONE:
-     case CmpInst::FCMP_UEQ:
-     case CmpInst::FCMP_UNE:
-       return true;
-     default:
-       return false;
-     }
-   };
-   // If this is a compare against +0.0, then we don't have
-   // to explicitly materialize a constant.
-   bool UseZeroImm = IsPositiveZero(getConstantFPVRegVal(RHS, MRI));
-   if (!UseZeroImm && Pred && IsEqualityPred(*Pred)) {
-     // Try commutating the operands.
-     if (IsPositiveZero(getConstantFPVRegVal(LHS, MRI))) {
-       UseZeroImm = true;
-       std::swap(LHS, RHS);
-     }
-   }
-   const bool Is64Bit = OpSize == 64;
-   unsigned CmpOpc;
-   if (UseZeroImm)
-     CmpOpc = Is64Bit ? AArch64::FCMPDri : AArch64::FCMPSri;
-   else
-     CmpOpc = Is64Bit ? AArch64::FCMPDrr : AArch64::FCMPSrr;
-   // Partially build the compare. Decide if we need to add a use for the
-   // third operand based off whether or not we're comparing against 0.0.
-   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
-   CmpMI.setMIFlags(MachineInstr::NoFPExcept);
-   if (!UseZeroImm)
-     CmpMI.addUse(RHS);
-   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
-   return &*CmpMI;
- }
- /// Concatenate the two 64-bit vectors \p Op1 and \p Op2 into a vector with
- /// twice as many elements.
- ///
- /// Both operands are widened into the destination class with
- /// emitScalarToVector, then the widened \p Op2 is inserted as lane 1 of the
- /// widened \p Op1. If \p Dst is empty a new virtual register is created.
- ///
- /// \returns the insert instruction, or nullptr if the operand types differ,
- /// are not 64 bits wide, or widening failed.
- MachineInstr *AArch64InstructionSelector::emitVectorConcat(
-     std::optional<Register> Dst, Register Op1, Register Op2,
-     MachineIRBuilder &MIRBuilder) const {
-   // TODO: some of this code is common with G_BUILD_VECTOR handling.
-   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-   const LLT Op1Ty = MRI.getType(Op1);
-   const LLT Op2Ty = MRI.getType(Op2);
-   if (Op1Ty != Op2Ty) {
-     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
-     return nullptr;
-   }
-   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
-   const unsigned OpBits = Op1Ty.getSizeInBits();
-   if (OpBits >= 128) {
-     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
-     return nullptr;
-   }
-   // At the moment we just support 64 bit vector concats.
-   if (OpBits != 64) {
-     LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
-     return nullptr;
-   }
-   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
-   const TargetRegisterClass *DstRC =
-       getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
-   // Step 1: treat each source as one wide scalar and place it in the low part
-   // of a destination-class register. Both are emitted before checking either.
-   MachineInstr *LowPart = emitScalarToVector(OpBits, DstRC, Op1, MIRBuilder);
-   MachineInstr *HighPart = emitScalarToVector(OpBits, DstRC, Op2, MIRBuilder);
-   if (!LowPart || !HighPart) {
-     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
-     return nullptr;
-   }
-   // Step 2: insert lane 0 of the widened Op2 as lane 1 of the widened Op1.
-   const unsigned InsertOpc = getInsertVecEltOpInfo(FPRBank, OpBits).first;
-   if (!Dst)
-     Dst = MRI.createVirtualRegister(DstRC);
-   auto InsElt =
-       MIRBuilder
-           .buildInstr(InsertOpc, {*Dst}, {LowPart->getOperand(0).getReg()})
-           .addImm(1) /* Lane index */
-           .addUse(HighPart->getOperand(0).getReg())
-           .addImm(0);
-   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
-   return &*InsElt;
- }
- /// Emit a CSINC of \p Src1 and \p Src2 into \p Dst under condition \p Pred.
- /// The 64-bit opcode is used when \p Dst is 64 bits wide, the 32-bit opcode
- /// otherwise.
- MachineInstr *
- AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
-                                       Register Src2, AArch64CC::CondCode Pred,
-                                       MachineIRBuilder &MIRBuilder) const {
-   auto &MRI = *MIRBuilder.getMRI();
-   const RegClassOrRegBank &ClassOrBank = MRI.getRegClassOrRegBank(Dst);
-   // If we used a register class, then this won't necessarily have an LLT.
-   // Compute the size based off whether or not we have a class or bank.
-   unsigned DstBits;
-   const auto *DstRC = ClassOrBank.dyn_cast<const TargetRegisterClass *>();
-   if (DstRC)
-     DstBits = TRI.getRegSizeInBits(*DstRC);
-   else
-     DstBits = MRI.getType(Dst).getSizeInBits();
-   // Some opcodes use s1.
-   assert(DstBits <= 64 && "Expected 64 bits or less only!");
-   unsigned Opc = AArch64::CSINCWr;
-   if (DstBits == 64)
-     Opc = AArch64::CSINCXr;
-   auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
-   constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
-   return &*CSINC;
- }
- /// Emit the flag-setting add/sub for an overflow opcode and report which
- /// condition code signals overflow.
- ///
- /// G_SADDO / G_UADDO use ADDS with VS / HS; G_SSUBO / G_USUBO use SUBS with
- /// VS / LO. Any other opcode is unreachable.
- std::pair<MachineInstr *, AArch64CC::CondCode>
- AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
-                                            MachineOperand &LHS,
-                                            MachineOperand &RHS,
-                                            MachineIRBuilder &MIRBuilder) const {
-   bool IsAdd = false;
-   AArch64CC::CondCode OverflowCC;
-   switch (Opcode) {
-   case TargetOpcode::G_SADDO:
-     IsAdd = true;
-     OverflowCC = AArch64CC::VS;
-     break;
-   case TargetOpcode::G_UADDO:
-     IsAdd = true;
-     OverflowCC = AArch64CC::HS;
-     break;
-   case TargetOpcode::G_SSUBO:
-     OverflowCC = AArch64CC::VS;
-     break;
-   case TargetOpcode::G_USUBO:
-     OverflowCC = AArch64CC::LO;
-     break;
-   default:
-     llvm_unreachable("Unexpected opcode!");
-   }
-   MachineInstr *ArithMI = IsAdd ? emitADDS(Dst, LHS, RHS, MIRBuilder)
-                                 : emitSUBS(Dst, LHS, RHS, MIRBuilder);
-   return std::make_pair(ArithMI, OverflowCC);
- }
- /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
- /// expressed as a conjunction.
- /// \param CanNegate Set to true if the whole sub-tree can be negated just by
- ///                  changing the conditions on the CMP tests (i.e.
- ///                  emitConjunctionRec() may be called with Negate==true on
- ///                  this sub-tree).
- /// \param MustBeFirst Set to true if this sub-tree needs to be negated and
- ///                    the negation cannot be done naturally. Such a sub-tree
- ///                    has to be emitted first.
- /// \param WillNegate True if the result of this sub-expression must be
- ///                   negated, which happens when the outer expression is an
- ///                   OR. This exposes double negations (or (or ...) ...) that
- ///                   can be implemented for free.
- /// \param Depth Current recursion depth; the search gives up beyond 6.
- static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
-                                bool WillNegate, MachineRegisterInfo &MRI,
-                                unsigned Depth = 0) {
-   // Values with other users cannot be folded into the conjunction.
-   if (!MRI.hasOneNonDBGUse(Val))
-     return false;
-   MachineInstr *Def = MRI.getVRegDef(Val);
-   const unsigned Opc = Def->getOpcode();
-   // Leaf: a compare can always be negated by flipping its condition.
-   if (isa<GAnyCmp>(Def)) {
-     CanNegate = true;
-     MustBeFirst = false;
-     return true;
-   }
-   // Protect against exponential runtime and stack overflow.
-   if (Depth > 6)
-     return false;
-   const bool IsAnd = Opc == TargetOpcode::G_AND;
-   const bool IsOr = Opc == TargetOpcode::G_OR;
-   if (!IsAnd && !IsOr)
-     return false;
-   // Analyse the left operand first; the right one is only visited when the
-   // left one is usable.
-   bool LhsCanNegate = false;
-   bool LhsMustBeFirst = false;
-   if (!canEmitConjunction(Def->getOperand(1).getReg(), LhsCanNegate,
-                           LhsMustBeFirst, IsOr, MRI, Depth + 1))
-     return false;
-   bool RhsCanNegate = false;
-   bool RhsMustBeFirst = false;
-   if (!canEmitConjunction(Def->getOperand(2).getReg(), RhsCanNegate,
-                           RhsMustBeFirst, IsOr, MRI, Depth + 1))
-     return false;
-   // Only one operand can be emitted first.
-   if (LhsMustBeFirst && RhsMustBeFirst)
-     return false;
-   if (IsOr) {
-     // For an OR expression we need to be able to naturally negate at least
-     // one side or we cannot do the transformation at all.
-     if (!(LhsCanNegate || RhsCanNegate))
-       return false;
-     // If the result of the OR will be negated and we can naturally negate
-     // the leaves, then this sub-tree as a whole negates naturally.
-     CanNegate = WillNegate && LhsCanNegate && RhsCanNegate;
-     // If we cannot naturally negate the whole sub-tree, then this must be
-     // emitted first.
-     MustBeFirst = !CanNegate;
-     return true;
-   }
-   assert(IsAnd && "Must be G_AND");
-   // We cannot naturally negate an AND operation.
-   CanNegate = false;
-   MustBeFirst = LhsMustBeFirst || RhsMustBeFirst;
-   return true;
- }
- /// Emits a conditional compare (CCMP for integer predicates, FCCMP for
- /// floating-point predicates) of \p LHS against \p RHS.
- ///
- /// \param CC        IR predicate of the compare; only used here to tell
- ///                  integer from floating-point compares.
- /// \param Predicate Condition code appended as the last immediate operand of
- ///                  the emitted instruction.
- /// \param OutCC     Condition code the caller will test on the result. The
- ///                  NZCV immediate operand is chosen so that it satisfies
- ///                  the inverse of \p OutCC.
- /// \returns the emitted instruction, or nullptr if a floating-point operand
- ///          has a size other than 16, 32 or 64 bits.
- MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
-     Register LHS, Register RHS, CmpInst::Predicate CC,
-     AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
-     MachineIRBuilder &MIB) const {
-   // TODO: emit CMN as an optimization.
-   auto &MRI = *MIB.getMRI();
-   LLT OpTy = MRI.getType(LHS);
-   const unsigned OpSize = OpTy.getSizeInBits();
-   unsigned CCmpOpc;
-   std::optional<ValueAndVReg> C;
-   if (CmpInst::isIntPredicate(CC)) {
-     // The 32/64-bit restriction applies to the integer forms only; the
-     // floating-point path below also accepts 16-bit operands.
-     assert((OpSize == 32 || OpSize == 64) &&
-            "Unexpected integer compare size");
-     // Use the immediate form when the RHS is a constant below 32.
-     C = getIConstantVRegValWithLookThrough(RHS, MRI);
-     if (C && C->Value.ult(32))
-       CCmpOpc = OpSize == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
-     else
-       CCmpOpc = OpSize == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
-   } else {
-     switch (OpSize) {
-     case 16:
-       CCmpOpc = AArch64::FCCMPHrr;
-       break;
-     case 32:
-       CCmpOpc = AArch64::FCCMPSrr;
-       break;
-     case 64:
-       CCmpOpc = AArch64::FCCMPDrr;
-       break;
-     default:
-       return nullptr;
-     }
-   }
-   AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
-   unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
-   auto CCmp = MIB.buildInstr(CCmpOpc, {}, {LHS});
-   // Second source operand: small immediate or register.
-   if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
-     CCmp.addImm(C->Value.getZExtValue());
-   else
-     CCmp.addReg(RHS);
-   CCmp.addImm(NZCV).addImm(Predicate);
-   constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
-   return &*CCmp;
- }
- /// Recursively emits the compare / conditional-compare chain for the
- /// conjunction tree rooted at \p Val. The tree must already have been
- /// accepted by canEmitConjunction().
- ///
- /// \param OutCC     Receives the condition code to test on the returned
- ///                  instruction.
- /// \param Negate    Emit the negation of the sub-tree instead.
- /// \param CCOp      Result register of the previously emitted compare in the
- ///                  chain; an invalid register means this is the first one.
- /// \param Predicate Condition code handed to conditional compares emitted
- ///                  for this sub-tree.
- /// \returns the last instruction emitted for the sub-tree.
- MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
-     Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
-     AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
-   auto &MRI = *MIB.getMRI();
-   MachineInstr *DefMI = MRI.getVRegDef(Val);
-   const unsigned DefOpc = DefMI->getOpcode();
-   // We're at a tree leaf, produce a conditional comparison operation.
-   if (auto *Leaf = dyn_cast<GAnyCmp>(DefMI)) {
-     const bool IsIntCmp = isa<GICmp>(Leaf);
-     Register CmpLHS = Leaf->getLHSReg();
-     Register CmpRHS = Leaf->getRHSReg();
-     CmpInst::Predicate Pred = Leaf->getCond();
-     if (Negate)
-       Pred = CmpInst::getInversePredicate(Pred);
-     if (IsIntCmp) {
-       OutCC = changeICMPPredToAArch64CC(Pred);
-     } else {
-       // Handle special FP cases.
-       AArch64CC::CondCode SecondCC;
-       changeFPCCToANDAArch64CC(Pred, OutCC, SecondCC);
-       // Some floating point conditions can't be tested with a single
-       // condition code. Construct an additional comparison in this case.
-       if (SecondCC != AArch64CC::AL) {
-         MachineInstr *FirstCmp =
-             !CCOp ? emitFPCompare(CmpLHS, CmpRHS, MIB, Pred)
-                   : emitConditionalComparison(CmpLHS, CmpRHS, Pred, Predicate,
-                                               SecondCC, MIB);
-         CCOp = FirstCmp->getOperand(0).getReg();
-         Predicate = SecondCC;
-       }
-     }
-     // Produce a normal comparison if we are first in the chain.
-     if (!CCOp) {
-       auto ScratchDst = MRI.cloneVirtualRegister(CmpLHS);
-       if (IsIntCmp)
-         return emitSUBS(ScratchDst, Leaf->getOperand(2), Leaf->getOperand(3),
-                         MIB);
-       return emitFPCompare(Leaf->getOperand(2).getReg(),
-                            Leaf->getOperand(3).getReg(), MIB);
-     }
-     // Otherwise produce a ccmp.
-     return emitConditionalComparison(CmpLHS, CmpRHS, Pred, Predicate, OutCC,
-                                      MIB);
-   }
-   assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
-   const bool IsOR = DefOpc == TargetOpcode::G_OR;
-   // Re-derive the negation / ordering properties of both operands.
-   Register SubL = DefMI->getOperand(1).getReg();
-   bool CanNegateL;
-   bool MustBeFirstL;
-   bool ValidL = canEmitConjunction(SubL, CanNegateL, MustBeFirstL, IsOR, MRI);
-   assert(ValidL && "Valid conjunction/disjunction tree");
-   (void)ValidL;
-   Register SubR = DefMI->getOperand(2).getReg();
-   bool CanNegateR;
-   bool MustBeFirstR;
-   bool ValidR = canEmitConjunction(SubR, CanNegateR, MustBeFirstR, IsOR, MRI);
-   assert(ValidR && "Valid conjunction/disjunction tree");
-   (void)ValidR;
-   // Swap sub-tree that must come first to the right side.
-   if (MustBeFirstL) {
-     assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
-     std::swap(SubL, SubR);
-     std::swap(CanNegateL, CanNegateR);
-     std::swap(MustBeFirstL, MustBeFirstR);
-   }
-   // Negation plan; the all-false defaults are the plan for a G_AND node.
-   bool NegateR = false;
-   bool NegateAfterR = false;
-   bool NegateL = false;
-   bool NegateAfterAll = false;
-   if (DefOpc == TargetOpcode::G_OR) {
-     if (CanNegateL) {
-       // Negate the right sub-tree naturally if possible, otherwise negate
-       // its result.
-       NegateR = CanNegateR;
-       NegateAfterR = !CanNegateR;
-     } else {
-       // Swap the sub-tree that we can negate naturally to the left.
-       assert(CanNegateR && "at least one side must be negatable");
-       assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
-       assert(!Negate);
-       std::swap(SubL, SubR);
-       NegateR = false;
-       NegateAfterR = true;
-     }
-     NegateL = true;
-     NegateAfterAll = !Negate;
-   } else {
-     assert(DefOpc == TargetOpcode::G_AND &&
-            "Valid conjunction/disjunction tree");
-     assert(!Negate && "Valid conjunction/disjunction tree");
-   }
-   // Emit sub-trees: the right one first, then the left one chained onto it.
-   AArch64CC::CondCode RightCC;
-   MachineInstr *RightCmp =
-       emitConjunctionRec(SubR, RightCC, NegateR, CCOp, Predicate, MIB);
-   if (NegateAfterR)
-     RightCC = AArch64CC::getInvertedCondCode(RightCC);
-   MachineInstr *LeftCmp = emitConjunctionRec(
-       SubL, OutCC, NegateL, RightCmp->getOperand(0).getReg(), RightCC, MIB);
-   if (NegateAfterAll)
-     OutCC = AArch64CC::getInvertedCondCode(OutCC);
-   return LeftCmp;
- }
- /// Entry point for conjunction emission: checks that \p Val roots a tree
- /// accepted by canEmitConjunction() and, if so, emits it.
- ///
- /// \param OutCC Receives the condition code to test on the result.
- /// \returns the final emitted compare, or nullptr if the tree is not
- ///          suitable.
- MachineInstr *AArch64InstructionSelector::emitConjunction(
-     Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
-   // The two properties are only needed during the recursion itself.
-   bool IgnoredCanNegate;
-   bool IgnoredMustBeFirst;
-   const bool IsConjunctionTree =
-       canEmitConjunction(Val, IgnoredCanNegate, IgnoredMustBeFirst,
-                          /*WillNegate=*/false, *MIB.getMRI());
-   if (!IsConjunctionTree)
-     return nullptr;
-   return emitConjunctionRec(Val, OutCC, /*Negate=*/false, Register(),
-                             AArch64CC::AL, MIB);
- }
- /// Tries to select \p SelI by emitting its condition as a conjunction of
- /// compares followed by a select on the resulting condition code.
- ///
- /// \p CondMI is not used by this function.
- /// \returns true if \p SelI was replaced and erased, false if its condition
- ///          could not be emitted as a conjunction.
- bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
-                                                          MachineInstr &CondMI) {
-   AArch64CC::CondCode SelectCC;
-   if (!emitConjunction(SelI.getCondReg(), SelectCC, MIB))
-     return false;
-   emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), SelectCC,
-              MIB);
-   SelI.eraseFromParent();
-   return true;
- }
- /// Tries to select the G_SELECT \p I together with the instruction defining
- /// its condition.
- ///
- /// The pattern of interest is:
- ///
- ///   $z = G_FCMP pred, $x, $y
- ///   ...
- ///   $w = G_SELECT $z, $a, $b
- ///
- /// where the value of $z is *only* ever used by G_SELECTs. In that case we
- /// can emit something like
- ///
- ///   fcmp $x, $y
- ///   fcsel $w, $a, $b, pred
- ///
- /// rather than both of the rather long sequences produced by the standard
- /// G_FCMP / G_SELECT selection. G_ICMP conditions are handled the same way
- /// and any other condition is handed to tryOptSelectConjunction().
- ///
- /// \returns true if \p I was replaced and erased.
- bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
-   MachineRegisterInfo &MRI = *MIB.getMRI();
-   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
-   Register CondDefReg = CondDef->getOperand(0).getReg();
-   // We can only fold if the condition has one use, unless every other user
-   // is another select.
-   const auto BlocksFold = [CondDef](const MachineInstr &UseMI) {
-     return &UseMI != CondDef && UseMI.getOpcode() != TargetOpcode::G_SELECT;
-   };
-   if (!MRI.hasOneNonDBGUse(CondDefReg) &&
-       any_of(MRI.use_nodbg_instructions(CondDefReg), BlocksFold))
-     return false;
-   // Is the condition defined by a compare? If not, try the conjunction path.
-   const unsigned CondOpc = CondDef->getOpcode();
-   const bool IsICmp = CondOpc == TargetOpcode::G_ICMP;
-   if (!IsICmp && CondOpc != TargetOpcode::G_FCMP)
-     return tryOptSelectConjunction(I, *CondDef);
-   const auto Pred =
-       static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
-   // Condition code for the select.
-   AArch64CC::CondCode SelectCC;
-   if (IsICmp) {
-     SelectCC = changeICMPPredToAArch64CC(Pred);
-     emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
-                        CondDef->getOperand(1), MIB);
-   } else {
-     AArch64CC::CondCode SecondCC;
-     changeFCMPPredToAArch64CC(Pred, SelectCC, SecondCC);
-     // A second condition code other than AL means the predicate cannot be
-     // tested with a single condition, which is not supported here.
-     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
-     // unnecessary.
-     if (SecondCC != AArch64CC::AL)
-       return false;
-     if (!emitFPCompare(CondDef->getOperand(2).getReg(),
-                        CondDef->getOperand(3).getReg(), MIB)) {
-       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
-       return false;
-     }
-   }
-   // Emit the select.
-   emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
-              I.getOperand(3).getReg(), SelectCC, MIB);
-   I.eraseFromParent();
-   return true;
- }
- /// Tries to emit an integer compare of \p LHS and \p RHS with one of its
- /// operands folded into the compare instruction.
- ///
- /// Handled patterns, tried in this order:
- ///
- ///   x = G_SUB 0, y ; G_ICMP x, z    ->   cmn y, z
- ///   x = G_SUB 0, y ; G_ICMP z, x    ->   cmn z, y
- ///   z = G_AND x, y ; G_ICMP z, 0    ->   tst x, y   (predicate not unsigned)
- ///
- /// \returns the emitted instruction, or nullptr if no fold applies.
- MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
-     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
-     MachineIRBuilder &MIRBuilder) const {
-   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
-          "Unexpected MachineOperand");
-   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-   const auto Pred = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
-   // Look through copies to find what defines each side of the compare.
-   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
-   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
-   // A negated LHS folds into a CMN of the un-negated value and the RHS.
-   if (isCMN(LHSDef, Pred, MRI))
-     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
-   // Same idea here, but with the RHS of the compare instead.
-   if (isCMN(RHSDef, Pred, MRI))
-     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
-   // The remaining fold needs a G_AND on the LHS and a predicate that is not
-   // unsigned.
-   const bool IsAndOnLHS =
-       LHSDef && LHSDef->getOpcode() == TargetOpcode::G_AND;
-   if (CmpInst::isUnsigned(Pred) || !IsAndOnLHS)
-     return nullptr;
-   // Make sure that the RHS is 0.
-   auto RHSCst = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
-   if (RHSCst && RHSCst->Value == 0)
-     return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
-   return nullptr;
- }
- /// Selects a G_SHUFFLE_VECTOR as a TBL lookup whose byte-index vector is
- /// loaded from the constant pool.
- ///
- /// 64-bit results concatenate the two sources and use the one-register TBL
- /// form followed by a dsub copy; all other results use the two-register TBL
- /// form on a Q-register tuple.
- ///
- /// \returns true if \p I was replaced and erased.
- bool AArch64InstructionSelector::selectShuffleVector(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
-   Register Src1Reg = I.getOperand(1).getReg();
-   Register Src2Reg = I.getOperand(2).getReg();
-   const LLT Src1Ty = MRI.getType(Src1Reg);
-   const LLT Src2Ty = MRI.getType(Src2Reg);
-   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
-   // it's originated from a <1 x T> type. Those should have been lowered into
-   // G_BUILD_VECTOR earlier.
-   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
-     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
-     return false;
-   }
-   MachineBasicBlock &MBB = *I.getParent();
-   MachineFunction &MF = *MBB.getParent();
-   LLVMContext &Ctx = MF.getFunction().getContext();
-   Type *ByteTy = Type::getInt8Ty(Ctx);
-   // Expand each element index of the mask into the indices of its bytes.
-   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
-   const unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
-   SmallVector<Constant *, 64> ByteIdxs;
-   for (int MaskElt : Mask) {
-     // For now, any undef indexes we'll just assume to be 0. This should be
-     // optimized in future, e.g. to select DUP etc.
-     const unsigned EltIdx = MaskElt < 0 ? 0 : MaskElt;
-     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
-       unsigned Offset = Byte + EltIdx * BytesPerElt;
-       ByteIdxs.emplace_back(ConstantInt::get(ByteTy, Offset));
-     }
-   }
-   // Use a constant pool to load the index vector for TBL.
-   Constant *CPVal = ConstantVector::get(ByteIdxs);
-   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
-   if (!IndexLoad) {
-     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
-     return false;
-   }
-   if (DstTy.getSizeInBits() == 128) {
-     // For TBL2 we need to emit a REG_SEQUENCE to tie together two
-     // consecutive Q registers for regalloc.
-     SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
-     auto RegSeq = createQTuple(Regs, MIB);
-     auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
-                                {RegSeq, IndexLoad->getOperand(0)});
-     constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
-     I.eraseFromParent();
-     return true;
-   }
-   assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
-   // This case can be done with TBL1.
-   MachineInstr *Concat = emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
-   if (!Concat) {
-     LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
-     return false;
-   }
-   // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
-   IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
-                                  IndexLoad->getOperand(0).getReg(), MIB);
-   auto TBL1 = MIB.buildInstr(
-       AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
-       {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
-   constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
-   // Extract the low 64 bits of the lookup result into the destination.
-   auto Copy =
-       MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
-           .addReg(TBL1.getReg(0), 0, AArch64::dsub);
-   RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
-   I.eraseFromParent();
-   return true;
- }
- /// Emits an instruction inserting \p EltReg into lane \p LaneIdx of the
- /// vector \p SrcReg.
- ///
- /// \param DstReg Register to define; a fresh FPR128 virtual register is
- ///               created when no value is passed.
- /// \param RB     Register bank of the element; selects the insert opcode and
- ///               whether the element is first moved into a vector register.
- /// \returns the emitted insert instruction.
- MachineInstr *AArch64InstructionSelector::emitLaneInsert(
-     std::optional<Register> DstReg, Register SrcReg, Register EltReg,
-     unsigned LaneIdx, const RegisterBank &RB,
-     MachineIRBuilder &MIRBuilder) const {
-   const TargetRegisterClass *VecRC = &AArch64::FPR128RegClass;
-   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-   // Create a register to define with the insert if one wasn't passed in.
-   if (!DstReg)
-     DstReg = MRI.createVirtualRegister(VecRC);
-   const unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
-   const unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
-   const bool EltOnFPR = RB.getID() == AArch64::FPRRegBankID;
-   // An FPR element is moved into a vector register first and the insert then
-   // reads lane 0 of that vector; otherwise the element is used directly.
-   Register InsertedReg = EltReg;
-   if (EltOnFPR)
-     InsertedReg = emitScalarToVector(EltSize, VecRC, EltReg, MIRBuilder)
-                       ->getOperand(0)
-                       .getReg();
-   auto Insert = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
-                     .addImm(LaneIdx)
-                     .addUse(InsertedReg);
-   if (EltOnFPR)
-     Insert.addImm(0);
-   MachineInstr *InsElt = Insert;
-   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
-   return InsElt;
- }
- /// Selects an extend (G_SEXT, G_ZEXT or G_ANYEXT) of a constant-lane
- /// G_EXTRACT_VECTOR_ELT as a single SMOV / UMOV.
- ///
- /// Depending on the type and sign of the input one of these is generated:
- ///   DstReg = SMOV Src0, Lane
- ///   DstReg = UMOV Src0, Lane
- ///   NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32
- ///
- /// \returns true if \p MI was replaced and erased.
- bool AArch64InstructionSelector::selectUSMovFromExtend(
-     MachineInstr &MI, MachineRegisterInfo &MRI) {
-   const unsigned ExtOpc = MI.getOpcode();
-   const bool IsSigned = ExtOpc == TargetOpcode::G_SEXT;
-   if (!IsSigned && ExtOpc != TargetOpcode::G_ZEXT &&
-       ExtOpc != TargetOpcode::G_ANYEXT)
-     return false;
-   const Register DefReg = MI.getOperand(0).getReg();
-   const unsigned DstSize = MRI.getType(DefReg).getSizeInBits();
-   if (DstSize != 32 && DstSize != 64)
-     return false;
-   // The extended value must be a vector element taken at a constant lane.
-   MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
-                                        MI.getOperand(1).getReg(), MRI);
-   int64_t Lane;
-   if (!Extract ||
-       !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
-     return false;
-   Register Src0 = Extract->getOperand(1).getReg();
-   const LLT VecTy = MRI.getType(Src0);
-   // Vectors that are not 128 bits wide are first placed in an FPR128
-   // register.
-   if (VecTy.getSizeInBits() != 128) {
-     const MachineInstr *ScalarToVector = emitScalarToVector(
-         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
-     assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
-     Src0 = ScalarToVector->getOperand(0).getReg();
-   }
-   // Pick the move by element width; the signed forms also encode the
-   // destination width.
-   const bool To64 = DstSize == 64;
-   unsigned Opcode;
-   switch (VecTy.getScalarSizeInBits()) {
-   case 32:
-     if (!To64)
-       llvm_unreachable("Unexpected type combo for S/UMov!");
-     Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
-     break;
-   case 16:
-     if (!IsSigned)
-       Opcode = AArch64::UMOVvi16;
-     else
-       Opcode = To64 ? AArch64::SMOVvi16to64 : AArch64::SMOVvi16to32;
-     break;
-   case 8:
-     if (!IsSigned)
-       Opcode = AArch64::UMOVvi8;
-     else
-       Opcode = To64 ? AArch64::SMOVvi8to64 : AArch64::SMOVvi8to32;
-     break;
-   default:
-     llvm_unreachable("Unexpected type combo for S/UMov!");
-   }
-   MachineInstr *ExtI = nullptr;
-   if (To64 && !IsSigned) {
-     // UMOV defines a 32-bit register, which is then placed in the 64-bit
-     // destination with SUBREG_TO_REG.
-     Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-     MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
-     ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
-                .addImm(0)
-                .addUse(NewReg)
-                .addImm(AArch64::sub_32);
-     RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
-   } else {
-     ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
-   }
-   constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
-   MI.eraseFromParent();
-   return true;
- }
- /// Selects a G_INSERT_VECTOR_ELT whose lane index is a constant.
- ///
- /// The insert is always performed on a 128-bit register: narrower vectors
- /// are widened first and the result is copied back out through the matching
- /// sub-register afterwards.
- ///
- /// \returns true if \p I was replaced and erased; false for element sizes
- ///          outside 16..64 bits, a non-constant index, or an unsupported
- ///          destination register class.
- bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
-                                                  MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
-   // Get information on the destination.
-   Register DstReg = I.getOperand(0).getReg();
-   const LLT DstTy = MRI.getType(DstReg);
-   const unsigned VecSize = DstTy.getSizeInBits();
-   const bool NeedsWiden = VecSize < 128;
-   // Get information on the element we want to insert into the destination.
-   Register EltReg = I.getOperand(2).getReg();
-   const unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
-   if (EltSize < 16 || EltSize > 64)
-     return false; // Don't support all element types yet.
-   // Find the definition of the index. Bail out if it's not defined by a
-   // G_CONSTANT.
-   auto IdxCst = getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
-   if (!IdxCst)
-     return false;
-   unsigned LaneIdx = IdxCst->Value.getSExtValue();
-   Register SrcReg = I.getOperand(1).getReg();
-   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
-   if (NeedsWiden) {
-     // If the vector we're inserting into is smaller than 128 bits, widen it
-     // to 128 to do the insert.
-     MachineInstr *Widened =
-         emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
-     if (!Widened)
-       return false;
-     SrcReg = Widened->getOperand(0).getReg();
-   }
-   // Create an insert into a new FPR128 register.
-   // Note that if our vector is already 128 bits, we end up emitting an extra
-   // register.
-   MachineInstr *InsMI =
-       emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
-   if (!NeedsWiden) {
-     // No widening needed: the insert defines the destination directly.
-     InsMI->getOperand(0).setReg(DstReg);
-     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
-     I.eraseFromParent();
-     return true;
-   }
-   // If we had to widen to perform the insert, then we have to demote back to
-   // the original size to get the result we want.
-   Register DemoteVec = InsMI->getOperand(0).getReg();
-   const TargetRegisterClass *RC =
-       getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
-   if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
-     LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
-     return false;
-   }
-   unsigned SubReg = 0;
-   if (!getSubRegForClass(RC, TRI, SubReg))
-     return false;
-   if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
-     LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
-                       << "\n");
-     return false;
-   }
-   MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
-       .addReg(DemoteVec, 0, SubReg);
-   RBI.constrainGenericRegister(DstReg, *RC, MRI);
-   I.eraseFromParent();
-   return true;
- }
- /// Materializes the constant vector \p CV into \p Dst.
- ///
- /// An all-zero constant of 128 or 64 bits is produced with MOVIv2d_ns (plus
- /// a dsub copy for the 64-bit case); everything else is loaded from the
- /// constant pool and copied into \p Dst.
- ///
- /// \returns the instruction defining \p Dst, or nullptr if the constant pool
- ///          load could not be emitted.
- MachineInstr *
- AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
-                                                MachineIRBuilder &MIRBuilder,
-                                                MachineRegisterInfo &MRI) {
-   const unsigned DstSize = MRI.getType(Dst).getSizeInBits();
-   const bool IsZero = CV->isNullValue();
-   if (IsZero && DstSize == 128) {
-     auto Movi = MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
-     constrainSelectedInstRegOperands(*Movi, TII, TRI, RBI);
-     return &*Movi;
-   }
-   if (IsZero && DstSize == 64) {
-     // Zero a full 128-bit register and take its low half.
-     auto Movi =
-         MIRBuilder
-             .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
-             .addImm(0);
-     auto LowHalf = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
-                        .addReg(Movi.getReg(0), 0, AArch64::dsub);
-     RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
-     return &*LowHalf;
-   }
-   // General case: load the value from the constant pool.
-   auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
-   if (!CPLoad) {
-     LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
-     return nullptr;
-   }
-   auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
-   // Give Dst the register class of the loaded value.
-   RBI.constrainGenericRegister(
-       Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
-   return &*Copy;
- }
- /// Tries to select a G_BUILD_VECTOR whose operands are all G_CONSTANT or
- /// G_FCONSTANT as a single constant vector, instead of a vector insert
- /// sequence.
- ///
- /// \returns true if \p I was replaced and erased; false if the destination
- ///          is narrower than 32 bits, an operand is not a constant, or the
- ///          constant vector could not be emitted.
- bool AArch64InstructionSelector::tryOptConstantBuildVec(
-     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-   unsigned DstSize = DstTy.getSizeInBits();
-   assert(DstSize <= 128 && "Unexpected build_vec type!");
-   if (DstSize < 32)
-     return false;
-   // Collect the IR constant behind every element; give up on the first
-   // element that is neither a G_CONSTANT nor a G_FCONSTANT.
-   SmallVector<Constant *, 16> Elts;
-   const unsigned NumOps = I.getNumOperands();
-   for (unsigned OpIdx = 1; OpIdx < NumOps; ++OpIdx) {
-     Register EltReg = I.getOperand(OpIdx).getReg();
-     if (auto *IntDef = getOpcodeDef(TargetOpcode::G_CONSTANT, EltReg, MRI)) {
-       Elts.emplace_back(
-           const_cast<ConstantInt *>(IntDef->getOperand(1).getCImm()));
-       continue;
-     }
-     if (auto *FPDef = getOpcodeDef(TargetOpcode::G_FCONSTANT, EltReg, MRI)) {
-       Elts.emplace_back(
-           const_cast<ConstantFP *>(FPDef->getOperand(1).getFPImm()));
-       continue;
-     }
-     return false;
-   }
-   Constant *CV = ConstantVector::get(Elts);
-   if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
-     return false;
-   I.eraseFromParent();
-   return true;
- }
- /// Given:
- ///   %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
- ///
- /// selects the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
- ///
- /// \returns false, leaving \p I untouched, if the pattern does not match or
- ///          a register class / sub-register index cannot be found. Otherwise
- ///          \p I is erased and the result of constraining the destination
- ///          register is returned.
- bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
-     MachineInstr &I, MachineRegisterInfo &MRI) {
-   Register Dst = I.getOperand(0).getReg();
-   Register EltReg = I.getOperand(1).getReg();
-   LLT EltTy = MRI.getType(EltReg);
-   // If the destination isn't on the same bank as its first element, then
-   // this can't be a SUBREG_TO_REG.
-   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
-   const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
-   if (EltRB != DstRB)
-     return false;
-   // Every element after the first has to be undefined.
-   for (unsigned OpIdx = 2, NumOps = I.getNumOperands(); OpIdx < NumOps;
-        ++OpIdx) {
-     if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
-                       I.getOperand(OpIdx).getReg(), MRI))
-       return false;
-   }
-   const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
-   if (!EltRC)
-     return false;
-   const TargetRegisterClass *DstRC =
-       getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
-   if (!DstRC)
-     return false;
-   unsigned SubReg;
-   if (!getSubRegForClass(EltRC, TRI, SubReg))
-     return false;
-   auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
-                          .addImm(0)
-                          .addUse(EltReg)
-                          .addImm(SubReg);
-   I.eraseFromParent();
-   constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
-   return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
- }
- /// Selects a G_BUILD_VECTOR.
- ///
- /// The constant-vector and SUBREG_TO_REG shortcuts are tried first. The
- /// fallback moves the first element into an FPR128 register and inserts the
- /// remaining elements lane by lane; results narrower than 128 bits are then
- /// copied out through the matching sub-register.
- ///
- /// \returns true if \p I was replaced and erased.
- bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
-                                                    MachineRegisterInfo &MRI) {
-   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-   // Until we port more of the optimized selections, for now just use a vector
-   // insert sequence.
-   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
-   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
-   const unsigned EltSize = EltTy.getSizeInBits();
-   if (tryOptConstantBuildVec(I, DstTy, MRI) ||
-       tryOptBuildVecToSubregToReg(I, MRI))
-     return true;
-   if (EltSize < 16 || EltSize > 64)
-     return false; // Don't support all element types yet.
-   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
-   // Start the vector from the first element.
-   MachineInstr *ScalarToVec = emitScalarToVector(
-       DstTy.getElementType().getSizeInBits(), &AArch64::FPR128RegClass,
-       I.getOperand(1).getReg(), MIB);
-   if (!ScalarToVec)
-     return false;
-   Register DstVec = ScalarToVec->getOperand(0).getReg();
-   const unsigned DstSize = DstTy.getSizeInBits();
-   // Keep track of the last MI we inserted. Later on, we might be able to save
-   // a copy using it.
-   MachineInstr *LastInsert = nullptr;
-   // Lane N is fed by operand N + 1 (operand 0 is the destination).
-   for (unsigned Lane = 1, NumElts = DstSize / EltSize; Lane < NumElts; ++Lane) {
-     // Note that if we don't do a subregister copy, we can end up making an
-     // extra register.
-     LastInsert = &*emitLaneInsert(std::nullopt, DstVec,
-                                   I.getOperand(Lane + 1).getReg(), Lane, RB,
-                                   MIB);
-     DstVec = LastInsert->getOperand(0).getReg();
-   }
-   if (DstSize >= 128) {
-     // We don't need a subregister copy. Save a copy by re-using the
-     // destination register on the final insert.
-     assert(LastInsert && "PrevMI was null?");
-     LastInsert->getOperand(0).setReg(I.getOperand(0).getReg());
-     constrainSelectedInstRegOperands(*LastInsert, TII, TRI, RBI);
-     I.eraseFromParent();
-     return true;
-   }
-   // DstTy is narrower than 128 bits: emit a subregister copy from DstVec to
-   // the destination.
-   // Force this to be FPR using the destination vector.
-   const TargetRegisterClass *RC =
-       getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
-   if (!RC)
-     return false;
-   if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
-     LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
-     return false;
-   }
-   unsigned SubReg = 0;
-   if (!getSubRegForClass(RC, TRI, SubReg))
-     return false;
-   if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
-     LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
-                       << "\n");
-     return false;
-   }
-   Register Reg = MRI.createVirtualRegister(RC);
-   Register DstReg = I.getOperand(0).getReg();
-   MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
-   // Point the first source operand of I at the fresh register before I is
-   // erased below.
-   MachineOperand &RegOp = I.getOperand(1);
-   RegOp.setReg(Reg);
-   RBI.constrainGenericRegister(DstReg, *RC, MRI);
-   I.eraseFromParent();
-   return true;
- }
- /// Emits the multi-vector load \p Opc for the intrinsic \p I and copies each
- /// of the \p NumVecs loaded vectors into the corresponding result operand of
- /// \p I. \p I itself is not erased here.
- ///
- /// \param Opc     Opcode of the load to emit; must be non-zero.
- /// \param NumVecs Number of vectors produced (2, 3 or 4).
- /// \returns true.
- bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
-                                                            unsigned NumVecs,
-                                                            MachineInstr &I) {
-   assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
-   assert(Opc && "Expected an opcode?");
-   assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
-   auto &MRI = *MIB.getMRI();
-   LLT Ty = MRI.getType(I.getOperand(0).getReg());
-   unsigned Size = Ty.getSizeInBits();
-   assert((Size == 64 || Size == 128) &&
-          "Destination must be 64 bits or 128 bits?");
-   // First sub-register index of the loaded tuple; the following vectors use
-   // the following indices.
-   const unsigned FirstSubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
-   // The pointer is the last operand of the intrinsic.
-   auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
-   assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
-   auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
-   Load.cloneMemRefs(I);
-   constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
-   Register TupleReg = Load->getOperand(0).getReg();
-   for (unsigned VecIdx = 0; VecIdx != NumVecs; ++VecIdx) {
-     auto SubCopy =
-         MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(VecIdx)}, {})
-             .addReg(TupleReg, 0, FirstSubReg + VecIdx);
-     // Emit the subreg copies and immediately select them.
-     // FIXME: We should refactor our copy code into an emitCopy helper and
-     // clean up uses of this pattern elsewhere in the selector.
-     selectCopy(*SubCopy, TII, MRI, TRI, RBI);
-   }
-   return true;
- }
- bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- // Find the intrinsic ID.
- unsigned IntrinID = I.getIntrinsicID();
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
- // Select the instruction.
- switch (IntrinID) {
- default:
- return false;
- case Intrinsic::aarch64_ldxp:
- case Intrinsic::aarch64_ldaxp: {
- auto NewI = MIB.buildInstr(
- IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
- {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
- {I.getOperand(3)});
- NewI.cloneMemRefs(I);
- constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
- break;
- }
- case Intrinsic::trap:
- MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
- break;
- case Intrinsic::debugtrap:
- MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
- break;
- case Intrinsic::ubsantrap:
- MIB.buildInstr(AArch64::BRK, {}, {})
- .addImm(I.getOperand(1).getImm() | ('U' << 8));
- break;
- case Intrinsic::aarch64_neon_ld2: {
- LLT Ty = MRI.getType(I.getOperand(0).getReg());
- unsigned Opc = 0;
- if (Ty == LLT::fixed_vector(8, S8))
- Opc = AArch64::LD2Twov8b;
- else if (Ty == LLT::fixed_vector(16, S8))
- Opc = AArch64::LD2Twov16b;
- else if (Ty == LLT::fixed_vector(4, S16))
- Opc = AArch64::LD2Twov4h;
- else if (Ty == LLT::fixed_vector(8, S16))
- Opc = AArch64::LD2Twov8h;
- else if (Ty == LLT::fixed_vector(2, S32))
- Opc = AArch64::LD2Twov2s;
- else if (Ty == LLT::fixed_vector(4, S32))
- Opc = AArch64::LD2Twov4s;
- else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
- Opc = AArch64::LD2Twov2d;
- else if (Ty == S64 || Ty == P0)
- Opc = AArch64::LD1Twov1d;
- else
- llvm_unreachable("Unexpected type for ld2!");
- selectVectorLoadIntrinsic(Opc, 2, I);
- break;
- }
- case Intrinsic::aarch64_neon_ld4: {
- LLT Ty = MRI.getType(I.getOperand(0).getReg());
- unsigned Opc = 0;
- if (Ty == LLT::fixed_vector(8, S8))
- Opc = AArch64::LD4Fourv8b;
- else if (Ty == LLT::fixed_vector(16, S8))
- Opc = AArch64::LD4Fourv16b;
- else if (Ty == LLT::fixed_vector(4, S16))
- Opc = AArch64::LD4Fourv4h;
- else if (Ty == LLT::fixed_vector(8, S16))
- Opc = AArch64::LD4Fourv8h;
- else if (Ty == LLT::fixed_vector(2, S32))
- Opc = AArch64::LD4Fourv2s;
- else if (Ty == LLT::fixed_vector(4, S32))
- Opc = AArch64::LD4Fourv4s;
- else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
- Opc = AArch64::LD4Fourv2d;
- else if (Ty == S64 || Ty == P0)
- Opc = AArch64::LD1Fourv1d;
- else
- llvm_unreachable("Unexpected type for ld4!");
- selectVectorLoadIntrinsic(Opc, 4, I);
- break;
- }
- case Intrinsic::aarch64_neon_st2: {
- Register Src1 = I.getOperand(1).getReg();
- Register Src2 = I.getOperand(2).getReg();
- Register Ptr = I.getOperand(3).getReg();
- LLT Ty = MRI.getType(Src1);
- unsigned Opc;
- if (Ty == LLT::fixed_vector(8, S8))
- Opc = AArch64::ST2Twov8b;
- else if (Ty == LLT::fixed_vector(16, S8))
- Opc = AArch64::ST2Twov16b;
- else if (Ty == LLT::fixed_vector(4, S16))
- Opc = AArch64::ST2Twov4h;
- else if (Ty == LLT::fixed_vector(8, S16))
- Opc = AArch64::ST2Twov8h;
- else if (Ty == LLT::fixed_vector(2, S32))
- Opc = AArch64::ST2Twov2s;
- else if (Ty == LLT::fixed_vector(4, S32))
- Opc = AArch64::ST2Twov4s;
- else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
- Opc = AArch64::ST2Twov2d;
- else if (Ty == S64 || Ty == P0)
- Opc = AArch64::ST1Twov1d;
- else
- llvm_unreachable("Unexpected type for st2!");
- SmallVector<Register, 2> Regs = {Src1, Src2};
- Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
- : createDTuple(Regs, MIB);
- auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
- Store.cloneMemRefs(I);
- constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
- break;
- }
- case Intrinsic::aarch64_mops_memset_tag: {
- // Transform
- // %dst:gpr(p0) = \
- // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
- // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
- // where %dst is updated, into
- // %Rd:GPR64common, %Rn:GPR64) = \
- // MOPSMemorySetTaggingPseudo \
- // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
- // where Rd and Rn are tied.
- // It is expected that %val has been extended to s64 in legalization.
- // Note that the order of the size/value operands are swapped.
- Register DstDef = I.getOperand(0).getReg();
- // I.getOperand(1) is the intrinsic function
- Register DstUse = I.getOperand(2).getReg();
- Register ValUse = I.getOperand(3).getReg();
- Register SizeUse = I.getOperand(4).getReg();
- // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
- // Therefore an additional virtual register is required for the updated size
- // operand. This value is not accessible via the semantics of the intrinsic.
- Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
- auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
- {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
- Memset.cloneMemRefs(I);
- constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
- break;
- }
- }
- I.eraseFromParent();
- return true;
- }
- bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- unsigned IntrinID = I.getIntrinsicID();
- switch (IntrinID) {
- default:
- break;
- case Intrinsic::aarch64_crypto_sha1h: {
- Register DstReg = I.getOperand(0).getReg();
- Register SrcReg = I.getOperand(2).getReg();
- // FIXME: Should this be an assert?
- if (MRI.getType(DstReg).getSizeInBits() != 32 ||
- MRI.getType(SrcReg).getSizeInBits() != 32)
- return false;
- // The operation has to happen on FPRs. Set up some new FPR registers for
- // the source and destination if they are on GPRs.
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
- SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- MIB.buildCopy({SrcReg}, {I.getOperand(2)});
- // Make sure the copy ends up getting constrained properly.
- RBI.constrainGenericRegister(I.getOperand(2).getReg(),
- AArch64::GPR32RegClass, MRI);
- }
- if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
- DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- // Actually insert the instruction.
- auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
- constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
- // Did we create a new register for the destination?
- if (DstReg != I.getOperand(0).getReg()) {
- // Yep. Copy the result of the instruction back into the original
- // destination.
- MIB.buildCopy({I.getOperand(0)}, {DstReg});
- RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- AArch64::GPR32RegClass, MRI);
- }
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::ptrauth_sign: {
- Register DstReg = I.getOperand(0).getReg();
- Register ValReg = I.getOperand(2).getReg();
- uint64_t Key = I.getOperand(3).getImm();
- Register DiscReg = I.getOperand(4).getReg();
- auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
- bool IsDiscZero = DiscVal && DiscVal->isNullValue();
- if (Key > AArch64PACKey::LAST)
- return false;
- unsigned Opcodes[][4] = {
- {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
- {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
- unsigned Opcode = Opcodes[IsDiscZero][Key];
- auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
- if (!IsDiscZero) {
- PAC.addUse(DiscReg);
- RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
- }
- RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::ptrauth_strip: {
- Register DstReg = I.getOperand(0).getReg();
- Register ValReg = I.getOperand(2).getReg();
- uint64_t Key = I.getOperand(3).getImm();
- if (Key > AArch64PACKey::LAST)
- return false;
- unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key);
- MIB.buildInstr(Opcode, {DstReg}, {ValReg});
- RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
- RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI);
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::frameaddress:
- case Intrinsic::returnaddress: {
- MachineFunction &MF = *I.getParent()->getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Depth = I.getOperand(2).getImm();
- Register DstReg = I.getOperand(0).getReg();
- RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
- if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
- if (!MFReturnAddr) {
- // Insert the copy from LR/X30 into the entry block, before it can be
- // clobbered by anything.
- MFI.setReturnAddressIsTaken(true);
- MFReturnAddr = getFunctionLiveInPhysReg(
- MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
- }
- if (STI.hasPAuth()) {
- MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
- } else {
- MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
- MIB.buildInstr(AArch64::XPACLRI);
- MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
- }
- I.eraseFromParent();
- return true;
- }
- MFI.setFrameAddressIsTaken(true);
- Register FrameAddr(AArch64::FP);
- while (Depth--) {
- Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
- auto Ldr =
- MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
- constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
- FrameAddr = NextFrame;
- }
- if (IntrinID == Intrinsic::frameaddress)
- MIB.buildCopy({DstReg}, {FrameAddr});
- else {
- MFI.setReturnAddressIsTaken(true);
- if (STI.hasPAuth()) {
- Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
- MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
- } else {
- MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
- .addImm(1);
- MIB.buildInstr(AArch64::XPACLRI);
- MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
- }
- }
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::swift_async_context_addr:
- auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
- {Register(AArch64::FP)})
- .addImm(8)
- .addImm(0);
- constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
- MF->getFrameInfo().setFrameAddressIsTaken(true);
- MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
- I.eraseFromParent();
- return true;
- }
- return false;
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
- return std::nullopt;
- uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
- return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
- return std::nullopt;
- uint64_t Enc = 31 - *MaybeImmed;
- return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
- return std::nullopt;
- uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
- return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
- return std::nullopt;
- uint64_t Enc = 63 - *MaybeImmed;
- return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
- }
- /// Helper to select an immediate value that can be represented as a 12-bit
- /// value shifted left by either 0 or 12. If it is possible to do so, return
- /// the immediate and shift value. If not, return std::nullopt.
- ///
- /// Used by selectArithImmed and selectNegArithImmed.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::select12BitValueWithLeftShift(
- uint64_t Immed) const {
- unsigned ShiftAmt;
- if (Immed >> 12 == 0) {
- ShiftAmt = 0;
- } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
- ShiftAmt = 12;
- Immed = Immed >> 12;
- } else
- return std::nullopt;
- unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
- }};
- }
- /// SelectArithImmed - Select an immediate value that can be represented as
- /// a 12-bit value shifted left by either 0 or 12. If so, return true with
- /// Val set to the 12-bit value and Shift set to the shifter operand.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcode it's interested in, however
- // we still need to check whether the operand is actually an immediate
- // here because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt)
- return std::nullopt;
- return select12BitValueWithLeftShift(*MaybeImmed);
- }
- /// SelectNegArithImmed - As above, but negates the value before trying to
- /// select it.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
- // We need a register here, because we need to know if we have a 64 or 32
- // bit immediate.
- if (!Root.isReg())
- return std::nullopt;
- auto MaybeImmed = getImmedFromMO(Root);
- if (MaybeImmed == std::nullopt)
- return std::nullopt;
- uint64_t Immed = *MaybeImmed;
- // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
- // have the opposite effect on the C flag, so this pattern mustn't match under
- // those circumstances.
- if (Immed == 0)
- return std::nullopt;
- // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
- // the root.
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
- Immed = ~((uint32_t)Immed) + 1;
- else
- Immed = ~Immed + 1ULL;
- if (Immed & 0xFFFFFFFFFF000000ULL)
- return std::nullopt;
- Immed &= 0xFFFFFFULL;
- return select12BitValueWithLeftShift(Immed);
- }
- /// Return true if it is worth folding MI into an extended register. That is,
- /// if it's safe to pull it into the addressing mode of a load or store as a
- /// shift.
- bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
- MachineInstr &MI, const MachineRegisterInfo &MRI) const {
- // Always fold if there is one use, or if we're optimizing for size.
- Register DefReg = MI.getOperand(0).getReg();
- if (MRI.hasOneNonDBGUse(DefReg) ||
- MI.getParent()->getParent()->getFunction().hasOptSize())
- return true;
- // It's better to avoid folding and recomputing shifts when we don't have a
- // fastpath.
- if (!STI.hasLSLFast())
- return false;
- // We have a fastpath, so folding a shift in and potentially computing it
- // many times may be beneficial. Check if this is only used in memory ops.
- // If it is, then we should fold.
- return all_of(MRI.use_nodbg_instructions(DefReg),
- [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
- }
- static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
- switch (Type) {
- case AArch64_AM::SXTB:
- case AArch64_AM::SXTH:
- case AArch64_AM::SXTW:
- return true;
- default:
- return false;
- }
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectExtendedSHL(
- MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
- unsigned SizeInBytes, bool WantsExt) const {
- assert(Base.isReg() && "Expected base to be a register operand");
- assert(Offset.isReg() && "Expected offset to be a register operand");
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
- unsigned OffsetOpc = OffsetInst->getOpcode();
- bool LookedThroughZExt = false;
- if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
- // Try to look through a ZEXT.
- if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
- return std::nullopt;
- OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
- OffsetOpc = OffsetInst->getOpcode();
- LookedThroughZExt = true;
- if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
- return std::nullopt;
- }
- // Make sure that the memory op is a valid size.
- int64_t LegalShiftVal = Log2_32(SizeInBytes);
- if (LegalShiftVal == 0)
- return std::nullopt;
- if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
- return std::nullopt;
- // Now, try to find the specific G_CONSTANT. Start by assuming that the
- // register we will offset is the LHS, and the register containing the
- // constant is the RHS.
- Register OffsetReg = OffsetInst->getOperand(1).getReg();
- Register ConstantReg = OffsetInst->getOperand(2).getReg();
- auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
- if (!ValAndVReg) {
- // We didn't get a constant on the RHS. If the opcode is a shift, then
- // we're done.
- if (OffsetOpc == TargetOpcode::G_SHL)
- return std::nullopt;
- // If we have a G_MUL, we can use either register. Try looking at the RHS.
- std::swap(OffsetReg, ConstantReg);
- ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
- if (!ValAndVReg)
- return std::nullopt;
- }
- // The value must fit into 3 bits, and must be positive. Make sure that is
- // true.
- int64_t ImmVal = ValAndVReg->Value.getSExtValue();
- // Since we're going to pull this into a shift, the constant value must be
- // a power of 2. If we got a multiply, then we need to check this.
- if (OffsetOpc == TargetOpcode::G_MUL) {
- if (!isPowerOf2_32(ImmVal))
- return std::nullopt;
- // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
- ImmVal = Log2_32(ImmVal);
- }
- if ((ImmVal & 0x7) != ImmVal)
- return std::nullopt;
- // We are only allowed to shift by LegalShiftVal. This shift value is built
- // into the instruction, so we can't just use whatever we want.
- if (ImmVal != LegalShiftVal)
- return std::nullopt;
- unsigned SignExtend = 0;
- if (WantsExt) {
- // Check if the offset is defined by an extend, unless we looked through a
- // G_ZEXT earlier.
- if (!LookedThroughZExt) {
- MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
- auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return std::nullopt;
- SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
- // We only support SXTW for signed extension here.
- if (SignExtend && Ext != AArch64_AM::SXTW)
- return std::nullopt;
- OffsetReg = ExtInst->getOperand(1).getReg();
- }
- // Need a 32-bit wide register here.
- MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
- OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
- }
- // We can use the LHS of the GEP as the base, and the LHS of the shift as an
- // offset. Signify that we are shifting by setting the shift flag to 1.
- return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
- [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
- [=](MachineInstrBuilder &MIB) {
- // Need to add both immediates here to make sure that they are both
- // added to the instruction.
- MIB.addImm(SignExtend);
- MIB.addImm(1);
- }}};
- }
- /// This is used for computing addresses like this:
- ///
- /// ldr x1, [x2, x3, lsl #3]
- ///
- /// Where x2 is the base register, and x3 is an offset register. The shift-left
- /// is a constant value specific to this load instruction. That is, we'll never
- /// see anything other than a 3 here (which corresponds to the size of the
- /// element being loaded.)
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
- MachineOperand &Root, unsigned SizeInBytes) const {
- if (!Root.isReg())
- return std::nullopt;
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- // We want to find something like this:
- //
- // val = G_CONSTANT LegalShiftVal
- // shift = G_SHL off_reg val
- // ptr = G_PTR_ADD base_reg shift
- // x = G_LOAD ptr
- //
- // And fold it into this addressing mode:
- //
- // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
- // Check if we can find the G_PTR_ADD.
- MachineInstr *PtrAdd =
- getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
- return std::nullopt;
- // Now, try to match an opcode which will match our specific offset.
- // We want a G_SHL or a G_MUL.
- MachineInstr *OffsetInst =
- getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
- return selectExtendedSHL(Root, PtrAdd->getOperand(1),
- OffsetInst->getOperand(0), SizeInBytes,
- /*WantsExt=*/false);
- }
- /// This is used for computing addresses like this:
- ///
- /// ldr x1, [x2, x3]
- ///
- /// Where x2 is the base register, and x3 is an offset register.
- ///
- /// When possible (or profitable) to fold a G_PTR_ADD into the address
- /// calculation, this will do so. Otherwise, it will return std::nullopt.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeRegisterOffset(
- MachineOperand &Root) const {
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- // We need a GEP.
- MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
- if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
- return std::nullopt;
- // If this is used more than once, let's not bother folding.
- // TODO: Check if they are memory ops. If they are, then we can still fold
- // without having to recompute anything.
- if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
- return std::nullopt;
- // Base is the GEP's LHS, offset is its RHS.
- return {{[=](MachineInstrBuilder &MIB) {
- MIB.addUse(Gep->getOperand(1).getReg());
- },
- [=](MachineInstrBuilder &MIB) {
- MIB.addUse(Gep->getOperand(2).getReg());
- },
- [=](MachineInstrBuilder &MIB) {
- // Need to add both immediates here to make sure that they are both
- // added to the instruction.
- MIB.addImm(0);
- MIB.addImm(0);
- }}};
- }
- /// This is intended to be equivalent to selectAddrModeXRO in
- /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
- unsigned SizeInBytes) const {
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- if (!Root.isReg())
- return std::nullopt;
- MachineInstr *PtrAdd =
- getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd)
- return std::nullopt;
- // Check for an immediates which cannot be encoded in the [base + imm]
- // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
- // end up with code like:
- //
- // mov x0, wide
- // add x1 base, x0
- // ldr x2, [x1, x0]
- //
- // In this situation, we can use the [base, xreg] addressing mode to save an
- // add/sub:
- //
- // mov x0, wide
- // ldr x2, [base, x0]
- auto ValAndVReg =
- getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
- if (ValAndVReg) {
- unsigned Scale = Log2_32(SizeInBytes);
- int64_t ImmOff = ValAndVReg->Value.getSExtValue();
- // Skip immediates that can be selected in the load/store addresing
- // mode.
- if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
- ImmOff < (0x1000 << Scale))
- return std::nullopt;
- // Helper lambda to decide whether or not it is preferable to emit an add.
- auto isPreferredADD = [](int64_t ImmOff) {
- // Constants in [0x0, 0xfff] can be encoded in an add.
- if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
- return true;
- // Can it be encoded in an add lsl #12?
- if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
- return false;
- // It can be encoded in an add lsl #12, but we may not want to. If it is
- // possible to select this as a single movz, then prefer that. A single
- // movz is faster than an add with a shift.
- return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
- (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
- };
- // If the immediate can be encoded in a single add/sub, then bail out.
- if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
- return std::nullopt;
- }
- // Try to fold shifts into the addressing mode.
- auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
- if (AddrModeFns)
- return AddrModeFns;
- // If that doesn't work, see if it's possible to fold in registers from
- // a GEP.
- return selectAddrModeRegisterOffset(Root);
- }
- /// This is used for computing addresses like this:
- ///
- /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
- ///
- /// Where we have a 64-bit base register, a 32-bit offset register, and an
- /// extend (which may or may not be signed).
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
- unsigned SizeInBytes) const {
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- MachineInstr *PtrAdd =
- getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
- return std::nullopt;
- MachineOperand &LHS = PtrAdd->getOperand(1);
- MachineOperand &RHS = PtrAdd->getOperand(2);
- MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
- // The first case is the same as selectAddrModeXRO, except we need an extend.
- // In this case, we try to find a shift and extend, and fold them into the
- // addressing mode.
- //
- // E.g.
- //
- // off_reg = G_Z/S/ANYEXT ext_reg
- // val = G_CONSTANT LegalShiftVal
- // shift = G_SHL off_reg val
- // ptr = G_PTR_ADD base_reg shift
- // x = G_LOAD ptr
- //
- // In this case we can get a load like this:
- //
- // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
- auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
- SizeInBytes, /*WantsExt=*/true);
- if (ExtendedShl)
- return ExtendedShl;
- // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
- //
- // e.g.
- // ldr something, [base_reg, ext_reg, sxtw]
- if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
- return std::nullopt;
- // Check if this is an extend. We'll get an extend type if it is.
- AArch64_AM::ShiftExtendType Ext =
- getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return std::nullopt;
- // Need a 32-bit wide register.
- MachineIRBuilder MIB(*PtrAdd);
- Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
- AArch64::GPR32RegClass, MIB);
- unsigned SignExtend = Ext == AArch64_AM::SXTW;
- // Base is LHS, offset is ExtReg.
- return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
- [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
- [=](MachineInstrBuilder &MIB) {
- MIB.addImm(SignExtend);
- MIB.addImm(0);
- }}};
- }
- /// Select a "register plus unscaled signed 9-bit immediate" address. This
- /// should only match when there is an offset that is not valid for a scaled
- /// immediate addressing mode. The "Size" argument is the size in bytes of the
- /// memory reference, which is needed here to know what is valid for a scaled
- /// immediate.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
- unsigned Size) const {
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
- if (!Root.isReg())
- return std::nullopt;
- if (!isBaseWithConstantOffset(Root, MRI))
- return std::nullopt;
- MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
- MachineOperand &OffImm = RootDef->getOperand(2);
- if (!OffImm.isReg())
- return std::nullopt;
- MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
- if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
- return std::nullopt;
- int64_t RHSC;
- MachineOperand &RHSOp1 = RHS->getOperand(1);
- if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
- return std::nullopt;
- RHSC = RHSOp1.getCImm()->getSExtValue();
- // If the offset is valid as a scaled immediate, don't match here.
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
- return std::nullopt;
- if (RHSC >= -256 && RHSC < 256) {
- MachineOperand &Base = RootDef->getOperand(1);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
- }};
- }
- return std::nullopt;
- }
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
- unsigned Size,
- MachineRegisterInfo &MRI) const {
- if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
- return std::nullopt;
- MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
- if (Adrp.getOpcode() != AArch64::ADRP)
- return std::nullopt;
- // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
- auto Offset = Adrp.getOperand(1).getOffset();
- if (Offset % Size != 0)
- return std::nullopt;
- auto GV = Adrp.getOperand(1).getGlobal();
- if (GV->isThreadLocal())
- return std::nullopt;
- auto &MF = *RootDef.getParent()->getParent();
- if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
- return std::nullopt;
- unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
- MachineIRBuilder MIRBuilder(RootDef);
- Register AdrpReg = Adrp.getOperand(0).getReg();
- return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
- [=](MachineInstrBuilder &MIB) {
- MIB.addGlobalAddress(GV, Offset,
- OpFlags | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
- }}};
- }
- /// Select a "register plus scaled unsigned 12-bit immediate" address. The
- /// "Size" argument is the size in bytes of the memory reference, which
- /// determines the scale.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
- unsigned Size) const {
- MachineFunction &MF = *Root.getParent()->getParent()->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!Root.isReg())
- return std::nullopt;
- MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
- if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
- }};
- }
- CodeModel::Model CM = MF.getTarget().getCodeModel();
- // Check if we can fold in the ADD of small code model ADRP + ADD address.
- if (CM == CodeModel::Small) {
- auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
- if (OpFns)
- return OpFns;
- }
- if (isBaseWithConstantOffset(Root, MRI)) {
- MachineOperand &LHS = RootDef->getOperand(1);
- MachineOperand &RHS = RootDef->getOperand(2);
- MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
- MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
- int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
- unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
- if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
- }};
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
- }};
- }
- }
- // Before falling back to our general case, check if the unscaled
- // instructions can handle this. If so, that's preferable.
- if (selectAddrModeUnscaled(Root, Size))
- return std::nullopt;
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
- }};
- }
- /// Given a shift instruction, return the correct shift type for that
- /// instruction.
- static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return AArch64_AM::InvalidShiftExtend;
- case TargetOpcode::G_SHL:
- return AArch64_AM::LSL;
- case TargetOpcode::G_LSHR:
- return AArch64_AM::LSR;
- case TargetOpcode::G_ASHR:
- return AArch64_AM::ASR;
- case TargetOpcode::G_ROTR:
- return AArch64_AM::ROR;
- }
- }
- /// Select a "shifted register" operand. If the value is not shifted, set the
- /// shift operand to a default value of "lsl 0".
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
- bool AllowROR) const {
- if (!Root.isReg())
- return std::nullopt;
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
- // Check if the operand is defined by an instruction which corresponds to
- // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
- MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
- AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
- if (ShType == AArch64_AM::InvalidShiftExtend)
- return std::nullopt;
- if (ShType == AArch64_AM::ROR && !AllowROR)
- return std::nullopt;
- if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
- return std::nullopt;
- // Need an immediate on the RHS.
- MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
- auto Immed = getImmedFromMO(ShiftRHS);
- if (!Immed)
- return std::nullopt;
- // We have something that we can fold. Fold in the shift's LHS and RHS into
- // the instruction.
- MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
- Register ShiftReg = ShiftLHS.getReg();
- unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
- unsigned Val = *Immed & (NumBits - 1);
- unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
- return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
- }
- /// Classify \p MI as an AArch64 extend kind that can be folded into an operand.
- ///
- /// Recognised forms are G_SEXT / G_SEXT_INREG, G_ZEXT / G_ANYEXT, and a G_AND
- /// whose second source operand is one of the constant masks 0xFF, 0xFFFF or
- /// 0xFFFFFFFF. When \p IsLoadStore is set only the 32-bit (word) forms are
- /// reported; byte and halfword forms yield InvalidShiftExtend.
- ///
- /// \returns the matching extend kind, or AArch64_AM::InvalidShiftExtend when
- /// \p MI is not one of the recognised forms.
- AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
-     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
-   // Byte/halfword extends are rejected whenever IsLoadStore is set.
-   const auto SubWord = [IsLoadStore](AArch64_AM::ShiftExtendType Kind) {
-     return IsLoadStore ? AArch64_AM::InvalidShiftExtend : Kind;
-   };
-   const unsigned Opcode = MI.getOpcode();
-   switch (Opcode) {
-   case TargetOpcode::G_SEXT:
-   case TargetOpcode::G_SEXT_INREG: {
-     // G_SEXT_INREG carries the source width as an immediate operand; G_SEXT
-     // takes it from the type of its source register.
-     unsigned SrcBits;
-     if (Opcode == TargetOpcode::G_SEXT_INREG)
-       SrcBits = MI.getOperand(2).getImm();
-     else
-       SrcBits = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
-     assert(SrcBits != 64 && "Extend from 64 bits?");
-     if (SrcBits == 8)
-       return SubWord(AArch64_AM::SXTB);
-     if (SrcBits == 16)
-       return SubWord(AArch64_AM::SXTH);
-     if (SrcBits == 32)
-       return AArch64_AM::SXTW;
-     return AArch64_AM::InvalidShiftExtend;
-   }
-   case TargetOpcode::G_ZEXT:
-   case TargetOpcode::G_ANYEXT: {
-     // G_ANYEXT is classified exactly like G_ZEXT here.
-     const unsigned SrcBits =
-         MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
-     assert(SrcBits != 64 && "Extend from 64 bits?");
-     if (SrcBits == 8)
-       return SubWord(AArch64_AM::UXTB);
-     if (SrcBits == 16)
-       return SubWord(AArch64_AM::UXTH);
-     if (SrcBits == 32)
-       return AArch64_AM::UXTW;
-     return AArch64_AM::InvalidShiftExtend;
-   }
-   case TargetOpcode::G_AND: {
-     // No explicit extend: a mask of all-ones in the low 8/16/32 bits acts as
-     // an unsigned extend of that width.
-     const std::optional<uint64_t> Mask = getImmedFromMO(MI.getOperand(2));
-     if (!Mask)
-       return AArch64_AM::InvalidShiftExtend;
-     if (*Mask == 0xFF)
-       return SubWord(AArch64_AM::UXTB);
-     if (*Mask == 0xFFFF)
-       return SubWord(AArch64_AM::UXTH);
-     if (*Mask == 0xFFFFFFFF)
-       return AArch64_AM::UXTW;
-     return AArch64_AM::InvalidShiftExtend;
-   }
-   default:
-     return AArch64_AM::InvalidShiftExtend;
-   }
- }
- /// Return a register holding the scalar value of \p Reg whose size matches
- /// register class \p RC.
- ///
- /// If the type of \p Reg already has the same bit width as \p RC, \p Reg is
- /// returned untouched. Otherwise a COPY into \p RC is built with \p MIB,
- /// selected immediately, and its result register is returned.
- Register AArch64InstructionSelector::moveScalarRegClass(
-     Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
-   MachineRegisterInfo &RegInfo = *MIB.getMRI();
-   const LLT RegTy = RegInfo.getType(Reg);
-   assert(!RegTy.isVector() && "Expected scalars only!");
-   const bool AlreadyFits = RegTy.getSizeInBits() == TRI.getRegSizeInBits(RC);
-   if (AlreadyFits)
-     return Reg;
-   // Build the copy and select it right away.
-   // FIXME: We should have an emitCopy function?
-   auto ClassCopy = MIB.buildCopy({&RC}, {Reg});
-   selectCopy(*ClassCopy, TII, RegInfo, TRI, RBI);
-   return ClassCopy.getReg(0);
- }
- /// Match an "extended register" operand rooted at \p Root: an extend that is
- /// optionally followed by a left shift by a constant no greater than 4.
- ///
- /// \returns renderers that add the (GPR32-sized) source register and the
- /// encoded extend/shift immediate, or std::nullopt when nothing can be folded.
- InstructionSelector::ComplexRendererFns
- AArch64InstructionSelector::selectArithExtendedRegister(
-     MachineOperand &Root) const {
-   if (!Root.isReg())
-     return std::nullopt;
-   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
-   MachineRegisterInfo &MRI = MF.getRegInfo();
-   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
-   if (!RootDef)
-     return std::nullopt;
-   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
-     return std::nullopt;
-   // By default the root itself is the candidate extend; for a G_SHL the
-   // candidate is whatever defines the shift's LHS.
-   const bool IsShift = RootDef->getOpcode() == TargetOpcode::G_SHL;
-   MachineInstr *ExtendMI = RootDef;
-   uint64_t ShiftAmt = 0;
-   if (IsShift) {
-     // The shift amount must be a constant in the range [0, 4].
-     const std::optional<uint64_t> ShiftCst =
-         getImmedFromMO(RootDef->getOperand(2));
-     if (!ShiftCst)
-       return std::nullopt;
-     ShiftAmt = *ShiftCst;
-     if (ShiftAmt > 4)
-       return std::nullopt;
-     ExtendMI = getDefIgnoringCopies(RootDef->getOperand(1).getReg(), MRI);
-     if (!ExtendMI)
-       return std::nullopt;
-   }
-   const AArch64_AM::ShiftExtendType ExtKind =
-       getExtendTypeForInst(*ExtendMI, MRI);
-   if (ExtKind == AArch64_AM::InvalidShiftExtend)
-     return std::nullopt;
-   Register SrcReg = ExtendMI->getOperand(1).getReg();
-   // Without a shift: a 32-bit instruction that zeroes the high half of the
-   // register already provides the zero extend for free, so don't fold then.
-   // FIXME: We actually emit the extend right now even though we don't have
-   // to.
-   if (!IsShift && ExtKind == AArch64_AM::UXTW &&
-       MRI.getType(SrcReg).getSizeInBits() == 32) {
-     MachineInstr *SrcDef = MRI.getVRegDef(SrcReg);
-     if (isDef32(*SrcDef))
-       return std::nullopt;
-   }
-   // The operand must be a GPR32; narrow the source with a subregister copy
-   // when it is wider.
-   MachineIRBuilder Builder(*RootDef);
-   SrcReg = moveScalarRegClass(SrcReg, AArch64::GPR32RegClass, Builder);
-   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(SrcReg); },
-            [=](MachineInstrBuilder &MIB) {
-              MIB.addImm(getArithExtendImm(ExtKind, ShiftAmt));
-            }}};
- }
- /// Custom renderer: add the sign-extended value of the G_CONSTANT \p MI to
- /// \p MIB as an immediate operand. \p OpIdx is expected to be -1.
- void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI,
-                                                 int OpIdx) const {
-   const MachineFunction &MF = *MI.getParent()->getParent();
-   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
-          "Expected G_CONSTANT");
-   // Look the constant up through the register defined by MI.
-   const Register CstReg = MI.getOperand(0).getReg();
-   const std::optional<int64_t> SExtVal =
-       getIConstantVRegSExtVal(CstReg, MF.getRegInfo());
-   assert(SExtVal && "Expected constant value");
-   MIB.addImm(*SExtVal);
- }
- /// Custom renderer: encode the value of the G_CONSTANT \p I as a 32-bit
- /// logical immediate and add the encoding to \p MIB. \p OpIdx is expected to
- /// be -1.
- void AArch64InstructionSelector::renderLogicalImm32(
-     MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
-   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
-          "Expected G_CONSTANT");
-   constexpr unsigned RegSize = 32;
-   const auto *CstImm = I.getOperand(1).getCImm();
-   const uint64_t Encoded =
-       AArch64_AM::encodeLogicalImmediate(CstImm->getZExtValue(), RegSize);
-   MIB.addImm(Encoded);
- }
- /// Custom renderer: encode the value of the G_CONSTANT \p I as a 64-bit
- /// logical immediate and add the encoding to \p MIB. \p OpIdx is expected to
- /// be -1.
- void AArch64InstructionSelector::renderLogicalImm64(
-     MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
-   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
-          "Expected G_CONSTANT");
-   constexpr unsigned RegSize = 64;
-   const auto *CstImm = I.getOperand(1).getCImm();
-   const uint64_t Encoded =
-       AArch64_AM::encodeLogicalImmediate(CstImm->getZExtValue(), RegSize);
-   MIB.addImm(Encoded);
- }
- /// Custom renderer: add the AArch64 FP16 immediate encoding of the
- /// G_FCONSTANT \p MI to \p MIB. \p OpIdx is expected to be -1.
- void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI,
-                                                int OpIdx) const {
-   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
-          "Expected G_FCONSTANT");
-   const auto &FPValue = MI.getOperand(1).getFPImm()->getValueAPF();
-   const auto Encoded = AArch64_AM::getFP16Imm(FPValue);
-   MIB.addImm(Encoded);
- }
- /// Custom renderer: add the AArch64 FP32 immediate encoding of the
- /// G_FCONSTANT \p MI to \p MIB. \p OpIdx is expected to be -1.
- void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI,
-                                                int OpIdx) const {
-   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
-          "Expected G_FCONSTANT");
-   const auto &FPValue = MI.getOperand(1).getFPImm()->getValueAPF();
-   const auto Encoded = AArch64_AM::getFP32Imm(FPValue);
-   MIB.addImm(Encoded);
- }
- /// Custom renderer: add the AArch64 FP64 immediate encoding of the
- /// G_FCONSTANT \p MI to \p MIB. \p OpIdx is expected to be -1.
- void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
-                                                const MachineInstr &MI,
-                                                int OpIdx) const {
-   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
-          "Expected G_FCONSTANT");
-   const auto &FPValue = MI.getOperand(1).getFPImm()->getValueAPF();
-   const auto Encoded = AArch64_AM::getFP64Imm(FPValue);
-   MIB.addImm(Encoded);
- }
- /// Custom renderer: take the raw bit pattern of the G_FCONSTANT \p MI, encode
- /// it as an AdvSIMD modified immediate of type 4, and add the encoding to
- /// \p MIB. \p OpIdx is expected to be -1.
- void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
-     MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
-   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
-          "Expected G_FCONSTANT");
-   const auto &FPValue = MI.getOperand(1).getFPImm()->getValueAPF();
-   // Reinterpret the floating-point constant as its integer bit pattern.
-   const uint64_t RawBits = FPValue.bitcastToAPInt().getZExtValue();
-   MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(RawBits));
- }
- /// \returns true if \p MI may load or store and the size recorded on its
- /// (single) memory operand equals \p NumBytes.
- bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
-     const MachineInstr &MI, unsigned NumBytes) const {
-   const bool TouchesMemory = MI.mayLoadOrStore();
-   if (!TouchesMemory)
-     return false;
-   assert(MI.hasOneMemOperand() &&
-          "Expected load/store to have only one mem op!");
-   const auto *MemOp = *MI.memoperands_begin();
-   return MemOp->getSize() == NumBytes;
- }
- /// \returns true if \p MI defines a 32-bit value and is not one of the
- /// copy-like opcodes (COPY, G_BITCAST, G_TRUNC, G_PHI) that are excluded
- /// below.
- bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
-   const MachineFunction &MF = *MI.getParent()->getParent();
-   const MachineRegisterInfo &MRI = MF.getRegInfo();
-   const Register DefReg = MI.getOperand(0).getReg();
-   if (MRI.getType(DefReg).getSizeInBits() != 32)
-     return false;
-   // Answer "yes" only when the operation is known to zero the high half of
-   // the 64-bit register. A truncate may become a subregister copy, which
-   // leaves the high bits alone; copies and other copy-like instructions may
-   // be fed by truncates or be lowered as subregister copies themselves.
-   const unsigned Opcode = MI.getOpcode();
-   const bool IsCopyLike =
-       Opcode == TargetOpcode::COPY || Opcode == TargetOpcode::G_BITCAST ||
-       Opcode == TargetOpcode::G_TRUNC || Opcode == TargetOpcode::G_PHI;
-   return !IsCopyLike;
- }
- // Rewrite the incoming register operands of the G_PHI \p MI so that each one
- // lives on the same register bank as the PHI's destination. An operand on a
- // different bank is replaced by a COPY placed after its defining instruction.
- static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
-                             const AArch64RegisterBankInfo &RBI) {
-   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
-   const RegisterBank *DstRB = MRI.getRegBankOrNull(MI.getOperand(0).getReg());
-   assert(DstRB && "Expected PHI dst to have regbank assigned");
-   MachineIRBuilder MIB(MI);
-   // Visit every operand except the def and homogenize its bank.
-   for (MachineOperand &PhiOp : llvm::drop_begin(MI.operands())) {
-     if (!PhiOp.isReg())
-       continue;
-     const Register InReg = PhiOp.getReg();
-     if (MRI.getRegBankOrNull(InReg) == DstRB)
-       continue;
-     // Bank mismatch: a cross-bank copy is required.
-     MachineInstr *InDef = MRI.getVRegDef(InReg);
-     const LLT InTy = MRI.getType(InReg);
-     MachineBasicBlock &DefBB = *InDef->getParent();
-     // The copy goes right after the def, unless that spot is still inside
-     // the block's PHIs; valid MIR requires it to come after all of them.
-     MachineBasicBlock::iterator Where = std::next(InDef->getIterator());
-     const bool LandsOnPHI = Where != DefBB.end() && Where->isPHI();
-     if (LandsOnPHI)
-       Where = DefBB.getFirstNonPHI();
-     MIB.setInsertPt(DefBB, Where);
-     const Register Copied = MIB.buildCopy(InTy, InReg).getReg(0);
-     MRI.setRegBank(Copied, *DstRB);
-     PhiOp.setReg(Copied);
-   }
- }
- /// Walk every G_PHI in \p MF and, where its narrow scalar operands are spread
- /// over both the GPR bank and another bank, insert cross-bank copies so that
- /// all operands end up on the destination's bank.
- void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
-   MachineRegisterInfo &MRI = MF.getRegInfo();
-   // Collect the PHIs up front; the fixup inserts instructions, so the blocks
-   // must not be iterated while it runs.
-   SmallVector<MachineInstr *, 32> Worklist;
-   for (MachineBasicBlock &MBB : MF)
-     for (MachineInstr &Inst : MBB)
-       if (Inst.getOpcode() == TargetOpcode::G_PHI)
-         Worklist.push_back(&Inst);
-   for (MachineInstr *Phi : Worklist) {
-     // Scalars narrower than 32 bits that sit on the gpr bank all receive
-     // gpr32 register classes, so a PHI can end up choosing between a gpr32
-     // and an fpr16. Ideally RBS would not hand out heterogeneous banks for
-     // the operands of one PHI, but it has to be coped with here.
-     //
-     // The remedy: when such a PHI has at least one gpr operand and at least
-     // one operand on another bank, add cross-bank copies. For simplicity the
-     // bank everything is moved to is the bank of the def operand. Example:
-     //
-     // %endbb:
-     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
-     // =>
-     // %bb2:
-     //   ...
-     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
-     //   ...
-     // %endbb:
-     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
-     bool SawGPR = false;
-     bool SawOtherBank = false;
-     for (const MachineOperand &MO : llvm::drop_begin(Phi->operands())) {
-       if (!MO.isReg())
-         continue;
-       const Register InReg = MO.getReg();
-       const LLT InTy = MRI.getType(InReg);
-       // Stop scanning at the first operand that is not a valid scalar
-       // narrower than 32 bits.
-       const bool IsNarrowScalar =
-           InTy.isValid() && InTy.isScalar() && InTy.getSizeInBits() < 32;
-       if (!IsNarrowScalar)
-         break;
-       // Likewise stop if no register bank has been assigned yet.
-       const RegisterBank *Bank = MRI.getRegBankOrNull(InReg);
-       if (!Bank)
-         break;
-       const bool OnGPR = Bank->getID() == AArch64::GPRRegBankID;
-       SawGPR |= OnGPR;
-       SawOtherBank |= !OnGPR;
-     }
-     // Mixed banks were observed: homogenize them.
-     if (SawGPR && SawOtherBank)
-       fixupPHIOpBanks(*Phi, MRI, RBI);
-   }
- }
- namespace llvm {
- /// Factory: heap-allocate an AArch64InstructionSelector built from \p TM,
- /// \p Subtarget and \p RBI and return it through the generic
- /// InstructionSelector interface.
- InstructionSelector *
- createAArch64InstructionSelector(const AArch64TargetMachine &TM,
-                                  AArch64Subtarget &Subtarget,
-                                  AArch64RegisterBankInfo &RBI) {
-   auto *Selector = new AArch64InstructionSelector(TM, Subtarget, RBI);
-   return Selector;
- }
- } // end namespace llvm
|