- //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines an instruction selector for the AArch64 target.
- //
- //===----------------------------------------------------------------------===//
- #include "AArch64MachineFunctionInfo.h"
- #include "AArch64TargetMachine.h"
- #include "MCTargetDesc/AArch64AddressingModes.h"
- #include "llvm/ADT/APSInt.h"
- #include "llvm/CodeGen/ISDOpcodes.h"
- #include "llvm/CodeGen/SelectionDAGISel.h"
- #include "llvm/IR/Function.h" // To access function attributes.
- #include "llvm/IR/GlobalValue.h"
- #include "llvm/IR/Intrinsics.h"
- #include "llvm/IR/IntrinsicsAArch64.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/ErrorHandling.h"
- #include "llvm/Support/KnownBits.h"
- #include "llvm/Support/MathExtras.h"
- #include "llvm/Support/raw_ostream.h"
- using namespace llvm;
- #define DEBUG_TYPE "aarch64-isel"
- #define PASS_NAME "AArch64 Instruction Selection"
- //===--------------------------------------------------------------------===//
- /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
- /// instructions for SelectionDAG operations.
- ///
- namespace {
- class AArch64DAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const AArch64Subtarget *Subtarget;
- public:
- static char ID;
- AArch64DAGToDAGISel() = delete;
- explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
- CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<AArch64Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
- void Select(SDNode *Node) override;
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
- std::vector<SDValue> &OutOps) override;
- template <signed Low, signed High, signed Scale>
- bool SelectRDVLImm(SDValue N, SDValue &Imm);
- bool tryMLAV64LaneV128(SDNode *N);
- bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
- bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
- bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
- bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
- bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
- bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
- return SelectShiftedRegister(N, false, Reg, Shift);
- }
- bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
- return SelectShiftedRegister(N, true, Reg, Shift);
- }
- bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
- }
- bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
- }
- bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
- }
- bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
- }
- bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
- }
- bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
- }
- bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
- }
- bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 1, Base, OffImm);
- }
- bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 2, Base, OffImm);
- }
- bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 4, Base, OffImm);
- }
- bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 8, Base, OffImm);
- }
- bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 16, Base, OffImm);
- }
- bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 1, Base, OffImm);
- }
- bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 2, Base, OffImm);
- }
- bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 4, Base, OffImm);
- }
- bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 8, Base, OffImm);
- }
- bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 16, Base, OffImm);
- }
- template <unsigned Size, unsigned Max>
- bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
- // Test if there is an appropriate addressing mode and check if the
- // immediate fits.
- bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
- if (Found) {
- if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
- int64_t C = CI->getSExtValue();
- if (C <= Max)
- return true;
- }
- }
- // Otherwise, base only, materialize address in register.
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
- }
- template<int Width>
- bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &SignExtend, SDValue &DoShift) {
- return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
- }
- template<int Width>
- bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &SignExtend, SDValue &DoShift) {
- return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
- }
- bool SelectExtractHigh(SDValue N, SDValue &Res) {
- if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
- N = N->getOperand(0);
- if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- !isa<ConstantSDNode>(N->getOperand(1)))
- return false;
- EVT VT = N->getValueType(0);
- EVT LVT = N->getOperand(0).getValueType();
- unsigned Index = N->getConstantOperandVal(1);
- if (!VT.is64BitVector() || !LVT.is128BitVector() ||
- Index != VT.getVectorNumElements())
- return false;
- Res = N->getOperand(0);
- return true;
- }
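- // Illustrative example (not from the original source): for a v8i16 input,
- // (extract_subvector X, (i64 4)) with result type v4i16 selects the high
- // half of X, so Res is set to the full 128-bit vector X itself.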
- bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
- if (N.getOpcode() != AArch64ISD::VLSHR)
- return false;
- SDValue Op = N->getOperand(0);
- EVT VT = Op.getValueType();
- unsigned ShtAmt = N->getConstantOperandVal(1);
- if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
- return false;
- APInt Imm;
- if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
- Imm = APInt(VT.getScalarSizeInBits(),
- Op.getOperand(1).getConstantOperandVal(0)
- << Op.getOperand(1).getConstantOperandVal(1));
- else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
- isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
- Imm = APInt(VT.getScalarSizeInBits(),
- Op.getOperand(1).getConstantOperandVal(0));
- else
- return false;
- if (Imm != 1ULL << (ShtAmt - 1))
- return false;
- Res1 = Op.getOperand(0);
- Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
- return true;
- }
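- // Sketch of what the matcher above recognizes (assumed, for illustration):
- // the rounding-shift idiom (X + (1 << (ShtAmt - 1))) >> ShtAmt, where the
- // rounding constant arrives as either a MOVIshift or a constant DUP.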
- bool SelectDupZeroOrUndef(SDValue N) {
- switch(N->getOpcode()) {
- case ISD::UNDEF:
- return true;
- case AArch64ISD::DUP:
- case ISD::SPLAT_VECTOR: {
- auto Opnd0 = N->getOperand(0);
- if (isNullConstant(Opnd0))
- return true;
- if (isNullFPConstant(Opnd0))
- return true;
- break;
- }
- default:
- break;
- }
- return false;
- }
- bool SelectDupZero(SDValue N) {
- switch(N->getOpcode()) {
- case AArch64ISD::DUP:
- case ISD::SPLAT_VECTOR: {
- auto Opnd0 = N->getOperand(0);
- if (isNullConstant(Opnd0))
- return true;
- if (isNullFPConstant(Opnd0))
- return true;
- break;
- }
- }
- return false;
- }
- template<MVT::SimpleValueType VT>
- bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
- return SelectSVEAddSubImm(N, VT, Imm, Shift);
- }
- template <MVT::SimpleValueType VT>
- bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
- return SelectSVECpyDupImm(N, VT, Imm, Shift);
- }
- template <MVT::SimpleValueType VT, bool Invert = false>
- bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
- return SelectSVELogicalImm(N, VT, Imm, Invert);
- }
- template <MVT::SimpleValueType VT>
- bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
- return SelectSVEArithImm(N, VT, Imm);
- }
- template <unsigned Low, unsigned High, bool AllowSaturation = false>
- bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
- return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
- }
- bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
- if (N->getOpcode() != ISD::SPLAT_VECTOR)
- return false;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
- /* High */ EltVT.getFixedSizeInBits(),
- /* AllowSaturation */ true, Imm);
- }
- // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
- template<signed Min, signed Max, signed Scale, bool Shift>
- bool SelectCntImm(SDValue N, SDValue &Imm) {
- if (!isa<ConstantSDNode>(N))
- return false;
- int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
- if (Shift)
- MulImm = 1LL << MulImm;
- if ((MulImm % std::abs(Scale)) != 0)
- return false;
- MulImm /= Scale;
- if ((MulImm >= Min) && (MulImm <= Max)) {
- Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
- return true;
- }
- return false;
- }
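- // Worked example (illustrative values): with Scale=16 and Shift=false, a
- // constant of 48 gives MulImm/Scale = 3, so Imm becomes the i32 constant 3
- // provided 3 lies within [Min, Max].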
- template <signed Max, signed Scale>
- bool SelectEXTImm(SDValue N, SDValue &Imm) {
- if (!isa<ConstantSDNode>(N))
- return false;
- int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
- if (MulImm >= 0 && MulImm <= Max) {
- MulImm *= Scale;
- Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
- return true;
- }
- return false;
- }
- template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
- if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
- uint64_t C = CI->getZExtValue();
- Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
- return true;
- }
- return false;
- }
- /// Form sequences of consecutive 64/128-bit registers for use in NEON
- /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
- /// between 1 and 4 elements. If it contains a single element, that element is
- /// returned unchanged; otherwise, a REG_SEQUENCE value is returned.
- SDValue createDTuple(ArrayRef<SDValue> Vecs);
- SDValue createQTuple(ArrayRef<SDValue> Vecs);
- // Form a sequence of SVE registers for instructions using a list of vectors,
- // e.g. structured loads and stores (ldN, stN).
- SDValue createZTuple(ArrayRef<SDValue> Vecs);
- /// Generic helper for the createDTuple/createQTuple
- /// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
- const unsigned SubRegs[]);
- void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
- bool tryIndexedLoad(SDNode *N);
- bool trySelectStackSlotTagP(SDNode *N);
- void SelectTagP(SDNode *N);
- void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
- unsigned SubRegIdx);
- void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
- unsigned SubRegIdx);
- void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
- unsigned Opc_rr, unsigned Opc_ri,
- bool IsIntr = false);
- void SelectWhilePair(SDNode *N, unsigned Opc);
- void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
- bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
- /// SVE Reg+Imm addressing mode.
- template <int64_t Min, int64_t Max>
- bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
- SDValue &OffImm);
- /// SVE Reg+Reg address mode.
- template <unsigned Scale>
- bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
- return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
- }
- template <unsigned MaxIdx, unsigned Scale>
- bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
- return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
- }
- void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
- unsigned Opc_rr, unsigned Opc_ri);
- std::tuple<unsigned, SDValue, SDValue>
- findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
- const SDValue &OldBase, const SDValue &OldOffset,
- unsigned Scale);
- bool tryBitfieldExtractOp(SDNode *N);
- bool tryBitfieldExtractOpFromSExt(SDNode *N);
- bool tryBitfieldInsertOp(SDNode *N);
- bool tryBitfieldInsertInZeroOp(SDNode *N);
- bool tryShiftAmountMod(SDNode *N);
- bool tryHighFPExt(SDNode *N);
- bool tryReadRegister(SDNode *N);
- bool tryWriteRegister(SDNode *N);
- // Include the pieces autogenerated from the target description.
- #include "AArch64GenDAGISel.inc"
- private:
- bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
- SDValue &Shift);
- bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
- bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm) {
- return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
- }
- bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
- unsigned Size, SDValue &Base,
- SDValue &OffImm);
- bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm);
- bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm);
- bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
- SDValue &Offset, SDValue &SignExtend,
- SDValue &DoShift);
- bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
- SDValue &Offset, SDValue &SignExtend,
- SDValue &DoShift);
- bool isWorthFolding(SDValue V) const;
- bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
- SDValue &Offset, SDValue &SignExtend);
- template<unsigned RegWidth>
- bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
- return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
- }
- bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
- bool SelectCMP_SWAP(SDNode *N);
- bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
- bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
- bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
- bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
- bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
- bool AllowSaturation, SDValue &Imm);
- bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
- bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
- SDValue &Offset);
- bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
- SDValue &Offset, unsigned Scale = 1);
- bool SelectAllActivePredicate(SDValue N);
- };
- } // end anonymous namespace
- char AArch64DAGToDAGISel::ID = 0;
- INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
- /// isIntImmediate - This method tests to see if the node is a constant
- /// operand. If so, Imm will receive the value (zero-extended to 64 bits).
- static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
- if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
- Imm = C->getZExtValue();
- return true;
- }
- return false;
- }
- // isIntImmediate - This method tests to see if N is a constant operand.
- // If so, Imm will receive the value.
- static bool isIntImmediate(SDValue N, uint64_t &Imm) {
- return isIntImmediate(N.getNode(), Imm);
- }
- // isOpcWithIntImmediate - This method tests to see if the node is a specific
- // opcode and that it has an immediate integer right operand.
- // If so, Imm will receive the value.
- static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
- uint64_t &Imm) {
- return N->getOpcode() == Opc &&
- isIntImmediate(N->getOperand(1).getNode(), Imm);
- }
- // isIntImmediateEq - This method tests to see if N is a constant operand that
- // is equivalent to 'ImmExpected'.
- #ifndef NDEBUG
- static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
- uint64_t Imm;
- if (!isIntImmediate(N.getNode(), Imm))
- return false;
- return Imm == ImmExpected;
- }
- #endif
- bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
- switch(ConstraintID) {
- default:
- llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_Q:
- // We need to make sure that this one operand does not end up in XZR, so we
- // require the address to be in a PointerRegClass register.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
- SDLoc dl(Op);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- dl, Op.getValueType(),
- Op, RC), 0);
- OutOps.push_back(NewOp);
- return false;
- }
- return true;
- }
- /// SelectArithImmed - Select an immediate value that can be represented as
- /// a 12-bit value shifted left by either 0 or 12. If so, return true with
- /// Val set to the 12-bit value and Shift set to the shifter operand.
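- // For example (illustrative): 0x123 selects Val=0x123 with LSL #0,
- // 0x123000 selects Val=0x123 with LSL #12, and 0x123456 is rejected
- // because it fits neither form.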
- bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
- SDValue &Shift) {
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcodes it is interested in; however,
- // we still need to check whether the operand is actually an immediate
- // here, because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
- if (!isa<ConstantSDNode>(N.getNode()))
- return false;
- uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
- unsigned ShiftAmt;
- if (Immed >> 12 == 0) {
- ShiftAmt = 0;
- } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
- ShiftAmt = 12;
- Immed = Immed >> 12;
- } else
- return false;
- unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- SDLoc dl(N);
- Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
- Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
- return true;
- }
- /// SelectNegArithImmed - As above, but negates the value before trying to
- /// select it.
- bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
- SDValue &Shift) {
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcodes it is interested in; however,
- // we still need to check whether the operand is actually an immediate
- // here, because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
- if (!isa<ConstantSDNode>(N.getNode()))
- return false;
- // The immediate operand must be a 24-bit zero-extended immediate.
- uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
- // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
- // have the opposite effect on the C flag, so this pattern mustn't match under
- // those circumstances.
- if (Immed == 0)
- return false;
- if (N.getValueType() == MVT::i32)
- Immed = ~((uint32_t)Immed) + 1;
- else
- Immed = ~Immed + 1ULL;
- if (Immed & 0xFFFFFFFFFF000000ULL)
- return false;
- Immed &= 0xFFFFFFULL;
- return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
- Shift);
- }
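- // Usage note (informal): this lets a comparison against a negative
- // immediate that has no direct encoding, e.g. "cmp w0, #-5", be selected
- // via the negated form "cmn w0, #5", reusing the 12-bit shifted-immediate
- // encoding above.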
- /// getShiftTypeForNode - Translate a shift node to the corresponding
- /// ShiftType value.
- static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
- switch (N.getOpcode()) {
- default:
- return AArch64_AM::InvalidShiftExtend;
- case ISD::SHL:
- return AArch64_AM::LSL;
- case ISD::SRL:
- return AArch64_AM::LSR;
- case ISD::SRA:
- return AArch64_AM::ASR;
- case ISD::ROTR:
- return AArch64_AM::ROR;
- }
- }
- /// Determine whether it is worth it to fold SHL into the addressing
- /// mode.
- static bool isWorthFoldingSHL(SDValue V) {
- assert(V.getOpcode() == ISD::SHL && "invalid opcode");
- // It is worth folding a logical shift of up to three places.
- auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (!CSD)
- return false;
- unsigned ShiftVal = CSD->getZExtValue();
- if (ShiftVal > 3)
- return false;
- // Check if this particular node is reused in any non-memory related
- // operation. If so, do not try to fold this node into the address
- // computation, since the computation will be kept.
- const SDNode *Node = V.getNode();
- for (SDNode *UI : Node->uses())
- if (!isa<MemSDNode>(*UI))
- for (SDNode *UII : UI->uses())
- if (!isa<MemSDNode>(*UII))
- return false;
- return true;
- }
- /// Determine whether it is worth it to fold V into an extended register.
- bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // Trivial if we are optimizing for code size or if there is only
- // one use of the value.
- if (CurDAG->shouldOptForSize() || V.hasOneUse())
- return true;
- // If the subtarget has a fastpath LSL, we can fold a logical shift into
- // the addressing mode and save a cycle.
- if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
- isWorthFoldingSHL(V))
- return true;
- if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
- const SDValue LHS = V.getOperand(0);
- const SDValue RHS = V.getOperand(1);
- if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
- return true;
- if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
- return true;
- }
- // It hurts otherwise, since the value will be reused.
- return false;
- }
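- // Example of the LSLFast case (illustrative): for (add x, (shl y, 3)) where
- // every transitive user of the shift is a memory op, the shift is folded
- // into the [x, y, lsl #3] addressing mode rather than computed separately.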
- /// Transform "(and (shl/srl/sra x, c), mask)" into "(shl (srl/sra x, c1), c2)"
- /// so that more shifted-register operands can be selected.
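- // Worked example (illustrative, i32): (and (srl x, 2), 0xFFFFFFF0) has a
- // shifted mask with LowZBits=4, MaskLen=28, giving NewShiftC=6; it is
- // rebuilt as (UBFMWri x, 6, 31) paired with an LSL #4 shifted-register
- // operand, i.e. ((x >> 6) << 4).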
- bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
- SDValue &Shift) {
- EVT VT = N.getValueType();
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
- if (N->getOpcode() != ISD::AND || !N->hasOneUse())
- return false;
- SDValue LHS = N.getOperand(0);
- if (!LHS->hasOneUse())
- return false;
- unsigned LHSOpcode = LHS->getOpcode();
- if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
- return false;
- ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- if (!ShiftAmtNode)
- return false;
- uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
- ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!RHSC)
- return false;
- APInt AndMask = RHSC->getAPIntValue();
- unsigned LowZBits, MaskLen;
- if (!AndMask.isShiftedMask(LowZBits, MaskLen))
- return false;
- unsigned BitWidth = N.getValueSizeInBits();
- SDLoc DL(LHS);
- uint64_t NewShiftC;
- unsigned NewShiftOp;
- if (LHSOpcode == ISD::SHL) {
- // The LowZBits <= ShiftAmtC case is handled by isBitfieldPositioningOp;
- // BitWidth != LowZBits + MaskLen does not match the pattern.
- if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
- return false;
- NewShiftC = LowZBits - ShiftAmtC;
- NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
- } else {
- if (LowZBits == 0)
- return false;
- // The NewShiftC >= BitWidth case is handled by isBitfieldExtractOp.
- NewShiftC = LowZBits + ShiftAmtC;
- if (NewShiftC >= BitWidth)
- return false;
- // SRA needs all of the high bits.
- if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
- return false;
- // For SRL, the high bits can be 0 or 1, so the mask must cover all
- // remaining high bits of the shifted value.
- if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
- return false;
- if (LHSOpcode == ISD::SRL)
- NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
- else
- NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
- }
- assert(NewShiftC < BitWidth && "Invalid shift amount");
- SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
- SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
- Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
- NewShiftAmt, BitWidthMinus1),
- 0);
- unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
- Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
- return true;
- }
- /// SelectShiftedRegister - Select a "shifted register" operand. If the value
- /// is not shifted, set the Shift operand to the default of "LSL #0". The logical
- /// instructions allow the shifted register to be rotated, but the arithmetic
- /// instructions do not. The AllowROR parameter specifies whether ROR is
- /// supported.
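- // For instance (illustrative): an operand of the form (shl b, 3) yields
- // Reg=b with Shift encoding LSL #3, letting the parent node select as,
- // e.g., "add x0, x1, x2, lsl #3".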
- bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
- SDValue &Reg, SDValue &Shift) {
- if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
- return true;
- AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
- if (ShType == AArch64_AM::InvalidShiftExtend)
- return false;
- if (!AllowROR && ShType == AArch64_AM::ROR)
- return false;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- unsigned BitSize = N.getValueSizeInBits();
- unsigned Val = RHS->getZExtValue() & (BitSize - 1);
- unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
- Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
- return isWorthFolding(N);
- }
- return false;
- }
- /// getExtendTypeForNode - Translate an extend node to the corresponding
- /// ExtendType value.
- static AArch64_AM::ShiftExtendType
- getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
- if (N.getOpcode() == ISD::SIGN_EXTEND ||
- N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- EVT SrcVT;
- if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
- SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
- else
- SrcVT = N.getOperand(0).getValueType();
- if (!IsLoadStore && SrcVT == MVT::i8)
- return AArch64_AM::SXTB;
- else if (!IsLoadStore && SrcVT == MVT::i16)
- return AArch64_AM::SXTH;
- else if (SrcVT == MVT::i32)
- return AArch64_AM::SXTW;
- assert(SrcVT != MVT::i64 && "extend from 64-bits?");
- return AArch64_AM::InvalidShiftExtend;
- } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
- N.getOpcode() == ISD::ANY_EXTEND) {
- EVT SrcVT = N.getOperand(0).getValueType();
- if (!IsLoadStore && SrcVT == MVT::i8)
- return AArch64_AM::UXTB;
- else if (!IsLoadStore && SrcVT == MVT::i16)
- return AArch64_AM::UXTH;
- else if (SrcVT == MVT::i32)
- return AArch64_AM::UXTW;
- assert(SrcVT != MVT::i64 && "extend from 64-bits?");
- return AArch64_AM::InvalidShiftExtend;
- } else if (N.getOpcode() == ISD::AND) {
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD)
- return AArch64_AM::InvalidShiftExtend;
- uint64_t AndMask = CSD->getZExtValue();
- switch (AndMask) {
- default:
- return AArch64_AM::InvalidShiftExtend;
- case 0xFF:
- return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
- case 0xFFFF:
- return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
- case 0xFFFFFFFF:
- return AArch64_AM::UXTW;
- }
- }
- return AArch64_AM::InvalidShiftExtend;
- }
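- // e.g. (and x, 0xFF) maps to UXTB for arithmetic operands but yields
- // InvalidShiftExtend when IsLoadStore is set, since load/store addressing
- // does not accept a UXTB modifier (illustrative summary of the cases above).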
- // Helper for tryMLAV64LaneV128 - Recognize high lane extracts.
- static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
- if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
- DL->getOpcode() != AArch64ISD::DUPLANE32)
- return false;
- SDValue SV = DL->getOperand(0);
- if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
- return false;
- SDValue EV = SV.getOperand(1);
- if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
- ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
- ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
- LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
- LaneOp = EV.getOperand(0);
- return true;
- }
- // Helper for tryMLAV64LaneV128 / tryMULLV64LaneV128 - Recognize operations
- // where one operand is a high lane extract.
- static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
- SDValue &LaneOp, int &LaneIdx) {
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
- std::swap(Op0, Op1);
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
- return false;
- }
- StdOp = Op1;
- return true;
- }
- /// tryMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
- /// is a lane in the upper half of a 128-bit vector. Recognize and select this
- /// so that we don't emit unnecessary lane extracts.
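- // Roughly (illustrative): (add a, (mul b, (duplane from the high half of a
- // q-register))) is selected directly as an indexed MLA such as
- // "mla v0.4h, v1.4h, v2.h[5]", avoiding a separate lane extract.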
- bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
- SDLoc dl(N);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
- SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
- int LaneIdx = -1; // Will hold the lane index.
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx)) {
- std::swap(Op0, Op1);
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx))
- return false;
- }
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
- SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
- unsigned MLAOpc = ~0U;
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized MLA.");
- case MVT::v4i16:
- MLAOpc = AArch64::MLAv4i16_indexed;
- break;
- case MVT::v8i16:
- MLAOpc = AArch64::MLAv8i16_indexed;
- break;
- case MVT::v2i32:
- MLAOpc = AArch64::MLAv2i32_indexed;
- break;
- case MVT::v4i32:
- MLAOpc = AArch64::MLAv4i32_indexed;
- break;
- }
- ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
- return true;
- }
- bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
- SDLoc dl(N);
- SDValue SMULLOp0;
- SDValue SMULLOp1;
- int LaneIdx;
- if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
- LaneIdx))
- return false;
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
- SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
- unsigned SMULLOpc = ~0U;
- if (IntNo == Intrinsic::aarch64_neon_smull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = AArch64::SMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = AArch64::SMULLv2i32_indexed;
- break;
- }
- } else if (IntNo == Intrinsic::aarch64_neon_umull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = AArch64::UMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = AArch64::UMULLv2i32_indexed;
- break;
- }
- } else
- llvm_unreachable("Unrecognized intrinsic.");
- ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
- return true;
- }
- /// Instructions that accept extend modifiers like UXTW expect the register
- /// being extended to be a GPR32, but the incoming DAG might be acting on a
- /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
- /// this is the case.
- static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
- if (N.getValueType() == MVT::i32)
- return N;
- SDLoc dl(N);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, MVT::i32, N, SubReg);
- return SDValue(Node, 0);
- }
- // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
- template<signed Low, signed High, signed Scale>
- bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
- if (!isa<ConstantSDNode>(N))
- return false;
- int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
- if ((MulImm % std::abs(Scale)) == 0) {
- int64_t RDVLImm = MulImm / Scale;
- if ((RDVLImm >= Low) && (RDVLImm <= High)) {
- Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
- return true;
- }
- }
- return false;
- }
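- // Worked example (illustrative values): with Scale=16 and a constant of
- // -32, RDVLImm = -2, which is accepted whenever -2 lies within [Low, High].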
- /// SelectArithExtendedRegister - Select an "extended register" operand. This
- /// operand folds in an extend followed by an optional left shift.
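- // For example (illustrative): (shl (sext_inreg x, i16), 2) selects with
- // Ext=SXTH and ShiftVal=2, which the parent add/sub can encode as
- // "add x0, x1, w2, sxth #2".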
- bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
- SDValue &Shift) {
- unsigned ShiftVal = 0;
- AArch64_AM::ShiftExtendType Ext;
- if (N.getOpcode() == ISD::SHL) {
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD)
- return false;
- ShiftVal = CSD->getZExtValue();
- if (ShiftVal > 4)
- return false;
- Ext = getExtendTypeForNode(N.getOperand(0));
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return false;
- Reg = N.getOperand(0).getOperand(0);
- } else {
- Ext = getExtendTypeForNode(N);
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return false;
- Reg = N.getOperand(0);
- // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
- // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
- auto isDef32 = [](SDValue N) {
- unsigned Opc = N.getOpcode();
- return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
- Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
- Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
- Opc != ISD::FREEZE;
- };
- if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
- isDef32(Reg))
- return false;
- }
- // AArch64 mandates that the RHS of the operation must use the smallest
- // register class that could contain the size being extended from. Thus,
- // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
- // there might not be an actual 32-bit value in the program. We can
- // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
- assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
- Reg = narrowIfNeeded(CurDAG, Reg);
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
- MVT::i32);
- return isWorthFolding(N);
- }
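- // For example (illustrative), an add of a shifted sign-extension such as
- // (add x0, (shl (sext_inreg x1, i16), 2)) can be selected as a single
- // extended-register operation:
- //   add x0, x0, w1, sxth #2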
- /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
- /// operand is used by instructions that take an SP operand.
- bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
- SDValue &Shift) {
- unsigned ShiftVal = 0;
- AArch64_AM::ShiftExtendType Ext;
- if (N.getOpcode() != ISD::SHL)
- return false;
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD)
- return false;
- ShiftVal = CSD->getZExtValue();
- if (ShiftVal > 4)
- return false;
- Ext = AArch64_AM::UXTX;
- Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
- MVT::i32);
- return isWorthFolding(N);
- }
- /// If there's a use of this ADDlow that's not itself a load/store then we'll
- /// need to create a real ADD instruction from it anyway and there's no point in
- /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
- /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
- /// leads to duplicated ADRP instructions.
- static bool isWorthFoldingADDlow(SDValue N) {
- for (auto *Use : N->uses()) {
- if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
- Use->getOpcode() != ISD::ATOMIC_LOAD &&
- Use->getOpcode() != ISD::ATOMIC_STORE)
- return false;
- // ldar and stlr have much more restrictive addressing modes (just a
- // register).
- if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
- return false;
- }
- return true;
- }
- /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
- /// immediate" address. The "Size" argument is the size in bytes of the memory
- /// reference, which determines the scale.
- bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
- unsigned BW, unsigned Size,
- SDValue &Base,
- SDValue &OffImm) {
- SDLoc dl(N);
- const DataLayout &DL = CurDAG->getDataLayout();
- const TargetLowering *TLI = getTargetLowering();
- if (N.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
- return true;
- }
- // As opposed to the (12-bit) Indexed addressing mode below, the 7-/9-bit
- // signed addressing mode selected here doesn't support labels/immediates,
- // only base+offset.
- if (CurDAG->isBaseWithConstantOffset(N)) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- if (IsSignedImm) {
- int64_t RHSC = RHS->getSExtValue();
- unsigned Scale = Log2_32(Size);
- int64_t Range = 0x1LL << (BW - 1);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
- RHSC < (Range << Scale)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- }
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
- return true;
- }
- } else {
- // unsigned Immediate
- uint64_t RHSC = RHS->getZExtValue();
- unsigned Scale = Log2_32(Size);
- uint64_t Range = 0x1ULL << BW;
- if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- }
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
- return true;
- }
- }
- }
- }
- // Base only. The address will be materialized into a register before
- // the memory is accessed.
- // add x0, Xbase, #offset
- // stp x1, x2, [x0]
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
- return true;
- }
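- // Worked example (illustrative): with IsSignedImm, BW=7 and Size=8 (the
- // 7-bit signed scaled shape used by paired loads/stores), Scale=3 and
- // Range=1<<6, so RHSC must be a multiple of 8 in [-512, 504]; RHSC=16 is
- // encoded as OffImm = 16 >> 3 = 2.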
- /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
- /// immediate" address. The "Size" argument is the size in bytes of the memory
- /// reference, which determines the scale.
- bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
- SDValue &Base, SDValue &OffImm) {
- SDLoc dl(N);
- const DataLayout &DL = CurDAG->getDataLayout();
- const TargetLowering *TLI = getTargetLowering();
- if (N.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
- return true;
- }
- if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
- GlobalAddressSDNode *GAN =
- dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
- Base = N.getOperand(0);
- OffImm = N.getOperand(1);
- if (!GAN)
- return true;
- if (GAN->getOffset() % Size == 0 &&
- GAN->getGlobal()->getPointerAlignment(DL) >= Size)
- return true;
- }
- if (CurDAG->isBaseWithConstantOffset(N)) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int64_t RHSC = (int64_t)RHS->getZExtValue();
- unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- }
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
- return true;
- }
- }
- }
- // Before falling back to our general case, check if the unscaled
- // instructions can handle this. If so, that's preferable.
- if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
- return false;
- // Base only. The address will be materialized into a register before
- // the memory is accessed.
- // add x0, Xbase, #offset
- // ldr x0, [x0]
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
- return true;
- }
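- // Worked example (illustrative): for Size=8, valid offsets are multiples of
- // 8 in [0, 32760], so base+16 selects to "ldr x0, [xBase, #16]" with
- // OffImm = 16 >> 3 = 2; base+17 instead falls through to the unscaled
- // check below.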
- /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
- /// immediate" address. This should only match when there is an offset that
- /// is not valid for a scaled immediate addressing mode. The "Size" argument
- /// is the size in bytes of the memory reference, which is needed here to know
- /// what is valid for a scaled immediate.
- bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
- SDValue &Base,
- SDValue &OffImm) {
- if (!CurDAG->isBaseWithConstantOffset(N))
- return false;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int64_t RHSC = RHS->getSExtValue();
- // If the offset is valid as a scaled immediate, don't match here.
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
- RHSC < (0x1000 << Log2_32(Size)))
- return false;
- if (RHSC >= -256 && RHSC < 256) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- const TargetLowering *TLI = getTargetLowering();
- Base = CurDAG->getTargetFrameIndex(
- FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- }
- OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
- return true;
- }
- }
- return false;
- }
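- // For example (illustrative), offset -3 with Size=8 is invalid for the
- // scaled form but fits the signed 9-bit range [-256, 255]:
- //   ldur x0, [xBase, #-3]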
- static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
- SDLoc dl(N);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- SDValue ImpDef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
- MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
- return SDValue(Node, 0);
- }
- /// Check if the given SHL node (\p N) can be used to form an
- /// extended register for an addressing mode.
- bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
- bool WantExtend, SDValue &Offset,
- SDValue &SignExtend) {
- assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
- return false;
- SDLoc dl(N);
- if (WantExtend) {
- AArch64_AM::ShiftExtendType Ext =
- getExtendTypeForNode(N.getOperand(0), true);
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return false;
- Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
- MVT::i32);
- } else {
- Offset = N.getOperand(0);
- SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
- }
- unsigned LegalShiftVal = Log2_32(Size);
- unsigned ShiftVal = CSD->getZExtValue();
- if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
- return false;
- return isWorthFolding(N);
- }
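- // For example (illustrative), with Size=4 and WantExtend, a shifted
- // sign-extended index (shl (sext_inreg idx, i32), 2) folds into the
- // W-register-offset form:
- //   ldr w0, [xBase, wIdx, sxtw #2]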
- bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
- SDValue &Base, SDValue &Offset,
- SDValue &SignExtend,
- SDValue &DoShift) {
- if (N.getOpcode() != ISD::ADD)
- return false;
- SDValue LHS = N.getOperand(0);
- SDValue RHS = N.getOperand(1);
- SDLoc dl(N);
- // We don't want to match immediate adds here, because they are better lowered
- // to the register-immediate addressing modes.
- if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
- return false;
- // Check if this particular node is reused in any non-memory related
- // operation. If yes, do not try to fold this node into the address
- // computation, since the computation will be kept.
- const SDNode *Node = N.getNode();
- for (SDNode *UI : Node->uses()) {
- if (!isa<MemSDNode>(*UI))
- return false;
- }
- // Remember if it is worth folding N when it produces extended register.
- bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
- // Try to match a shifted extend on the RHS.
- if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
- Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
- return true;
- }
- // Try to match a shifted extend on the LHS.
- if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
- Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
- return true;
- }
- // There was no shift, whatever else we find.
- DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
- AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
- // Try to match an unshifted extend on the LHS.
- if (IsExtendedRegisterWorthFolding &&
- (Ext = getExtendTypeForNode(LHS, true)) !=
- AArch64_AM::InvalidShiftExtend) {
- Base = RHS;
- Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
- MVT::i32);
- if (isWorthFolding(LHS))
- return true;
- }
- // Try to match an unshifted extend on the RHS.
- if (IsExtendedRegisterWorthFolding &&
- (Ext = getExtendTypeForNode(RHS, true)) !=
- AArch64_AM::InvalidShiftExtend) {
- Base = LHS;
- Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
- MVT::i32);
- if (isWorthFolding(RHS))
- return true;
- }
- return false;
- }
- // Check if the given immediate is preferred by ADD. Return true if the
- // immediate can be encoded in an ADD, or if it can be encoded in an
- // "ADD LSL #12" and cannot be encoded by a single MOVZ.
- static bool isPreferredADD(int64_t ImmOff) {
- // Constant in [0x0, 0xfff] can be encoded in ADD.
- if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
- return true;
- // Check if it can be encoded in an "ADD LSL #12".
- if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
- // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
- return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
- (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
- return false;
- }
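- // Worked examples (illustrative): 0x12000 is preferred, since it fits
- // "add x1, x0, #0x12, lsl #12" and cannot be a single MOVZ; 0x30000 is not,
- // since "movz x0, #3, lsl #16" already materializes it in one instruction.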
- bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
- SDValue &Base, SDValue &Offset,
- SDValue &SignExtend,
- SDValue &DoShift) {
- if (N.getOpcode() != ISD::ADD)
- return false;
- SDValue LHS = N.getOperand(0);
- SDValue RHS = N.getOperand(1);
- SDLoc DL(N);
- // Check if this particular node is reused in any non-memory related
- // operation. If yes, do not try to fold this node into the address
- // computation, since the computation will be kept.
- const SDNode *Node = N.getNode();
- for (SDNode *UI : Node->uses()) {
- if (!isa<MemSDNode>(*UI))
- return false;
- }
- // Watch out if RHS is a wide immediate: it cannot be selected into the
- // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
- // Instead it will use the [BaseReg + 0] address mode and generate
- // instructions like:
- // MOV X0, WideImmediate
- // ADD X1, BaseReg, X0
- // LDR X2, [X1, 0]
- // For such situations, using the [BaseReg, XReg] addressing mode can save one
- // ADD/SUB:
- // MOV X0, WideImmediate
- // LDR X2, [BaseReg, X0]
- if (isa<ConstantSDNode>(RHS)) {
- int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
- unsigned Scale = Log2_32(Size);
- // Skip immediates that can be selected in the load/store addressing mode.
- // Also skip immediates that can be encoded by a single ADD (SUB is also
- // checked by using -ImmOff).
- if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
- isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
- return false;
- SDValue Ops[] = { RHS };
- SDNode *MOVI =
- CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
- SDValue MOVIV = SDValue(MOVI, 0);
- // This ADD of two X registers will be selected into [Reg+Reg] mode.
- N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
- }
- // Remember if it is worth folding N when it produces extended register.
- bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
- // Try to match a shifted extend on the RHS.
- if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
- Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
- return true;
- }
- // Try to match a shifted extend on the LHS.
- if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
- Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
- return true;
- }
- // Match any non-shifted, non-extend, non-immediate add expression.
- Base = LHS;
- Offset = RHS;
- SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
- DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
- // Reg1 + Reg2 is free: no check needed.
- return true;
- }
- SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static const unsigned RegClassIDs[] = {
- AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3};
- return createTuple(Regs, RegClassIDs, SubRegs);
- }
- SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static const unsigned RegClassIDs[] = {
- AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3};
- return createTuple(Regs, RegClassIDs, SubRegs);
- }
- SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
- static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
- AArch64::ZPR3RegClassID,
- AArch64::ZPR4RegClassID};
- static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
- AArch64::zsub2, AArch64::zsub3};
- return createTuple(Regs, RegClassIDs, SubRegs);
- }
- SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- const unsigned RegClassIDs[],
- const unsigned SubRegs[]) {
- // There's no special register-class for a vector-list of 1 element: it's just
- // a vector.
- if (Regs.size() == 1)
- return Regs[0];
- assert(Regs.size() >= 2 && Regs.size() <= 4);
- SDLoc DL(Regs[0]);
- SmallVector<SDValue, 4> Ops;
- // First operand of REG_SEQUENCE is the desired RegClass.
- Ops.push_back(
- CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
- // Then we get pairs of source & subregister-position for the components.
- for (unsigned i = 0; i < Regs.size(); ++i) {
- Ops.push_back(Regs[i]);
- Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
- }
- SDNode *N =
- CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
- return SDValue(N, 0);
- }
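- // Illustrative example: createDTuple({D0, D1}) produces
- //   REG_SEQUENCE DDRegClassID, D0, dsub0, D1, dsub1
- // forcing the two values into consecutive D registers as one untyped value.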
- void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
- bool isExt) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- unsigned ExtOff = isExt;
- // Form a REG_SEQUENCE to force register allocation.
- unsigned Vec0Off = ExtOff + 1;
- SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
- N->op_begin() + Vec0Off + NumVecs);
- SDValue RegSeq = createQTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- if (isExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
- ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
- }
- bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isUnindexed())
- return false;
- EVT VT = LD->getMemoryVT();
- EVT DstVT = N->getValueType(0);
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
- // We're not doing validity checking here. That was done when checking
- // if we should mark the load as indexed or not. We're just selecting
- // the right instruction.
- unsigned Opcode = 0;
- ISD::LoadExtType ExtType = LD->getExtensionType();
- bool InsertTo64 = false;
- if (VT == MVT::i64)
- Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
- else if (VT == MVT::i32) {
- if (ExtType == ISD::NON_EXTLOAD)
- Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
- else if (ExtType == ISD::SEXTLOAD)
- Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
- else {
- Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
- InsertTo64 = true;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::i16) {
- if (ExtType == ISD::SEXTLOAD) {
- if (DstVT == MVT::i64)
- Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
- else
- Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
- } else {
- Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
- InsertTo64 = DstVT == MVT::i64;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::i8) {
- if (ExtType == ISD::SEXTLOAD) {
- if (DstVT == MVT::i64)
- Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
- else
- Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
- } else {
- Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
- InsertTo64 = DstVT == MVT::i64;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::f16) {
- Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
- } else if (VT == MVT::bf16) {
- Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
- } else if (VT == MVT::f32) {
- Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
- } else if (VT == MVT::f64 || VT.is64BitVector()) {
- Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
- } else if (VT.is128BitVector()) {
- Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
- } else
- return false;
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
- int OffsetVal = (int)OffsetOp->getZExtValue();
- SDLoc dl(N);
- SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
- SDValue Ops[] = { Base, Offset, Chain };
- SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
- MVT::Other, Ops);
- // Transfer memoperands.
- MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
- // Either way, we're replacing the node, so tell the caller that.
- SDValue LoadedVal = SDValue(Res, 1);
- if (InsertTo64) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- LoadedVal =
- SDValue(CurDAG->getMachineNode(
- AArch64::SUBREG_TO_REG, dl, MVT::i64,
- CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
- SubReg),
- 0);
- }
- ReplaceUses(SDValue(N, 0), LoadedVal);
- ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
- ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
- CurDAG->RemoveDeadNode(N);
- return true;
- }
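- // Illustrative example: a post-indexed zero/any-extending i8 load into i64
- // selects LDRBBpost, e.g. "ldrb w0, [x1], #1", and the SUBREG_TO_REG above
- // then widens the i32 result to i64.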
- void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
- unsigned SubRegIdx) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- SDValue Ops[] = {N->getOperand(2), // Mem operand;
- Chain};
- const EVT ResTys[] = {MVT::Untyped, MVT::Other};
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- SDValue SuperReg = SDValue(Ld, 0);
- for (unsigned i = 0; i < NumVecs; ++i)
- ReplaceUses(SDValue(N, i),
- CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
- // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
- // because it's too simple to have needed special treatment during lowering.
- if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
- MachineMemOperand *MemOp = MemIntr->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
- }
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
- unsigned Opc, unsigned SubRegIdx) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- SDValue Ops[] = {N->getOperand(1), // Mem operand
- N->getOperand(2), // Incremental
- Chain};
- const EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- // Update uses of write back register
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
- // Update uses of vector list
- SDValue SuperReg = SDValue(Ld, 1);
- if (NumVecs == 1)
- ReplaceUses(SDValue(N, 0), SuperReg);
- else
- for (unsigned i = 0; i < NumVecs; ++i)
- ReplaceUses(SDValue(N, i),
- CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
- // Update the chain
- ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
- CurDAG->RemoveDeadNode(N);
- }
- /// Optimize \param OldBase and \param OldOffset by selecting the best addressing
- /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
- /// new Base and an SDValue representing the new offset.
- std::tuple<unsigned, SDValue, SDValue>
- AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
- unsigned Opc_ri,
- const SDValue &OldBase,
- const SDValue &OldOffset,
- unsigned Scale) {
- SDValue NewBase = OldBase;
- SDValue NewOffset = OldOffset;
- // Detect a possible Reg+Imm addressing mode.
- const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
- N, OldBase, NewBase, NewOffset);
- // Detect a possible reg+reg addressing mode, but only if we haven't already
- // detected a Reg+Imm one.
- const bool IsRegReg =
- !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
- // Select the instruction.
- return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
- }
- enum class SelectTypeKind {
- Int1 = 0,
- };
- /// This function selects an opcode from a list of opcodes, which are
- /// expected to be the opcodes for the { 8-bit, 16-bit, 32-bit, 64-bit }
- /// element types, in this order.
- template <SelectTypeKind Kind>
- static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
- // Only match scalable vector VTs
- if (!VT.isScalableVector())
- return 0;
- EVT EltVT = VT.getVectorElementType();
- switch (Kind) {
- case SelectTypeKind::Int1:
- if (EltVT != MVT::i1)
- return 0;
- break;
- }
- unsigned Offset;
- switch (VT.getVectorMinNumElements()) {
- case 16: // 8-bit
- Offset = 0;
- break;
- case 8: // 16-bit
- Offset = 1;
- break;
- case 4: // 32-bit
- Offset = 2;
- break;
- case 2: // 64-bit
- Offset = 3;
- break;
- default:
- return 0;
- }
- return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
- }
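- // Illustrative example: for VT = nxv8i1 (a scalable vector of 8 x i1),
- // getVectorMinNumElements() == 8 gives Offset = 1, so the 16-bit-element
- // opcode Opcodes[1] is returned.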
- void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
- SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
- SDValue SuperReg = SDValue(WhilePair, 0);
- for (unsigned I = 0; I < 2; ++I)
- ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
- AArch64::psub0 + I, DL, VT, SuperReg));
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
- unsigned Opcode) {
- EVT VT = N->getValueType(0);
- SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
- SDValue Ops = createZTuple(Regs);
- SDLoc DL(N);
- SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
- SDValue SuperReg = SDValue(Intrinsic, 0);
- for (unsigned i = 0; i < NumVecs; ++i)
- ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
- AArch64::zsub0 + i, DL, VT, SuperReg));
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
- unsigned Scale, unsigned Opc_ri,
- unsigned Opc_rr, bool IsIntr) {
- assert(Scale < 4 && "Invalid scaling value.");
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- // Optimize addressing mode.
- SDValue Base, Offset;
- unsigned Opc;
- std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
- N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
- CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
- SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
- Base, // Memory operand
- Offset, Chain};
- const EVT ResTys[] = {MVT::Untyped, MVT::Other};
- SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
- SDValue SuperReg = SDValue(Load, 0);
- for (unsigned i = 0; i < NumVecs; ++i)
- ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
- AArch64::zsub0 + i, DL, VT, SuperReg));
- // Copy chain
- unsigned ChainIdx = NumVecs;
- ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
- // Form a REG_SEQUENCE to force register allocation.
- bool Is128Bit = VT.getSizeInBits() == 128;
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
- SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
- SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
- // Transfer memoperands.
- MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
- ReplaceNode(N, St);
- }
- void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
- unsigned Scale, unsigned Opc_rr,
- unsigned Opc_ri) {
- SDLoc dl(N);
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
- SDValue RegSeq = createZTuple(Regs);
- // Optimize addressing mode.
- unsigned Opc;
- SDValue Offset, Base;
- std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
- N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
- CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
- SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
- Base, // address
- Offset, // offset
- N->getOperand(0)}; // chain
- SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
- ReplaceNode(N, St);
- }
- bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
- SDValue &OffImm) {
- SDLoc dl(N);
- const DataLayout &DL = CurDAG->getDataLayout();
- const TargetLowering *TLI = getTargetLowering();
- // Try to match it for the frame address
- if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
- int FI = FINode->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
- return true;
- }
- return false;
- }
- void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
- const EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other}; // Type for the Chain
- // Form a REG_SEQUENCE to force register allocation.
- bool Is128Bit = VT.getSizeInBits() == 128;
- SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
- SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SDValue Ops[] = {RegSeq,
- N->getOperand(NumVecs + 1), // base register
- N->getOperand(NumVecs + 2), // Incremental
- N->getOperand(0)}; // Chain
- SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- ReplaceNode(N, St);
- }
- namespace {
- /// WidenVector - Given a value in the V64 register class, produce the
- /// equivalent value in the V128 register class.
- class WidenVector {
- SelectionDAG &DAG;
- public:
- WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
- SDValue operator()(SDValue V64Reg) {
- EVT VT = V64Reg.getValueType();
- unsigned NarrowSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
- SDLoc DL(V64Reg);
- SDValue Undef =
- SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
- return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
- }
- };
- } // namespace
- /// NarrowVector - Given a value in the V128 register class, produce the
- /// equivalent value in the V64 register class.
- static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
- EVT VT = V128Reg.getValueType();
- unsigned WideSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
- return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
- V128Reg);
- }
- void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
- if (Narrow)
- transform(Regs, Regs.begin(),
- WidenVector(*CurDAG));
- SDValue RegSeq = createQTuple(Regs);
- const EVT ResTys[] = {MVT::Untyped, MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
- N->getOperand(NumVecs + 3), N->getOperand(0)};
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- SDValue SuperReg = SDValue(Ld, 0);
- EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
- for (unsigned i = 0; i < NumVecs; ++i) {
- SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
- if (Narrow)
- NV = NarrowVector(NV, *CurDAG);
- ReplaceUses(SDValue(N, i), NV);
- }
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
- if (Narrow)
- transform(Regs, Regs.begin(),
- WidenVector(*CurDAG));
- SDValue RegSeq = createQTuple(Regs);
- const EVT ResTys[] = {MVT::i64, // Type of the write back register
- RegSeq->getValueType(0), MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SDValue Ops[] = {RegSeq,
- CurDAG->getTargetConstant(LaneNo, dl,
- MVT::i64), // Lane Number
- N->getOperand(NumVecs + 2), // Base register
- N->getOperand(NumVecs + 3), // Incremental
- N->getOperand(0)};
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- // Update uses of the write back register
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
- // Update uses of the vector list
- SDValue SuperReg = SDValue(Ld, 1);
- if (NumVecs == 1) {
- ReplaceUses(SDValue(N, 0),
- Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
- } else {
- EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
- for (unsigned i = 0; i < NumVecs; ++i) {
- SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
- SuperReg);
- if (Narrow)
- NV = NarrowVector(NV, *CurDAG);
- ReplaceUses(SDValue(N, i), NV);
- }
- }
- // Update the Chain
- ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
- CurDAG->RemoveDeadNode(N);
- }
- void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
- if (Narrow)
- transform(Regs, Regs.begin(),
- WidenVector(*CurDAG));
- SDValue RegSeq = createQTuple(Regs);
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
- N->getOperand(NumVecs + 3), N->getOperand(0)};
- SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
- // Transfer memoperands.
- MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
- ReplaceNode(N, St);
- }
- void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
- if (Narrow)
- transform(Regs, Regs.begin(),
- WidenVector(*CurDAG));
- SDValue RegSeq = createQTuple(Regs);
- const EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
- N->getOperand(NumVecs + 2), // Base Register
- N->getOperand(NumVecs + 3), // Incremental
- N->getOperand(0)};
- SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- // Transfer memoperands.
- MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
- ReplaceNode(N, St);
- }
- static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
- unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB,
- unsigned NumberOfIgnoredLowBits,
- bool BiggerPattern) {
- assert(N->getOpcode() == ISD::AND &&
- "N must be a AND operation to call this function");
- EVT VT = N->getValueType(0);
- // Here we could test the type of VT and return false when it does not
- // match, but since that check is done prior to this call in the current
- // context, we turn it into an assert to avoid redundant code.
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Type checking must have been done before calling this function");
- // FIXME: simplify-demanded-bits in DAGCombine will probably have
- // changed the AND node to a 32-bit mask operation. We'll have to
- // undo that as part of the transform here if we want to catch all
- // the opportunities.
- // Currently, the NumberOfIgnoredLowBits argument helps to recover
- // from these situations when matching the bigger pattern (bitfield insert).
- // For unsigned extracts, check for a shift right and mask
- uint64_t AndImm = 0;
- if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
- return false;
- const SDNode *Op0 = N->getOperand(0).getNode();
- // Because of simplify-demanded-bits in DAGCombine, the mask may have been
- // simplified. Try to undo that
- AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
- // The immediate is a mask of the low bits iff imm & (imm+1) == 0
- if (AndImm & (AndImm + 1))
- return false;
- bool ClampMSB = false;
- uint64_t SrlImm = 0;
- // Handle the SRL + ANY_EXTEND case.
- if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
- isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
- // Extend the incoming operand of the SRL to 64-bit.
- Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
- // Make sure to clamp the MSB so that we preserve the semantics of the
- // original operations.
- ClampMSB = true;
- } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
- isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
- SrlImm)) {
- // If the shift result was truncated, we can still combine them.
- Opd0 = Op0->getOperand(0).getOperand(0);
- // Use the type of SRL node.
- VT = Opd0->getValueType(0);
- } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
- Opd0 = Op0->getOperand(0);
- ClampMSB = (VT == MVT::i32);
- } else if (BiggerPattern) {
- // Let's pretend a 0 shift right has been performed.
- // The resulting code will be at least as good as the original one,
- // and it may expose more opportunities for the bitfield insert pattern.
- // FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM.
- Opd0 = N->getOperand(0);
- } else
- return false;
- // Bail out on large immediates. This happens when no proper
- // combining/constant folding was performed.
- if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
- LLVM_DEBUG(
- (dbgs() << N
- << ": Found large shift immediate, this should not happen\n"));
- return false;
- }
- LSB = SrlImm;
- MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
- : countTrailingOnes<uint64_t>(AndImm)) -
- 1;
- if (ClampMSB)
- // Since we're moving the extend before the right shift operation, we need
- // to clamp the MSB to make sure we don't shift in undefined bits instead of
- // the zeros which would get shifted in with the original right shift
- // operation.
- MSB = MSB > 31 ? 31 : MSB;
- Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
- return true;
- }
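- // Worked example (illustrative): for (and (srl x, 3), 0x1f) on i32,
- // AndImm = 0x1f and SrlImm = 3, so LSB = 3 and MSB = 3 + 5 - 1 = 7; the
- // resulting UBFMWri #3, #7 is the same as "ubfx w0, w1, #3, #5".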
- static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &Immr,
- unsigned &Imms) {
- assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
- EVT VT = N->getValueType(0);
- unsigned BitWidth = VT.getSizeInBits();
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Type checking must have been done before calling this function");
- SDValue Op = N->getOperand(0);
- if (Op->getOpcode() == ISD::TRUNCATE) {
- Op = Op->getOperand(0);
- VT = Op->getValueType(0);
- BitWidth = VT.getSizeInBits();
- }
- uint64_t ShiftImm;
- if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
- !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
- return false;
- unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
- if (ShiftImm + Width > BitWidth)
- return false;
- Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
- Opd0 = Op.getOperand(0);
- Immr = ShiftImm;
- Imms = ShiftImm + Width - 1;
- return true;
- }
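- // Worked example (illustrative): (sext_inreg (srl x, 4), i8) gives
- // ShiftImm = 4 and Width = 8, so Immr = 4 and Imms = 11, i.e.
- // "sbfx w0, w1, #4, #8".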
- static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &LSB,
- unsigned &MSB) {
- // We are looking for the following pattern, which extracts several
- // contiguous bits from the source value and places them at the LSB of the
- // destination value; all other bits of the destination value are set to zero:
- //
- // Value2 = AND Value, MaskImm
- // SRL Value2, ShiftImm
- //
- // with MaskImm >> ShiftImm to search for the bit width.
- //
- // This gets selected into a single UBFM:
- //
- // UBFM Value, ShiftImm, findLastSet(MaskImm)
- //
- if (N->getOpcode() != ISD::SRL)
- return false;
- uint64_t AndMask = 0;
- if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
- return false;
- Opd0 = N->getOperand(0).getOperand(0);
- uint64_t SrlImm = 0;
- if (!isIntImmediate(N->getOperand(1), SrlImm))
- return false;
- // Check whether we really have several bits extract here.
- if (!isMask_64(AndMask >> SrlImm))
- return false;
- Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
- LSB = SrlImm;
- MSB = findLastSet(AndMask, ZB_Undefined);
- return true;
- }
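- // Worked example (illustrative): for (srl (and x, 0xff0), 4),
- // AndMask >> SrlImm == 0xff is a mask, so LSB = 4 and
- // MSB = findLastSet(0xff0) = 11, i.e. "ubfx w0, w1, #4, #8".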
- static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &Immr, unsigned &Imms,
- bool BiggerPattern) {
- assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
- "N must be a SHR/SRA operation to call this function");
- EVT VT = N->getValueType(0);
- // Here we could test the type of VT and return false when it does not
- // match, but since that check is done prior to this call in the current
- // context, we turn it into an assert to avoid redundant code.
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Type checking must have been done before calling this function");
- // Check for AND + SRL doing several bits extract.
- if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
- return true;
- // We're looking for a shift of a shift.
- uint64_t ShlImm = 0;
- uint64_t TruncBits = 0;
- if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
- Opd0 = N->getOperand(0).getOperand(0);
- } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
- N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
- // We are looking for a shift of truncate. Truncate from i64 to i32 could
- // be considered as setting high 32 bits as zero. Our strategy here is to
- // always generate 64bit UBFM. This consistency will help the CSE pass
- // later find more redundancy.
- Opd0 = N->getOperand(0).getOperand(0);
- TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
- VT = Opd0.getValueType();
- assert(VT == MVT::i64 && "the promoted type should be i64");
- } else if (BiggerPattern) {
- // Let's pretend a 0 shift left has been performed.
- // FIXME: Currently we limit this to the bigger pattern case,
- // because some optimizations expect AND and not UBFM
- Opd0 = N->getOperand(0);
- } else
- return false;
- // Missing combines/constant folding may have left us with strange
- // constants.
- if (ShlImm >= VT.getSizeInBits()) {
- LLVM_DEBUG(
- (dbgs() << N
- << ": Found large shift immediate, this should not happen\n"));
- return false;
- }
- uint64_t SrlImm = 0;
- if (!isIntImmediate(N->getOperand(1), SrlImm))
- return false;
- assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
- "bad amount in shift node!");
- int immr = SrlImm - ShlImm;
- Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
- Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
- // SRA requires a signed extraction
- if (VT == MVT::i32)
- Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
- else
- Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
- return true;
- }
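- // Worked example (illustrative): on i32, (srl (shl x, 24), 27) gives
- // ShlImm = 24 and SrlImm = 27, so Immr = 3 and Imms = 32 - 24 - 1 = 7;
- // UBFMWri #3, #7 ("ubfx w0, w1, #3, #5") extracts bits [7:3] of x.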
- bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
- assert(N->getOpcode() == ISD::SIGN_EXTEND);
- EVT VT = N->getValueType(0);
- EVT NarrowVT = N->getOperand(0)->getValueType(0);
- if (VT != MVT::i64 || NarrowVT != MVT::i32)
- return false;
- uint64_t ShiftImm;
- SDValue Op = N->getOperand(0);
- if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
- return false;
- SDLoc dl(N);
- // Extend the incoming operand of the shift to 64-bits.
- SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
- unsigned Immr = ShiftImm;
- unsigned Imms = NarrowVT.getSizeInBits() - 1;
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
- CurDAG->getTargetConstant(Imms, dl, VT)};
- CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
- return true;
- }
- /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
- /// extract of a subvector.
- bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
- assert(N->getOpcode() == ISD::FP_EXTEND);
- // There are 2 forms of fcvtl2 - extend to double or extend to float.
- SDValue Extract = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT NarrowVT = Extract.getValueType();
- if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
- (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
- return false;
- // Optionally look past a bitcast.
- Extract = peekThroughBitcasts(Extract);
- if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
- // Match extract from start of high half index.
- // Example: v8i16 -> v4i16 means the extract must begin at index 4.
- unsigned ExtractIndex = Extract.getConstantOperandVal(1);
- if (ExtractIndex != Extract.getValueType().getVectorNumElements())
- return false;
- auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
- CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
- return true;
- }
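- // Illustrative example: (v4f32 (fp_extend (extract_subvector v8f16 V, 4)))
- // extracts the high half, so it selects to "fcvtl2 v0.4s, v1.8h".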
- static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &Immr, unsigned &Imms,
- unsigned NumberOfIgnoredLowBits = 0,
- bool BiggerPattern = false) {
- if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
- return false;
- switch (N->getOpcode()) {
- default:
- if (!N->isMachineOpcode())
- return false;
- break;
- case ISD::AND:
- return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
- NumberOfIgnoredLowBits, BiggerPattern);
- case ISD::SRL:
- case ISD::SRA:
- return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
- case ISD::SIGN_EXTEND_INREG:
- return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
- }
- unsigned NOpc = N->getMachineOpcode();
- switch (NOpc) {
- default:
- return false;
- case AArch64::SBFMWri:
- case AArch64::UBFMWri:
- case AArch64::SBFMXri:
- case AArch64::UBFMXri:
- Opc = NOpc;
- Opd0 = N->getOperand(0);
- Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- return true;
- }
- // Unreachable
- return false;
- }
- bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
- unsigned Opc, Immr, Imms;
- SDValue Opd0;
- if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
- return false;
- EVT VT = N->getValueType(0);
- SDLoc dl(N);
- // If the bit extract operation is 64bit but the original type is 32bit, we
- // need to add one EXTRACT_SUBREG.
- if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
- SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
- CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
- SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
- MVT::i32, SDValue(BFM, 0), SubReg));
- return true;
- }
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
- CurDAG->getTargetConstant(Imms, dl, VT)};
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
- /// Does DstMask form a complementary pair with the mask provided by
- /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
- /// this asks whether DstMask zeroes precisely those bits that will be set by
- /// the other half.
- static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
- unsigned NumberOfIgnoredHighBits, EVT VT) {
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "i32 or i64 mask type expected!");
- unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
- APInt SignificantDstMask = APInt(BitWidth, DstMask);
- APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
- return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
- (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
- }
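- // Worked example (illustrative): with VT = i32, DstMask = 0xffff00ff and
- // BitsToBeInserted = 0x0000ff00 are complementary (their intersection is 0
- // and their union is all ones), so a BFI inserting bits [15:8] can be used.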
- // Look for bits that will be useful for later uses.
- // A bit is considered useless as soon as it is dropped and is never used
- // before being dropped.
- // E.g., looking for useful bit of x
- // 1. y = x & 0x7
- // 2. z = y >> 2
- // After #1, the useful bits of x are 0x7; they live on through y.
- // After #2, the useful bits of x are 0x4.
- // However, if x is used by an unpredictable instruction, then all its bits
- // are useful.
- // E.g.
- // 1. y = x & 0x7
- // 2. z = y >> 2
- // 3. str x, [@x]
- static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
- static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
- Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
- UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
- getUsefulBits(Op, UsefulBits, Depth + 1);
- }
- static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
- uint64_t Imm, uint64_t MSB,
- unsigned Depth) {
- // inherit the bitwidth value
- APInt OpUsefulBits(UsefulBits);
- OpUsefulBits = 1;
- if (MSB >= Imm) {
- OpUsefulBits <<= MSB - Imm + 1;
- --OpUsefulBits;
- // The interesting part will be in the lower part of the result
- getUsefulBits(Op, OpUsefulBits, Depth + 1);
- // The interesting part was starting at Imm in the argument
- OpUsefulBits <<= Imm;
- } else {
- OpUsefulBits <<= MSB + 1;
- --OpUsefulBits;
- // The interesting part will be shifted in the result
- OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
- getUsefulBits(Op, OpUsefulBits, Depth + 1);
- // The interesting part was at zero in the argument
- OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
- }
- UsefulBits &= OpUsefulBits;
- }
- static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
- uint64_t MSB =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
- }
- static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t ShiftTypeAndValue =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- APInt Mask(UsefulBits);
- Mask.clearAllBits();
- Mask.flipAllBits();
- if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
- // Shift Left
- uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
- Mask <<= ShiftAmt;
- getUsefulBits(Op, Mask, Depth + 1);
- Mask.lshrInPlace(ShiftAmt);
- } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
- // Shift Right
- // We do not handle AArch64_AM::ASR, because the sign will change the
- // number of useful bits
- uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
- Mask.lshrInPlace(ShiftAmt);
- getUsefulBits(Op, Mask, Depth + 1);
- Mask <<= ShiftAmt;
- } else
- return;
- UsefulBits &= Mask;
- }
- static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- uint64_t MSB =
- cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
- APInt OpUsefulBits(UsefulBits);
- OpUsefulBits = 1;
- APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
- ResultUsefulBits.flipAllBits();
- APInt Mask(UsefulBits.getBitWidth(), 0);
- getUsefulBits(Op, ResultUsefulBits, Depth + 1);
- if (MSB >= Imm) {
- // The instruction is a BFXIL.
- uint64_t Width = MSB - Imm + 1;
- uint64_t LSB = Imm;
- OpUsefulBits <<= Width;
- --OpUsefulBits;
- if (Op.getOperand(1) == Orig) {
- // Copy the low bits from the result to bits starting from LSB.
- Mask = ResultUsefulBits & OpUsefulBits;
- Mask <<= LSB;
- }
- if (Op.getOperand(0) == Orig)
- // Bits starting from LSB in the input contribute to the result.
- Mask |= (ResultUsefulBits & ~OpUsefulBits);
- } else {
- // The instruction is a BFI.
- uint64_t Width = MSB + 1;
- uint64_t LSB = UsefulBits.getBitWidth() - Imm;
- OpUsefulBits <<= Width;
- --OpUsefulBits;
- OpUsefulBits <<= LSB;
- if (Op.getOperand(1) == Orig) {
- // Copy the bits from the result to the zero bits.
- Mask = ResultUsefulBits & OpUsefulBits;
- Mask.lshrInPlace(LSB);
- }
- if (Op.getOperand(0) == Orig)
- Mask |= (ResultUsefulBits & ~OpUsefulBits);
- }
- UsefulBits &= Mask;
- }
- static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
- SDValue Orig, unsigned Depth) {
- // Users of this node should have already been instruction selected
- // FIXME: Can we turn that into an assert?
- if (!UserNode->isMachineOpcode())
- return;
- switch (UserNode->getMachineOpcode()) {
- default:
- return;
- case AArch64::ANDSWri:
- case AArch64::ANDSXri:
- case AArch64::ANDWri:
- case AArch64::ANDXri:
- // We increment Depth only when we call the getUsefulBits
- return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
- Depth);
- case AArch64::UBFMWri:
- case AArch64::UBFMXri:
- return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
- getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
- Depth);
- return;
- case AArch64::BFMWri:
- case AArch64::BFMXri:
- return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
- case AArch64::STRBBui:
- case AArch64::STURBBi:
- if (UserNode->getOperand(0) != Orig)
- return;
- UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
- return;
- case AArch64::STRHHui:
- case AArch64::STURHHi:
- if (UserNode->getOperand(0) != Orig)
- return;
- UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
- return;
- }
- }
- static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
- if (Depth >= SelectionDAG::MaxRecursionDepth)
- return;
- // Initialize UsefulBits
- if (!Depth) {
- unsigned Bitwidth = Op.getScalarValueSizeInBits();
- // At the beginning, assume every produced bits is useful
- UsefulBits = APInt(Bitwidth, 0);
- UsefulBits.flipAllBits();
- }
- APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
- for (SDNode *Node : Op.getNode()->uses()) {
- // A use cannot produce useful bits
- APInt UsefulBitsForUse = APInt(UsefulBits);
- getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
- UsersUsefulBits |= UsefulBitsForUse;
- }
- // UsefulBits contains the produced bits that are meaningful for the
- // current definition, thus a user cannot make a bit meaningful at
- // this point
- UsefulBits &= UsersUsefulBits;
- }
- /// Create a machine node performing a notional SHL of Op by ShlAmount. If
- /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
- /// 0, return Op unchanged.
- static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
- if (ShlAmount == 0)
- return Op;
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- unsigned BitWidth = VT.getSizeInBits();
- unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
- SDNode *ShiftNode;
- if (ShlAmount > 0) {
- // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
- ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, dl, VT, Op,
- CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
- CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
- } else {
- // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
- assert(ShlAmount < 0 && "expected right shift");
- int ShrAmount = -ShlAmount;
- ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
- CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
- }
- return SDValue(ShiftNode, 0);
- }
- // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
- static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
- bool BiggerPattern,
- const uint64_t NonZeroBits,
- SDValue &Src, int &DstLSB,
- int &Width);
- // For bit-field-positioning pattern "(shl VAL, N)".
- static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
- bool BiggerPattern,
- const uint64_t NonZeroBits,
- SDValue &Src, int &DstLSB,
- int &Width);
- /// Does this tree qualify as an attempt to move a bitfield into position,
- /// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
- static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
- bool BiggerPattern, SDValue &Src,
- int &DstLSB, int &Width) {
- EVT VT = Op.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
- (void)BitWidth;
- assert(BitWidth == 32 || BitWidth == 64);
- KnownBits Known = CurDAG->computeKnownBits(Op);
- // Non-zero in the sense that they're not provably zero, which is the key
- // point if we want to use this value
- const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
- if (!isShiftedMask_64(NonZeroBits))
- return false;
- switch (Op.getOpcode()) {
- default:
- break;
- case ISD::AND:
- return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
- NonZeroBits, Src, DstLSB, Width);
- case ISD::SHL:
- return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
- NonZeroBits, Src, DstLSB, Width);
- }
- return false;
- }
- static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
- bool BiggerPattern,
- const uint64_t NonZeroBits,
- SDValue &Src, int &DstLSB,
- int &Width) {
- assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
- EVT VT = Op.getValueType();
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Caller guarantees VT is one of i32 or i64");
- (void)VT;
- uint64_t AndImm;
- if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
- return false;
- // If (~AndImm & NonZeroBits) is not zero at POS, we know that
- // 1) ((AndImm & (1 << POS)) == 0)
- // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
- //
- // 1) and 2) don't agree so something must be wrong (e.g., in
- // 'SelectionDAG::computeKnownBits')
- assert((~AndImm & NonZeroBits) == 0 &&
- "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
- SDValue AndOp0 = Op.getOperand(0);
- uint64_t ShlImm;
- SDValue ShlOp0;
- if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
- // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
- ShlOp0 = AndOp0.getOperand(0);
- } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
- isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
- ShlImm)) {
- // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
- // ShlVal == shl(val, N), which is a left shift on a smaller type.
- SDValue ShlVal = AndOp0.getOperand(0);
- // Since this is after type legalization and ShlVal is extended to MVT::i64,
- // expect the type of ShlVal to be MVT::i32.
- assert((ShlVal.getValueType() == MVT::i32) && "Expect ShlVal to be MVT::i32.");
- // Widen 'val' to MVT::i64 as the source of the bit-field positioning.
- ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
- } else
- return false;
- // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
- // then we'll end up generating AndOp0+UBFIZ instead of just keeping
- // AndOp0+AND.
- if (!BiggerPattern && !AndOp0.hasOneUse())
- return false;
- DstLSB = countTrailingZeros(NonZeroBits);
- Width = countTrailingOnes(NonZeroBits >> DstLSB);
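- // E.g. (illustrative): NonZeroBits == 0x0000ff00 yields DstLSB == 8 and
- // Width == 8, i.e. the positioned bitfield occupies bits [8, 15].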
- // Bail out on large Width. This happens when no proper combining / constant
- // folding was performed.
- if (Width >= (int)VT.getSizeInBits()) {
- // If VT is i64, Width > 64 is impossible since NonZeroBits is uint64_t, and
- // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
- // "val".
- // If VT is i32, Width >= 32 means:
- // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
- // demands at least 'Width' bits (after dag-combiner). Together with the
- // `any_extend` Op (whose higher bits are undefined), this indicates a
- // missed combination when lowering the 'and' IR instruction to a machine
- // IR instruction.
- LLVM_DEBUG(
- dbgs()
- << "Found large Width in bit-field-positioning -- this indicates no "
- "proper combining / constant folding was performed\n");
- return false;
- }
- // BFI encompasses sufficiently many nodes that it's worth inserting an extra
- // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
- // amount. BiggerPattern is true when this pattern is being matched for BFI,
- // BiggerPattern is false when this pattern is being matched for UBFIZ, in
- // which case it is not profitable to insert an extra shift.
- if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
- return false;
- Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
- return true;
- }
- // For node (shl (and val, mask), N)), returns true if the node is equivalent to
- // UBFIZ.
- static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
- SDValue &Src, int &DstLSB,
- int &Width) {
- // The caller should have verified that Op is a left shift by a constant
- // amount; the asserts below check that.
- assert(Op.getOpcode() == ISD::SHL &&
- "Op.getNode() should be a SHL node to call this function");
- assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
- "Op.getNode() should shift ShlImm to call this function");
- uint64_t AndImm = 0;
- SDValue Op0 = Op.getOperand(0);
- if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
- return false;
- const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
- if (isMask_64(ShiftedAndImm)) {
- // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
- // must end with a contiguous run of ones and may be prefixed with arbitrary
- // bits, since those bits are shifted out.
- //
- // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
- // the AND result corresponding to those bits is shifted out, so it's fine
- // not to extract them.
- Width = countTrailingOnes(ShiftedAndImm);
- DstLSB = ShlImm;
- Src = Op0.getOperand(0);
- return true;
- }
- return false;
- }
- static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
- bool BiggerPattern,
- const uint64_t NonZeroBits,
- SDValue &Src, int &DstLSB,
- int &Width) {
- assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
- EVT VT = Op.getValueType();
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Caller guarantees that type is i32 or i64");
- (void)VT;
- uint64_t ShlImm;
- if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
- return false;
- if (!BiggerPattern && !Op.hasOneUse())
- return false;
- if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
- return true;
- DstLSB = countTrailingZeros(NonZeroBits);
- Width = countTrailingOnes(NonZeroBits >> DstLSB);
- if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
- return false;
- Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
- return true;
- }
- static bool isShiftedMask(uint64_t Mask, EVT VT) {
- assert(VT == MVT::i32 || VT == MVT::i64);
- if (VT == MVT::i32)
- return isShiftedMask_32(Mask);
- return isShiftedMask_64(Mask);
- }
- // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
- // inserted only sets known zero bits.
- static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
- assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
- unsigned BitWidth = VT.getSizeInBits();
- uint64_t OrImm;
- if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
- return false;
- // Skip this transformation if the OR immediate can be encoded directly in
- // an ORR: we would only trade an AND+ORR for an ORR+BFI/BFXIL, which is
- // most likely performance neutral.
- if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
- return false;
- uint64_t MaskImm;
- SDValue And = N->getOperand(0);
- // Must be a single use AND with an immediate operand.
- if (!And.hasOneUse() ||
- !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
- return false;
- // Compute the Known Zero for the AND as this allows us to catch more general
- // cases than just looking for AND with imm.
- KnownBits Known = CurDAG->computeKnownBits(And);
- // Non-zero in the sense that they're not provably zero, which is the key
- // point if we want to use this value.
- uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
- // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
- if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
- return false;
- // The bits being inserted must only set those bits that are known to be zero.
- if ((OrImm & NotKnownZero) != 0) {
- // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
- // currently handle this case.
- return false;
- }
- // BFI/BFXIL dst, src, #lsb, #width.
- int LSB = countTrailingOnes(NotKnownZero);
- int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
- // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
- unsigned ImmR = (BitWidth - LSB) % BitWidth;
- unsigned ImmS = Width - 1;
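- // E.g. (illustrative): BitWidth == 32, LSB == 4 and Width == 8 give
- // ImmR == 28 and ImmS == 7, i.e. "BFI dst, src, #4, #8" written as
- // "BFM dst, src, #28, #7".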
- // If we're creating a BFI instruction avoid cases where we need more
- // instructions to materialize the BFI constant as compared to the original
- // ORR. A BFXIL will use the same constant as the original ORR, so the code
- // should be no worse in this case.
- bool IsBFI = LSB != 0;
- uint64_t BFIImm = OrImm >> LSB;
- if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
- // We have a BFI instruction and we know the constant can't be materialized
- // with a ORR-immediate with the zero register.
- unsigned OrChunks = 0, BFIChunks = 0;
- for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
- if (((OrImm >> Shift) & 0xFFFF) != 0)
- ++OrChunks;
- if (((BFIImm >> Shift) & 0xFFFF) != 0)
- ++BFIChunks;
- }
- if (BFIChunks > OrChunks)
- return false;
- }
- // Materialize the constant to be inserted.
- SDLoc DL(N);
- unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
- SDNode *MOVI = CurDAG->getMachineNode(
- MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
- // Create the BFI/BFXIL instruction.
- SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
- CurDAG->getTargetConstant(ImmR, DL, VT),
- CurDAG->getTargetConstant(ImmS, DL, VT)};
- unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
- static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
- SDValue &ShiftedOperand,
- uint64_t &EncodedShiftImm) {
- // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
- if (!Dst.hasOneUse())
- return false;
- EVT VT = Dst.getValueType();
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Caller should guarantee that VT is one of i32 or i64");
- const unsigned SizeInBits = VT.getSizeInBits();
- SDLoc DL(Dst.getNode());
- uint64_t AndImm, ShlImm;
- if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
- isShiftedMask_64(AndImm)) {
- // Avoid transforming 'DstOp0' if it has other uses than the AND node.
- SDValue DstOp0 = Dst.getOperand(0);
- if (!DstOp0.hasOneUse())
- return false;
- // An example to illustrate the transformation
- // From:
- // lsr x8, x1, #1
- // and x8, x8, #0x3f80
- // bfxil x8, x1, #0, #7
- // To:
- // and x8, x23, #0x7f
- // ubfx x9, x23, #8, #7
- // orr x23, x8, x9, lsl #7
- //
- // The number of instructions remains the same, but ORR is faster than BFXIL
- // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
- // the dependency chain is improved after the transformation.
- uint64_t SrlImm;
- if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
- uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm);
- if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
- unsigned MaskWidth =
- countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask);
- unsigned UBFMOpc =
- (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
- SDNode *UBFMNode = CurDAG->getMachineNode(
- UBFMOpc, DL, VT, DstOp0.getOperand(0),
- CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
- VT),
- CurDAG->getTargetConstant(
- SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
- ShiftedOperand = SDValue(UBFMNode, 0);
- EncodedShiftImm = AArch64_AM::getShifterImm(
- AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
- return true;
- }
- }
- return false;
- }
- if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
- ShiftedOperand = Dst.getOperand(0);
- EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
- return true;
- }
- uint64_t SrlImm;
- if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
- ShiftedOperand = Dst.getOperand(0);
- EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
- return true;
- }
- return false;
- }
- // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
- // the operands and select it to AArch64::ORR with shifted registers if
- // that's more efficient. Returns true iff selection to AArch64::ORR happens.
- static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
- SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
- const bool BiggerPattern) {
- EVT VT = N->getValueType(0);
- assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
- assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
- (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
- "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Expect result type to be i32 or i64 since N is combinable to BFM");
- SDLoc DL(N);
- // Bail out if BFM simplifies away one node in BFM Dst.
- if (OrOpd1 != Dst)
- return false;
- const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
- // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
- // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
- if (BiggerPattern) {
- uint64_t SrcAndImm;
- if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
- isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
- // OrOpd0 = AND Src, #Mask
- // So BFM simplifies away one AND node from Src and doesn't simplify away
- // nodes from Dst. If ORR with left-shifted operand also simplifies away
- // one node (from Rd), ORR is better since it has higher throughput and
- // smaller latency than BFM on many AArch64 processors (and for the rest
- // ORR is at least as good as BFM).
- SDValue ShiftedOperand;
- uint64_t EncodedShiftImm;
- if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
- EncodedShiftImm)) {
- SDValue Ops[] = {OrOpd0, ShiftedOperand,
- CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
- CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
- return true;
- }
- }
- return false;
- }
- assert((!BiggerPattern) && "BiggerPattern should be handled above");
- uint64_t ShlImm;
- if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
- if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
- SDValue Ops[] = {
- Dst, Src,
- CurDAG->getTargetConstant(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
- CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
- return true;
- }
- // Select the following pattern to left-shifted operand rather than BFI.
- // %val1 = op ..
- // %val2 = shl %val1, #imm
- // %res = or %val1, %val2
- //
- // If N is selected to be BFI, we know that
- // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
- // folded into the BFI)
- // 2) OrOpd1 would be the destination operand (i.e., preserved)
- //
- // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
- if (OrOpd0.getOperand(0) == OrOpd1) {
- SDValue Ops[] = {
- OrOpd1, OrOpd1,
- CurDAG->getTargetConstant(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
- CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
- return true;
- }
- }
- uint64_t SrlImm;
- if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
- // Select the following pattern to right-shifted operand rather than BFXIL.
- // %val1 = op ..
- // %val2 = lshr %val1, #imm
- // %res = or %val1, %val2
- //
- // If N is selected to be BFXIL, we know that
- // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
- // folded into the BFXIL)
- // 2) OrOpd1 would be the destination operand (i.e., preserved)
- //
- // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
- if (OrOpd0.getOperand(0) == OrOpd1) {
- SDValue Ops[] = {
- OrOpd1, OrOpd1,
- CurDAG->getTargetConstant(
- AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
- CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
- return true;
- }
- }
- return false;
- }
- static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
- SelectionDAG *CurDAG) {
- assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
- unsigned BitWidth = VT.getSizeInBits();
- // Because of simplify-demanded-bits in DAGCombine, involved masks may not
- // have the expected shape. Try to undo that.
- unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
- unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
- // Given an OR operation, check if we have the following pattern
- // ubfm c, b, imm, imm2 (or something that does the same job, see
- // isBitfieldExtractOp)
- // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
- // countTrailingZeros(mask2) == imm2 - imm + 1
- // f = d | c
- // if yes, replace the OR instruction with:
- // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
- // OR is commutative, check all combinations of operand order and values of
- // BiggerPattern, i.e.
- // Opd0, Opd1, BiggerPattern=false
- // Opd1, Opd0, BiggerPattern=false
- // Opd0, Opd1, BiggerPattern=true
- // Opd1, Opd0, BiggerPattern=true
- // Several of these combinations may match, so check with BiggerPattern=false
- // first since that will produce better results by matching more instructions
- // and/or inserting fewer extra instructions.
- for (int I = 0; I < 4; ++I) {
- SDValue Dst, Src;
- unsigned ImmR, ImmS;
- bool BiggerPattern = I / 2;
- SDValue OrOpd0Val = N->getOperand(I % 2);
- SDNode *OrOpd0 = OrOpd0Val.getNode();
- SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
- SDNode *OrOpd1 = OrOpd1Val.getNode();
- unsigned BFXOpc;
- int DstLSB, Width;
- if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
- NumberOfIgnoredLowBits, BiggerPattern)) {
- // Check that the returned opcode is compatible with the pattern,
- // i.e., same type and zero extended (U and not S)
- if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
- (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
- continue;
- // Compute the width of the bitfield insertion
- DstLSB = 0;
- Width = ImmS - ImmR + 1;
- // FIXME: This constraint is to catch bitfield insertion; we may
- // want to widen the pattern if we want to handle the general bitfield
- // move case.
- if (Width <= 0)
- continue;
- // If the mask on the insertee is correct, we have a BFXIL operation. We
- // can share the ImmR and ImmS values from the already-computed UBFM.
- } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
- BiggerPattern,
- Src, DstLSB, Width)) {
- ImmR = (BitWidth - DstLSB) % BitWidth;
- ImmS = Width - 1;
- } else
- continue;
- // Check the second part of the pattern
- EVT VT = OrOpd1Val.getValueType();
- assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
- // Compute the Known Zero for the candidate of the first operand.
- // This allows us to catch more general cases than just looking for
- // an AND with an immediate. Indeed, simplify-demanded-bits may have
- // removed the AND instruction because it proved it was useless.
- KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
- // Check if there is enough room for the second operand to appear
- // in the first one
- APInt BitsToBeInserted =
- APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
- if ((BitsToBeInserted & ~Known.Zero) != 0)
- continue;
- // Set the first operand
- uint64_t Imm;
- if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
- isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
- // In that case, we can eliminate the AND
- Dst = OrOpd1->getOperand(0);
- else
- // Maybe the AND has been removed by simplify-demanded-bits
- // or is useful because it discards more bits
- Dst = OrOpd1Val;
- // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
- // with shifted operand is more efficient.
- if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
- BiggerPattern))
- return true;
- // both parts match
- SDLoc DL(N);
- SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
- CurDAG->getTargetConstant(ImmS, DL, VT)};
- unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
- // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
- // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
- // mask (e.g., 0x000ffff0).
- uint64_t Mask0Imm, Mask1Imm;
- SDValue And0 = N->getOperand(0);
- SDValue And1 = N->getOperand(1);
- if (And0.hasOneUse() && And1.hasOneUse() &&
- isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
- isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
- APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
- (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
- // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
- // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
- // bits to be inserted.
- if (isShiftedMask(Mask0Imm, VT)) {
- std::swap(And0, And1);
- std::swap(Mask0Imm, Mask1Imm);
- }
- SDValue Src = And1->getOperand(0);
- SDValue Dst = And0->getOperand(0);
- unsigned LSB = countTrailingZeros(Mask1Imm);
- int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
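- // E.g. (illustrative): Mask1Imm == 0x0000fff0 on i32 gives LSB == 4 and
- // Width == 12, so bits [4, 15] of Y are inserted into X.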
- // The BFXIL inserts the low-order bits from a source register, so right
- // shift the needed bits into place.
- SDLoc DL(N);
- unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
- uint64_t LsrImm = LSB;
- if (Src->hasOneUse() &&
- isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
- (LsrImm + LSB) < BitWidth) {
- Src = Src->getOperand(0);
- LsrImm += LSB;
- }
- SDNode *LSR = CurDAG->getMachineNode(
- ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
- CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
- // BFXIL is an alias of BFM, so translate to BFM operands.
- unsigned ImmR = (BitWidth - LSB) % BitWidth;
- unsigned ImmS = Width - 1;
- // Create the BFXIL instruction.
- SDValue Ops[] = {Dst, SDValue(LSR, 0),
- CurDAG->getTargetConstant(ImmR, DL, VT),
- CurDAG->getTargetConstant(ImmS, DL, VT)};
- unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
- return false;
- }
- bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
- if (N->getOpcode() != ISD::OR)
- return false;
- APInt NUsefulBits;
- getUsefulBits(SDValue(N, 0), NUsefulBits);
- // If none of the bits are useful, just replace the node with UNDEF.
- if (!NUsefulBits) {
- CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
- return true;
- }
- if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
- return true;
- return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
- }
- /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
- /// equivalent of a left shift by a constant amount followed by an AND masking
- /// out a contiguous set of bits.
- bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
- if (N->getOpcode() != ISD::AND)
- return false;
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
- SDValue Op0;
- int DstLSB, Width;
- if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
- Op0, DstLSB, Width))
- return false;
- // ImmR is the rotate right amount.
- unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
- // ImmS is the most significant bit of the source to be moved.
- unsigned ImmS = Width - 1;
- SDLoc DL(N);
- SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
- CurDAG->getTargetConstant(ImmS, DL, VT)};
- unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
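- // E.g. (illustrative): "(and (shl x, 4), 0xff0)" on i32 is selected as
- // "UBFM w, x, #28, #7", the encoding of "UBFIZ w, x, #4, #8".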
- /// tryShiftAmountMod - Take advantage of the implicit modulo that variable
- /// shift/rotate instructions apply to their shift amount.
- bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
- EVT VT = N->getValueType(0);
- unsigned Opc;
- switch (N->getOpcode()) {
- case ISD::ROTR:
- Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
- break;
- case ISD::SHL:
- Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
- break;
- case ISD::SRL:
- Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
- break;
- case ISD::SRA:
- Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
- break;
- default:
- return false;
- }
- uint64_t Size;
- uint64_t Bits;
- if (VT == MVT::i32) {
- Bits = 5;
- Size = 32;
- } else if (VT == MVT::i64) {
- Bits = 6;
- Size = 64;
- } else
- return false;
- SDValue ShiftAmt = N->getOperand(1);
- SDLoc DL(N);
- SDValue NewShiftAmt;
- // Skip over an extend of the shift amount.
- if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
- ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
- ShiftAmt = ShiftAmt->getOperand(0);
- if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
- SDValue Add0 = ShiftAmt->getOperand(0);
- SDValue Add1 = ShiftAmt->getOperand(1);
- uint64_t Add0Imm;
- uint64_t Add1Imm;
- if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
- // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
- // to avoid the ADD/SUB.
- NewShiftAmt = Add0;
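- // E.g. (illustrative): an i64 shift by (x + 64) produces the same result
- // as a shift by x, since the variable shift instructions only read the
- // low 6 bits of the amount, so the ADD is dropped.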
- } else if (ShiftAmt->getOpcode() == ISD::SUB &&
- isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
- (Add0Imm % Size == 0)) {
- // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
- // to generate a NEG instead of a SUB from a constant.
- unsigned NegOpc;
- unsigned ZeroReg;
- EVT SubVT = ShiftAmt->getValueType(0);
- if (SubVT == MVT::i32) {
- NegOpc = AArch64::SUBWrr;
- ZeroReg = AArch64::WZR;
- } else {
- assert(SubVT == MVT::i64);
- NegOpc = AArch64::SUBXrr;
- ZeroReg = AArch64::XZR;
- }
- SDValue Zero =
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
- MachineSDNode *Neg =
- CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
- NewShiftAmt = SDValue(Neg, 0);
- } else if (ShiftAmt->getOpcode() == ISD::SUB &&
- isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
- // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
- // to generate a NOT instead of a SUB from a constant.
- unsigned NotOpc;
- unsigned ZeroReg;
- EVT SubVT = ShiftAmt->getValueType(0);
- if (SubVT == MVT::i32) {
- NotOpc = AArch64::ORNWrr;
- ZeroReg = AArch64::WZR;
- } else {
- assert(SubVT == MVT::i64);
- NotOpc = AArch64::ORNXrr;
- ZeroReg = AArch64::XZR;
- }
- SDValue Zero =
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
- MachineSDNode *Not =
- CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
- NewShiftAmt = SDValue(Not, 0);
- } else
- return false;
- } else {
- // If the shift amount is masked with an AND, check that the mask covers the
- // bits that are implicitly ANDed off by the above opcodes, and if so, skip
- // the AND.
- uint64_t MaskImm;
- if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
- !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
- return false;
- if (countTrailingOnes(MaskImm) < Bits)
- return false;
- NewShiftAmt = ShiftAmt->getOperand(0);
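- // E.g. (illustrative): for an i32 shift by (x & 0x1f), the mask has 5
- // trailing ones, which covers the 5 bits the instruction reads, so the
- // AND is dropped.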
- }
- // Narrow/widen the shift amount to match the size of the shift operation.
- if (VT == MVT::i32)
- NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
- else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
- MachineSDNode *Ext = CurDAG->getMachineNode(
- AArch64::SUBREG_TO_REG, DL, VT,
- CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
- NewShiftAmt = SDValue(Ext, 0);
- }
- SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
- }
- bool
- AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth) {
- APFloat FVal(0.0);
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- FVal = CN->getValueAPF();
- else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
- // Some otherwise illegal constants are allowed in this case.
- if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
- !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
- return false;
- ConstantPoolSDNode *CN =
- dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
- FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
- } else
- return false;
- // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
- // is between 1 and 32 for a destination w-register, or 1 and 64 for an
- // x-register.
- //
- // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
- // want THIS_NODE to be 2^fbits. This is much easier to deal with using
- // integers.
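- //
- // E.g. (illustrative): for (fp_to_sint (fmul x, 256.0)) targeting a
- // w-register, FVal == 256.0 converts exactly to IntVal == 2^8, so FBits
- // == 8 below.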
- bool IsExact;
- // fbits is between 1 and 64 in the worst case, which means the fmul
- // could have 2^64 as an actual operand. Need 65 bits of precision.
- APSInt IntVal(65, true);
- FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
- // N.b. isPowerOf2 also checks for > 0.
- if (!IsExact || !IntVal.isPowerOf2()) return false;
- unsigned FBits = IntVal.logBase2();
- // Checks above should have guaranteed that we haven't lost information in
- // finding FBits, but it must still be in range.
- if (FBits == 0 || FBits > RegWidth) return false;
- FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
- return true;
- }
- // Inspects a register string of the form op0:op1:CRn:CRm:op2, extracts the
- // integer values of its fields, and combines them into the single immediate
- // used by the MRS/MSR instruction.
- static int getIntOperandFromRegisterString(StringRef RegString) {
- SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ':');
- if (Fields.size() == 1)
- return -1;
- assert(Fields.size() == 5
- && "Invalid number of fields in read register string");
- SmallVector<int, 5> Ops;
- bool AllIntFields = true;
- for (StringRef Field : Fields) {
- unsigned IntField;
- AllIntFields &= !Field.getAsInteger(10, IntField);
- Ops.push_back(IntField);
- }
- assert(AllIntFields &&
- "Unexpected non-integer value in special register string.");
- (void)AllIntFields;
- // Need to combine the integer fields of the string into a single value
- // based on the bit encoding of MRS/MSR instruction.
- return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
- (Ops[3] << 3) | (Ops[4]);
- }
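- // E.g. (a hand-worked illustration): "3:3:13:0:2" (the encoding of
- // TPIDR_EL0) packs to (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2
- // == 0xde82.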
- // Lower the read_register intrinsic to an MRS instruction node if the special
- // register string argument is either of the form detailed in the ACLE (the
- // form described in getIntOperandFromRegisterString) or is a named register
- // known by the MRS SysReg mapper.
- bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
- const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
- const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
- SDLoc DL(N);
- bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
- unsigned Opcode64Bit = AArch64::MRS;
- int Imm = getIntOperandFromRegisterString(RegString->getString());
- if (Imm == -1) {
- // No match; use the sysreg mapper to map the remaining possible strings to
- // the register value to be used for the instruction operand.
- const auto *TheReg =
- AArch64SysReg::lookupSysRegByName(RegString->getString());
- if (TheReg && TheReg->Readable &&
- TheReg->haveFeatures(Subtarget->getFeatureBits()))
- Imm = TheReg->Encoding;
- else
- Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
- if (Imm == -1) {
- // Still no match, see if this is "pc" or give up.
- if (!ReadIs128Bit && RegString->getString() == "pc") {
- Opcode64Bit = AArch64::ADR;
- Imm = 0;
- } else {
- return false;
- }
- }
- }
- SDValue InChain = N->getOperand(0);
- SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
- if (!ReadIs128Bit) {
- CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
- {SysRegImm, InChain});
- } else {
- SDNode *MRRS = CurDAG->getMachineNode(
- AArch64::MRRS, DL,
- {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
- {SysRegImm, InChain});
- // Sysregs have no endianness. The even register always contains the low
- // half of the register.
- SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
- SDValue(MRRS, 0));
- SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
- SDValue(MRRS, 0));
- SDValue OutChain = SDValue(MRRS, 1);
- ReplaceUses(SDValue(N, 0), Lo);
- ReplaceUses(SDValue(N, 1), Hi);
- ReplaceUses(SDValue(N, 2), OutChain);
- }
- return true;
- }
- // Lower the write_register intrinsic to an MSR instruction node if the special
- // register string argument is either of the form detailed in the ACLE (the
- // form described in getIntOperandFromRegisterString) or is a named register
- // known by the MSR SysReg mapper.
- bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
- const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
- const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
- SDLoc DL(N);
- bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
- if (!WriteIs128Bit) {
- // Check if the register was one of those allowed as the pstatefield value
- // in the MSR (immediate) instruction. To accept the values allowed in the
- // pstatefield for the MSR (immediate) instruction, we also require that an
- // immediate value has been provided as an argument; we know this is the
- // case, as it has been ensured by semantic checking.
- auto trySelectPState = [&](auto PMapper, unsigned State) {
- if (PMapper) {
- assert(isa<ConstantSDNode>(N->getOperand(2)) &&
- "Expected a constant integer expression.");
- unsigned Reg = PMapper->Encoding;
- uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- CurDAG->SelectNodeTo(
- N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
- CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
- return true;
- }
- return false;
- };
- if (trySelectPState(
- AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
- AArch64::MSRpstateImm4))
- return true;
- if (trySelectPState(
- AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
- AArch64::MSRpstateImm1))
- return true;
- }
- int Imm = getIntOperandFromRegisterString(RegString->getString());
- if (Imm == -1) {
- // Use the sysreg mapper to attempt to map the remaining possible strings
- // to the value for the register to be used for the MSR (register)
- // instruction operand.
- auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
- if (TheReg && TheReg->Writeable &&
- TheReg->haveFeatures(Subtarget->getFeatureBits()))
- Imm = TheReg->Encoding;
- else
- Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
- if (Imm == -1)
- return false;
- }
- SDValue InChain = N->getOperand(0);
- if (!WriteIs128Bit) {
- CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
- CurDAG->getTargetConstant(Imm, DL, MVT::i32),
- N->getOperand(2), InChain);
- } else {
- // No endian swap. The lower half always goes into the even subreg, and the
- // higher half always into the odd subreg.
- SDNode *Pair = CurDAG->getMachineNode(
- TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
- {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
- MVT::i32),
- N->getOperand(2),
- CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
- N->getOperand(3),
- CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
- CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
- CurDAG->getTargetConstant(Imm, DL, MVT::i32),
- SDValue(Pair, 0), InChain);
- }
- return true;
- }
- /// We've got special pseudo-instructions for these compare-and-swap operations.
- bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
- unsigned Opcode;
- EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
- // Leave IR for LSE if subtarget supports it.
- if (Subtarget->hasLSE()) return false;
- if (MemTy == MVT::i8)
- Opcode = AArch64::CMP_SWAP_8;
- else if (MemTy == MVT::i16)
- Opcode = AArch64::CMP_SWAP_16;
- else if (MemTy == MVT::i32)
- Opcode = AArch64::CMP_SWAP_32;
- else if (MemTy == MVT::i64)
- Opcode = AArch64::CMP_SWAP_64;
- else
- llvm_unreachable("Unknown AtomicCmpSwap type");
- MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
- SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
- N->getOperand(0)};
- SDNode *CmpSwap = CurDAG->getMachineNode(
- Opcode, SDLoc(N),
- CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
- MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
- ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
- ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
- CurDAG->RemoveDeadNode(N);
- return true;
- }
- bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
- SDValue &Shift) {
- if (!isa<ConstantSDNode>(N))
- return false;
- SDLoc DL(N);
- uint64_t Val = cast<ConstantSDNode>(N)
- ->getAPIntValue()
- .trunc(VT.getFixedSizeInBits())
- .getZExtValue();
- switch (VT.SimpleTy) {
- case MVT::i8:
- // All immediates are supported.
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
- return true;
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- // Support 8-bit unsigned immediates.
- if (Val <= 255) {
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
- return true;
- }
- // Support 16-bit unsigned immediates that are a multiple of 256.
- if (Val <= 65280 && Val % 256 == 0) {
- Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
- return true;
- }
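- // E.g. (illustrative): Val == 0x1200 is 0x12 << 8, so it is encoded as
- // Imm == 0x12 with Shift == 8.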
- break;
- default:
- break;
- }
- return false;
- }
- bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
- SDValue &Shift) {
- if (!isa<ConstantSDNode>(N))
- return false;
- SDLoc DL(N);
- int64_t Val = cast<ConstantSDNode>(N)
- ->getAPIntValue()
- .trunc(VT.getFixedSizeInBits())
- .getSExtValue();
- switch (VT.SimpleTy) {
- case MVT::i8:
- // All immediates are supported.
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
- return true;
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- // Support 8-bit signed immediates.
- if (Val >= -128 && Val <= 127) {
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
- return true;
- }
- // Support 16-bit signed immediates that are a multiple of 256.
- if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
- Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
- return true;
- }
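- // E.g. (illustrative): Val == -512 is -2 << 8, so it is encoded as
- // Imm == 0xfe with Shift == 8.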
- break;
- default:
- break;
- }
- return false;
- }
- bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
- if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- int64_t ImmVal = CNode->getSExtValue();
- SDLoc DL(N);
- if (ImmVal >= -128 && ImmVal < 128) {
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
- return true;
- }
- }
- return false;
- }
- bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
- if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getZExtValue();
- switch (VT.SimpleTy) {
- case MVT::i8:
- ImmVal &= 0xFF;
- break;
- case MVT::i16:
- ImmVal &= 0xFFFF;
- break;
- case MVT::i32:
- ImmVal &= 0xFFFFFFFF;
- break;
- case MVT::i64:
- break;
- default:
- llvm_unreachable("Unexpected type");
- }
- if (ImmVal < 256) {
- Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
- return true;
- }
- }
- return false;
- }
- bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
- bool Invert) {
- if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getZExtValue();
- SDLoc DL(N);
- if (Invert)
- ImmVal = ~ImmVal;
- // Replicate the immediate across 64 bits according to the element size.
- switch (VT.SimpleTy) {
- case MVT::i8:
- ImmVal &= 0xFF;
- ImmVal |= ImmVal << 8;
- ImmVal |= ImmVal << 16;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i16:
- ImmVal &= 0xFFFF;
- ImmVal |= ImmVal << 16;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i32:
- ImmVal &= 0xFFFFFFFF;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i64:
- break;
- default:
- llvm_unreachable("Unexpected type");
- }
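- // E.g. (illustrative): for VT == MVT::i16 and ImmVal == 0x00f0, the value
- // is replicated to 0x00f000f000f000f0 before being tested as a 64-bit
- // logical immediate.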
- uint64_t encoding;
- if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
- Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
- return true;
- }
- }
- return false;
- }
- // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
- // Rather than attempt to normalise everything, we can sometimes saturate the
- // shift amount during selection. This function also allows for consistent
- // isel patterns by ensuring the resulting "Imm" node is of the i32 type
- // required by the instructions.
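- //
- // E.g. (illustrative): for a shift on .h elements with Low == 0 and
- // High == 15, an amount of 20 saturates to 15 when AllowSaturation is set
- // and is rejected otherwise.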
- bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
- uint64_t High, bool AllowSaturation,
- SDValue &Imm) {
- if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CN->getZExtValue();
- // Reject shift amounts that are too small.
- if (ImmVal < Low)
- return false;
- // Reject or saturate shift amounts that are too big.
- if (ImmVal > High) {
- if (!AllowSaturation)
- return false;
- ImmVal = High;
- }
- Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
- return true;
- }
- return false;
- }
- bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
- // tagp(FrameIndex, IRGstack, tag_offset):
- // since the offset between FrameIndex and IRGstack is a compile-time
- // constant, this can be lowered to a single ADDG instruction.
- if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
- return false;
- }
- SDValue IRG_SP = N->getOperand(2);
- if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
- cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
- Intrinsic::aarch64_irg_sp) {
- return false;
- }
- const TargetLowering *TLI = getTargetLowering();
- SDLoc DL(N);
- int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
- SDValue FiOp = CurDAG->getTargetFrameIndex(
- FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
- SDNode *Out = CurDAG->getMachineNode(
- AArch64::TAGPstack, DL, MVT::i64,
- {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
- CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
- ReplaceNode(N, Out);
- return true;
- }
- void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
- assert(isa<ConstantSDNode>(N->getOperand(3)) &&
- "llvm.aarch64.tagp third argument must be an immediate");
- if (trySelectStackSlotTagP(N))
- return;
- // FIXME: the above applies in any case when the offset between Op1 and Op2
- // is a compile-time constant, not just for stack allocations.
- // General case for unrelated pointers in Op1 and Op2.
- SDLoc DL(N);
- int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
- SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
- {N->getOperand(1), N->getOperand(2)});
- SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
- {SDValue(N1, 0), N->getOperand(2)});
- SDNode *N3 = CurDAG->getMachineNode(
- AArch64::ADDG, DL, MVT::i64,
- {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
- CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
- ReplaceNode(N, N3);
- }
- // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
- // vector types larger than NEON don't have a matching SubRegIndex.
- static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
- assert(V.getValueType().isScalableVector() &&
- V.getValueType().getSizeInBits().getKnownMinValue() ==
- AArch64::SVEBitsPerBlock &&
- "Expected to extract from a packed scalable vector!");
- assert(VT.isFixedLengthVector() &&
- "Expected to extract a fixed length vector!");
- SDLoc DL(V);
- switch (VT.getSizeInBits()) {
- case 64: {
- auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
- return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
- }
- case 128: {
- auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
- return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
- }
- default: {
- auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
- return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- }
- }
- }
- // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
- // vector types larger than NEON don't have a matching SubRegIndex.
- static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
- assert(VT.isScalableVector() &&
- VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
- "Expected to insert into a packed scalable vector!");
- assert(V.getValueType().isFixedLengthVector() &&
- "Expected to insert a fixed length vector!");
- SDLoc DL(V);
- switch (V.getValueType().getSizeInBits()) {
- case 64: {
- auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
- auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
- return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
- SDValue(Container, 0), V, SubReg);
- }
- case 128: {
- auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
- auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
- return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
- SDValue(Container, 0), V, SubReg);
- }
- default: {
- auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
- return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- }
- }
- }
- void AArch64DAGToDAGISel::Select(SDNode *Node) {
- // If we have a machine-opcode node, it has already been selected.
- if (Node->isMachineOpcode()) {
- LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- Node->setNodeId(-1);
- return;
- }
- // A few custom selection cases.
- EVT VT = Node->getValueType(0);
- switch (Node->getOpcode()) {
- default:
- break;
- case ISD::ATOMIC_CMP_SWAP:
- if (SelectCMP_SWAP(Node))
- return;
- break;
- case ISD::READ_REGISTER:
- case AArch64ISD::MRRS:
- if (tryReadRegister(Node))
- return;
- break;
- case ISD::WRITE_REGISTER:
- case AArch64ISD::MSRR:
- if (tryWriteRegister(Node))
- return;
- break;
- case ISD::ADD:
- if (tryMLAV64LaneV128(Node))
- return;
- break;
- case ISD::LOAD: {
- // Try to select as an indexed load. Fall through to normal processing
- // if we can't.
- if (tryIndexedLoad(Node))
- return;
- break;
- }
- case ISD::SRL:
- case ISD::AND:
- case ISD::SRA:
- case ISD::SIGN_EXTEND_INREG:
- if (tryBitfieldExtractOp(Node))
- return;
- if (tryBitfieldInsertInZeroOp(Node))
- return;
- [[fallthrough]];
- case ISD::ROTR:
- case ISD::SHL:
- if (tryShiftAmountMod(Node))
- return;
- break;
- case ISD::SIGN_EXTEND:
- if (tryBitfieldExtractOpFromSExt(Node))
- return;
- break;
- case ISD::FP_EXTEND:
- if (tryHighFPExt(Node))
- return;
- break;
- case ISD::OR:
- if (tryBitfieldInsertOp(Node))
- return;
- break;
- case ISD::EXTRACT_SUBVECTOR: {
- // Bail when not a "cast"-like extract_subvector.
- if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
- break;
- // Bail when normal isel can do the job.
- EVT InVT = Node->getOperand(0).getValueType();
- if (VT.isScalableVector() || InVT.isFixedLengthVector())
- break;
- // NOTE: We can only get here when doing fixed length SVE code generation.
- // We do manual selection because the types involved are not linked to real
- // registers (despite being legal) and must be coerced into SVE registers.
- //
- // NOTE: If the above changes, be aware that selection will still not work
- // because the td definition of extract_vector does not support extracting
- // a fixed length vector from a scalable vector.
- ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
- return;
- }
- case ISD::INSERT_SUBVECTOR: {
- // Bail when not a "cast"-like insert_subvector.
- if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
- break;
- if (!Node->getOperand(0).isUndef())
- break;
- // Bail when normal isel should do the job.
- EVT InVT = Node->getOperand(1).getValueType();
- if (VT.isFixedLengthVector() || InVT.isScalableVector())
- break;
- // NOTE: We can only get here when doing fixed length SVE code generation.
- // We do manual selection because the types involved are not linked to real
- // registers (despite being legal) and must be coerced into SVE registers.
- //
- // NOTE: If the above changes, be aware that selection will still not work
- // because the td definition of insert_vector does not support inserting a
- // fixed length vector into a scalable vector.
- ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
- return;
- }
- case ISD::Constant: {
- // Materialize zero constants as copies from WZR/XZR. This allows
- // the coalescer to propagate these into other instructions.
- ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
- if (ConstNode->isZero()) {
- if (VT == MVT::i32) {
- SDValue New = CurDAG->getCopyFromReg(
- CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
- ReplaceNode(Node, New.getNode());
- return;
- } else if (VT == MVT::i64) {
- SDValue New = CurDAG->getCopyFromReg(
- CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
- ReplaceNode(Node, New.getNode());
- return;
- }
- }
- break;
- }
- case ISD::FrameIndex: {
- // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
- const TargetLowering *TLI = getTargetLowering();
- SDValue TFI = CurDAG->getTargetFrameIndex(
- FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- SDLoc DL(Node);
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
- CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
- CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
- return;
- }
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default:
- break;
- case Intrinsic::aarch64_ldaxp:
- case Intrinsic::aarch64_ldxp: {
- unsigned Op =
- IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
- SDValue MemAddr = Node->getOperand(2);
- SDLoc DL(Node);
- SDValue Chain = Node->getOperand(0);
- SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
- MVT::Other, MemAddr, Chain);
- // Transfer memoperands.
- MachineMemOperand *MemOp =
- cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
- ReplaceNode(Node, Ld);
- return;
- }
- case Intrinsic::aarch64_stlxp:
- case Intrinsic::aarch64_stxp: {
- unsigned Op =
- IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
- SDLoc DL(Node);
- SDValue Chain = Node->getOperand(0);
- SDValue ValLo = Node->getOperand(2);
- SDValue ValHi = Node->getOperand(3);
- SDValue MemAddr = Node->getOperand(4);
- // Place arguments in the right order.
- SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
- SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
- // Transfer memoperands.
- MachineMemOperand *MemOp =
- cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
- ReplaceNode(Node, St);
- return;
- }
- case Intrinsic::aarch64_neon_ld1x2:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld1x3:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld1x4:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld2:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld3:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld4:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld2r:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld3r:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld4r:
- if (VT == MVT::v8i8) {
- SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld2lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectLoadLane(Node, 2, AArch64::LD2i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectLoadLane(Node, 2, AArch64::LD2i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectLoadLane(Node, 2, AArch64::LD2i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectLoadLane(Node, 2, AArch64::LD2i64);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld3lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectLoadLane(Node, 3, AArch64::LD3i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectLoadLane(Node, 3, AArch64::LD3i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectLoadLane(Node, 3, AArch64::LD3i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectLoadLane(Node, 3, AArch64::LD3i64);
- return;
- }
- break;
- case Intrinsic::aarch64_neon_ld4lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectLoadLane(Node, 4, AArch64::LD4i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectLoadLane(Node, 4, AArch64::LD4i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectLoadLane(Node, 4, AArch64::LD4i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectLoadLane(Node, 4, AArch64::LD4i64);
- return;
- }
- break;
- case Intrinsic::aarch64_ld64b:
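-      // LD64B loads 64 bytes into eight consecutive X registers; x8sub_0
-      // names the first subregister of the GPR64x8 tuple result.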
- SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
- return;
- case Intrinsic::aarch64_sve_ld2_sret: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
- true);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
- true);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
- true);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
- true);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_sve_ld3_sret: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
- true);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
- true);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
- true);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
- true);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_sve_ld4_sret: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
- true);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
- true);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
- true);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
- true);
- return;
- }
- break;
- }
- case Intrinsic::swift_async_context_addr: {
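-      // The Swift async context pointer is kept in the slot immediately
-      // below the frame pointer, so form the address FP - 8.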
- SDLoc DL(Node);
- SDValue Chain = Node->getOperand(0);
- SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
- SDValue Res = SDValue(
- CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
- CurDAG->getTargetConstant(8, DL, MVT::i32),
- CurDAG->getTargetConstant(0, DL, MVT::i32)),
- 0);
- ReplaceUses(SDValue(Node, 0), Res);
- ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
- CurDAG->RemoveDeadNode(Node);
- auto &MF = CurDAG->getMachineFunction();
- MF.getFrameInfo().setFrameAddressIsTaken(true);
- MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
- return;
- }
- }
- } break;
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
- case Intrinsic::aarch64_tagp:
- SelectTagP(Node);
- return;
- case Intrinsic::aarch64_neon_tbl2:
- SelectTable(Node, 2,
- VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
- false);
- return;
- case Intrinsic::aarch64_neon_tbl3:
- SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
- : AArch64::TBLv16i8Three,
- false);
- return;
- case Intrinsic::aarch64_neon_tbl4:
- SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
- : AArch64::TBLv16i8Four,
- false);
- return;
- case Intrinsic::aarch64_neon_tbx2:
- SelectTable(Node, 2,
- VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
- true);
- return;
- case Intrinsic::aarch64_neon_tbx3:
- SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
- : AArch64::TBXv16i8Three,
- true);
- return;
- case Intrinsic::aarch64_neon_tbx4:
- SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
- : AArch64::TBXv16i8Four,
- true);
- return;
- case Intrinsic::aarch64_neon_smull:
- case Intrinsic::aarch64_neon_umull:
- if (tryMULLV64LaneV128(IntNo, Node))
- return;
- break;
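-    // Each predicate-pair WHILE* intrinsic below picks the B/H/S/D
-    // encoding from the result's element type via SelectOpcodeFromVT.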
- case Intrinsic::aarch64_sve_whilege_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
- AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilegt_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
- AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilehi_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
- AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilehs_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
- AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilele_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
- AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilelo_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
- AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilels_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
- AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
- case Intrinsic::aarch64_sve_whilelt_x2:
- if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
- Node->getValueType(0),
- {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
- AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
- SelectWhilePair(Node, Op);
- return;
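-    // Multi-vector conversions: the _x2/_x4 intrinsics convert two or four
-    // Z registers with a single instruction.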
- case Intrinsic::aarch64_sve_fcvts_x2:
- SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
- return;
- case Intrinsic::aarch64_sve_scvtf_x2:
- SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
- return;
- case Intrinsic::aarch64_sve_fcvtu_x2:
- SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
- return;
- case Intrinsic::aarch64_sve_ucvtf_x2:
- SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
- return;
- case Intrinsic::aarch64_sve_fcvts_x4:
- SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
- return;
- case Intrinsic::aarch64_sve_scvtf_x4:
- SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
- return;
- case Intrinsic::aarch64_sve_fcvtu_x4:
- SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
- return;
- case Intrinsic::aarch64_sve_ucvtf_x4:
- SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
- return;
- }
- break;
- }
- case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- if (Node->getNumOperands() >= 3)
- VT = Node->getOperand(2)->getValueType(0);
- switch (IntNo) {
- default:
- break;
- case Intrinsic::aarch64_neon_st1x2: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 2, AArch64::ST1Twov8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 2, AArch64::ST1Twov16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 2, AArch64::ST1Twov4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 2, AArch64::ST1Twov8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 2, AArch64::ST1Twov2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 2, AArch64::ST1Twov4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 2, AArch64::ST1Twov2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectStore(Node, 2, AArch64::ST1Twov1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st1x3: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 3, AArch64::ST1Threev8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 3, AArch64::ST1Threev16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 3, AArch64::ST1Threev4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 3, AArch64::ST1Threev8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 3, AArch64::ST1Threev2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 3, AArch64::ST1Threev4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 3, AArch64::ST1Threev2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectStore(Node, 3, AArch64::ST1Threev1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st1x4: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 4, AArch64::ST1Fourv8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 4, AArch64::ST1Fourv16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 4, AArch64::ST1Fourv4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 4, AArch64::ST1Fourv8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 4, AArch64::ST1Fourv2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 4, AArch64::ST1Fourv4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 4, AArch64::ST1Fourv2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectStore(Node, 4, AArch64::ST1Fourv1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st2: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 2, AArch64::ST2Twov8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 2, AArch64::ST2Twov16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 2, AArch64::ST2Twov4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 2, AArch64::ST2Twov8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 2, AArch64::ST2Twov2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 2, AArch64::ST2Twov4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 2, AArch64::ST2Twov2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
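-        // As with the loads, there is no .1d form of ST2/ST3/ST4; ST1 on
-        // consecutive D registers is equivalent for single-element vectors.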
- SelectStore(Node, 2, AArch64::ST1Twov1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st3: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 3, AArch64::ST3Threev8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 3, AArch64::ST3Threev16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 3, AArch64::ST3Threev4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 3, AArch64::ST3Threev8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 3, AArch64::ST3Threev2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 3, AArch64::ST3Threev4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 3, AArch64::ST3Threev2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectStore(Node, 3, AArch64::ST1Threev1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st4: {
- if (VT == MVT::v8i8) {
- SelectStore(Node, 4, AArch64::ST4Fourv8b);
- return;
- } else if (VT == MVT::v16i8) {
- SelectStore(Node, 4, AArch64::ST4Fourv16b);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v4bf16) {
- SelectStore(Node, 4, AArch64::ST4Fourv4h);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
- VT == MVT::v8bf16) {
- SelectStore(Node, 4, AArch64::ST4Fourv8h);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectStore(Node, 4, AArch64::ST4Fourv2s);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectStore(Node, 4, AArch64::ST4Fourv4s);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectStore(Node, 4, AArch64::ST4Fourv2d);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectStore(Node, 4, AArch64::ST1Fourv1d);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st2lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectStoreLane(Node, 2, AArch64::ST2i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectStoreLane(Node, 2, AArch64::ST2i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectStoreLane(Node, 2, AArch64::ST2i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectStoreLane(Node, 2, AArch64::ST2i64);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st3lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectStoreLane(Node, 3, AArch64::ST3i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectStoreLane(Node, 3, AArch64::ST3i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectStoreLane(Node, 3, AArch64::ST3i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectStoreLane(Node, 3, AArch64::ST3i64);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_neon_st4lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectStoreLane(Node, 4, AArch64::ST4i8);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectStoreLane(Node, 4, AArch64::ST4i16);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectStoreLane(Node, 4, AArch64::ST4i32);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectStoreLane(Node, 4, AArch64::ST4i64);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_sve_st2: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_sve_st3: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
- return;
- }
- break;
- }
- case Intrinsic::aarch64_sve_st4: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
- return;
- }
- break;
- }
- }
- break;
- }
- case AArch64ISD::LD2post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD3post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD4post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD1x2post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD1x3post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD1x4post: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD1DUPpost: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD2DUPpost: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD3DUPpost: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD4DUPpost: {
- if (VT == MVT::v8i8) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
- return;
- }
- break;
- }
- case AArch64ISD::LD1LANEpost: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::LD2LANEpost: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::LD3LANEpost: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::LD4LANEpost: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST2post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST3post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST4post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST1x2post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST1x3post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
- return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST1x4post: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v8i8) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
- return;
- } else if (VT == MVT::v16i8) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
- return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
- return;
- } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
- return;
- } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST2LANEpost: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST3LANEpost: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::ST4LANEpost: {
- VT = Node->getOperand(1).getValueType();
- if (VT == MVT::v16i8 || VT == MVT::v8i8) {
- SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
- return;
- } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
- return;
- } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32) {
- SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
- return;
- } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64) {
- SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
- return;
- }
- break;
- }
- case AArch64ISD::SVE_LD2_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
- return;
- }
- break;
- }
- case AArch64ISD::SVE_LD3_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
- return;
- }
- break;
- }
- case AArch64ISD::SVE_LD4_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
- return;
- }
- break;
- }
- }
- // Select the default instruction
- SelectCode(Node);
- }
- /// createAArch64ISelDag - This pass converts a legalized DAG into an
- /// AArch64-specific DAG, ready for instruction scheduling.
- FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new AArch64DAGToDAGISel(TM, OptLevel);
- }
- /// When \p PredVT is a scalable vector predicate in the form
- /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
- /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
- /// structured vectors (NumVec > 1), the output data type is
- /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
- /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
- /// EVT.
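- /// For example, PredVT = MVT::nxv4i1 yields MVT::nxv4i32 for NumVec = 1
- /// (4 x 32 = 128) and MVT::nxv8i32 for NumVec = 2.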
- static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
- unsigned NumVec) {
- assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
- if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
- return EVT();
- if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
- PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
- return EVT();
- ElementCount EC = PredVT.getVectorElementCount();
- EVT ScalarVT =
- EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
- EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
- return MemVT;
- }
- /// Return the EVT of the data associated with a memory operation in \p
- /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
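- /// A plain memory node reports its memory VT directly; the SVE structured
- /// loads and stores below derive it from the width of their governing
- /// predicate.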
- static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
- if (isa<MemSDNode>(Root))
- return cast<MemSDNode>(Root)->getMemoryVT();
- if (isa<MemIntrinsicSDNode>(Root))
- return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
- const unsigned Opcode = Root->getOpcode();
- // For custom ISD nodes, we have to look at them individually to extract the
- // type of the data moved to/from memory.
- switch (Opcode) {
- case AArch64ISD::LD1_MERGE_ZERO:
- case AArch64ISD::LD1S_MERGE_ZERO:
- case AArch64ISD::LDNF1_MERGE_ZERO:
- case AArch64ISD::LDNF1S_MERGE_ZERO:
- return cast<VTSDNode>(Root->getOperand(3))->getVT();
- case AArch64ISD::ST1_PRED:
- return cast<VTSDNode>(Root->getOperand(4))->getVT();
- case AArch64ISD::SVE_LD2_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
- case AArch64ISD::SVE_LD3_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
- case AArch64ISD::SVE_LD4_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
- default:
- break;
- }
- if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
- return EVT();
- switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
- default:
- return EVT();
- case Intrinsic::aarch64_sme_ldr:
- case Intrinsic::aarch64_sme_str:
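-    // SME LDR/STR move exactly one vector's worth of ZA data, so the
-    // memory VT is always nxv16i8 regardless of element type.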
- return MVT::nxv16i8;
- case Intrinsic::aarch64_sve_prf:
-    // We are using an SVE prefetch intrinsic; the memory type must be
-    // inferred from the width of the predicate.
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
- case Intrinsic::aarch64_sve_ld2_sret:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
- case Intrinsic::aarch64_sve_ld3_sret:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
- case Intrinsic::aarch64_sve_ld4_sret:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
- }
- }
- /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
- /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
- /// where Root is the memory access using N for its address.
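- /// For example, with MemVT = nxv4i32 (16 bytes per vscale unit), the
- /// address (add x0, (vscale * 32)) selects Base = x0 and OffImm = 2,
- /// i.e. the [x0, #2, mul vl] form, provided 2 lies within [Min, Max].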
- template <int64_t Min, int64_t Max>
- bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
- SDValue &Base,
- SDValue &OffImm) {
- const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
- const DataLayout &DL = CurDAG->getDataLayout();
- const MachineFrameInfo &MFI = MF->getFrameInfo();
- if (N.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- // We can only encode VL scaled offsets, so only fold in frame indexes
- // referencing SVE objects.
- if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
- }
- return false;
- }
- if (MemVT == EVT())
- return false;
- if (N.getOpcode() != ISD::ADD)
- return false;
- SDValue VScale = N.getOperand(1);
- if (VScale.getOpcode() != ISD::VSCALE)
- return false;
- TypeSize TS = MemVT.getSizeInBits();
- int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
- int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
- if ((MulImm % MemWidthBytes) != 0)
- return false;
- int64_t Offset = MulImm / MemWidthBytes;
- if (Offset < Min || Offset > Max)
- return false;
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- // We can only encode VL scaled offsets, so only fold in frame indexes
- // referencing SVE objects.
- if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- }
- OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
- return true;
- }
- /// Select register plus register addressing mode for SVE, with scaled
- /// offset.
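- /// For example, with Scale == 2 (32-bit elements), (add x0, (shl x1, 2))
- /// selects Base = x0 and Offset = x1, matching the [x0, x1, lsl #2] form.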
- bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
- SDValue &Base,
- SDValue &Offset) {
- if (N.getOpcode() != ISD::ADD)
- return false;
- // Process an ADD node.
- const SDValue LHS = N.getOperand(0);
- const SDValue RHS = N.getOperand(1);
-  // 8-bit data does not come with an SHL node (no scaling is needed), so
-  // it is treated separately.
- if (Scale == 0) {
- Base = LHS;
- Offset = RHS;
- return true;
- }
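-  // A constant RHS can still use the reg+reg form: materialize the scaled
-  // index (ImmOff >> Scale) in a register and use it as the offset.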
- if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t ImmOff = C->getSExtValue();
- unsigned Size = 1 << Scale;
- // To use the reg+reg addressing mode, the immediate must be a multiple of
- // the vector element's byte size.
- if (ImmOff % Size)
- return false;
- SDLoc DL(N);
- Base = LHS;
- Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
- SDValue Ops[] = {Offset};
- SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
- Offset = SDValue(MI, 0);
- return true;
- }
- // Check if the RHS is a shift node with a constant.
- if (RHS.getOpcode() != ISD::SHL)
- return false;
- const SDValue ShiftRHS = RHS.getOperand(1);
- if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
- if (C->getZExtValue() == Scale) {
- Base = LHS;
- Offset = RHS.getOperand(0);
- return true;
- }
- return false;
- }
- bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
- const AArch64TargetLowering *TLI =
- static_cast<const AArch64TargetLowering *>(getTargetLowering());
- return TLI->isAllActivePredicate(*CurDAG, N);
- }
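- /// Select a tile-slice base and immediate offset for SME. A non-ADD node
- /// is used whole as the base with a zero offset; for an ADD, a constant
- /// RHS is folded when it lies in [0, MaxSize] and is a multiple of
- /// \p Scale, encoding the offset as ImmOff / Scale.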
- bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
- SDValue &Base, SDValue &Offset,
- unsigned Scale) {
- if (N.getOpcode() != ISD::ADD) {
- Base = N;
- Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
- }
- // Process an ADD node.
- const SDValue LHS = N.getOperand(0);
- const SDValue RHS = N.getOperand(1);
- if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t ImmOff = C->getSExtValue();
- if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
- return false;
- Base = LHS;
- Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
- return true;
- }
- return false;
- }
|