AArch64ISelDAGToDAG.cpp
  1. //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines an instruction selector for the AArch64 target.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "AArch64MachineFunctionInfo.h"
  13. #include "AArch64TargetMachine.h"
  14. #include "MCTargetDesc/AArch64AddressingModes.h"
  15. #include "llvm/ADT/APSInt.h"
  16. #include "llvm/CodeGen/SelectionDAGISel.h"
  17. #include "llvm/IR/Function.h" // To access function attributes.
  18. #include "llvm/IR/GlobalValue.h"
  19. #include "llvm/IR/Intrinsics.h"
  20. #include "llvm/IR/IntrinsicsAArch64.h"
  21. #include "llvm/Support/Debug.h"
  22. #include "llvm/Support/ErrorHandling.h"
  23. #include "llvm/Support/KnownBits.h"
  24. #include "llvm/Support/MathExtras.h"
  25. #include "llvm/Support/raw_ostream.h"
  26. using namespace llvm;
  27. #define DEBUG_TYPE "aarch64-isel"
  28. //===--------------------------------------------------------------------===//
  29. /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
  30. /// instructions for SelectionDAG operations.
  31. ///
  32. namespace {
  33. class AArch64DAGToDAGISel : public SelectionDAGISel {
  34. /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  35. /// make the right decision when generating code for different targets.
  36. const AArch64Subtarget *Subtarget;
  37. public:
  38. explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
  39. CodeGenOpt::Level OptLevel)
  40. : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
  41. StringRef getPassName() const override {
  42. return "AArch64 Instruction Selection";
  43. }
  44. bool runOnMachineFunction(MachineFunction &MF) override {
  45. Subtarget = &MF.getSubtarget<AArch64Subtarget>();
  46. return SelectionDAGISel::runOnMachineFunction(MF);
  47. }
  48. void Select(SDNode *Node) override;
  49. /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  50. /// inline asm expressions.
  51. bool SelectInlineAsmMemoryOperand(const SDValue &Op,
  52. unsigned ConstraintID,
  53. std::vector<SDValue> &OutOps) override;
  54. template <signed Low, signed High, signed Scale>
  55. bool SelectRDVLImm(SDValue N, SDValue &Imm);
  56. bool tryMLAV64LaneV128(SDNode *N);
  57. bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  58. bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  59. bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  60. bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  61. bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
  62. return SelectShiftedRegister(N, false, Reg, Shift);
  63. }
  64. bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
  65. return SelectShiftedRegister(N, true, Reg, Shift);
  66. }
  67. bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
  68. return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  69. }
  70. bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
  71. return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  72. }
  73. bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
  74. return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  75. }
  76. bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
  77. return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  78. }
  79. bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
  80. return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  81. }
  82. bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
  83. return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  84. }
  85. bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
  86. return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  87. }
  88. bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
  89. return SelectAddrModeIndexed(N, 1, Base, OffImm);
  90. }
  91. bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
  92. return SelectAddrModeIndexed(N, 2, Base, OffImm);
  93. }
  94. bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
  95. return SelectAddrModeIndexed(N, 4, Base, OffImm);
  96. }
  97. bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
  98. return SelectAddrModeIndexed(N, 8, Base, OffImm);
  99. }
  100. bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
  101. return SelectAddrModeIndexed(N, 16, Base, OffImm);
  102. }
  103. bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
  104. return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  105. }
  106. bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
  107. return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  108. }
  109. bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
  110. return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  111. }
  112. bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
  113. return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  114. }
  115. bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
  116. return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  117. }
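   /// SelectAddrModeIndexedUImm - Like SelectAddrModeIndexed, but only accepts
   /// scaled offsets up to Max; e.g. with Size = 16 and Max = 31, a byte offset
   /// of 0x1F0 selects as OffImm = 31, while 0x200 scales to 32 and falls back
   /// to the base-only form below.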
  118. template <unsigned Size, unsigned Max>
  119. bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
  120. // Test if there is an appropriate addressing mode and check if the
  121. // immediate fits.
  122. bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
  123. if (Found) {
  124. if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
  125. int64_t C = CI->getSExtValue();
  126. if (C <= Max)
  127. return true;
  128. }
  129. }
  130. // Otherwise, base only, materialize address in register.
  131. Base = N;
  132. OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  133. return true;
  134. }
  135. template<int Width>
  136. bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
  137. SDValue &SignExtend, SDValue &DoShift) {
  138. return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  139. }
  140. template<int Width>
  141. bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
  142. SDValue &SignExtend, SDValue &DoShift) {
  143. return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  144. }
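   /// Matches an explicit UNDEF, or a DUP/SPLAT_VECTOR whose operand is the
   /// integer or floating-point constant zero.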
  145. bool SelectDupZeroOrUndef(SDValue N) {
  146. switch(N->getOpcode()) {
  147. case ISD::UNDEF:
  148. return true;
  149. case AArch64ISD::DUP:
  150. case ISD::SPLAT_VECTOR: {
  151. auto Opnd0 = N->getOperand(0);
  152. if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
  153. if (CN->isZero())
  154. return true;
  155. if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
  156. if (CN->isZero())
  157. return true;
  158. break;
  159. }
  160. default:
  161. break;
  162. }
  163. return false;
  164. }
  165. bool SelectDupZero(SDValue N) {
  166. switch(N->getOpcode()) {
  167. case AArch64ISD::DUP:
  168. case ISD::SPLAT_VECTOR: {
  169. auto Opnd0 = N->getOperand(0);
  170. if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
  171. if (CN->isZero())
  172. return true;
  173. if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
  174. if (CN->isZero())
  175. return true;
  176. break;
  177. }
  178. }
  179. return false;
  180. }
  181. template<MVT::SimpleValueType VT>
  182. bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
  183. return SelectSVEAddSubImm(N, VT, Imm, Shift);
  184. }
  185. template <MVT::SimpleValueType VT, bool Invert = false>
  186. bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
  187. return SelectSVELogicalImm(N, VT, Imm, Invert);
  188. }
  189. template <MVT::SimpleValueType VT>
  190. bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
  191. return SelectSVEArithImm(N, VT, Imm);
  192. }
  193. template <unsigned Low, unsigned High, bool AllowSaturation = false>
  194. bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
  195. return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  196. }
  197. // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
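   // For example, with Min = 1, Max = 16, Scale = 2 and Shift = false, a
   // constant operand of 8 selects the multiplier 4.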
  198. template<signed Min, signed Max, signed Scale, bool Shift>
  199. bool SelectCntImm(SDValue N, SDValue &Imm) {
  200. if (!isa<ConstantSDNode>(N))
  201. return false;
  202. int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  203. if (Shift)
  204. MulImm = 1LL << MulImm;
  205. if ((MulImm % std::abs(Scale)) != 0)
  206. return false;
  207. MulImm /= Scale;
  208. if ((MulImm >= Min) && (MulImm <= Max)) {
  209. Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
  210. return true;
  211. }
  212. return false;
  213. }
  214. template <signed Max, signed Scale>
  215. bool SelectEXTImm(SDValue N, SDValue &Imm) {
  216. if (!isa<ConstantSDNode>(N))
  217. return false;
  218. int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  219. if (MulImm >= 0 && MulImm <= Max) {
  220. MulImm *= Scale;
  221. Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
  222. return true;
  223. }
  224. return false;
  225. }
  226. /// Form sequences of consecutive 64/128-bit registers for use in NEON
  227. /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
   228. /// between 1 and 4 elements. If it contains a single element, that element is
   229. /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  230. SDValue createDTuple(ArrayRef<SDValue> Vecs);
  231. SDValue createQTuple(ArrayRef<SDValue> Vecs);
  232. // Form a sequence of SVE registers for instructions using list of vectors,
  233. // e.g. structured loads and stores (ldN, stN).
  234. SDValue createZTuple(ArrayRef<SDValue> Vecs);
  235. /// Generic helper for the createDTuple/createQTuple
  236. /// functions. Those should almost always be called instead.
  237. SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
  238. const unsigned SubRegs[]);
  239. void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
  240. bool tryIndexedLoad(SDNode *N);
  241. bool trySelectStackSlotTagP(SDNode *N);
  242. void SelectTagP(SDNode *N);
  243. void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
  244. unsigned SubRegIdx);
  245. void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
  246. unsigned SubRegIdx);
  247. void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  248. void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  249. void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
  250. unsigned Opc_rr, unsigned Opc_ri,
  251. bool IsIntr = false);
  252. bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  253. /// SVE Reg+Imm addressing mode.
  254. template <int64_t Min, int64_t Max>
  255. bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
  256. SDValue &OffImm);
  257. /// SVE Reg+Reg address mode.
  258. template <unsigned Scale>
  259. bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
  260. return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  261. }
  262. void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  263. void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  264. void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  265. void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  266. void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
  267. unsigned Opc_rr, unsigned Opc_ri);
  268. std::tuple<unsigned, SDValue, SDValue>
  269. findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
  270. const SDValue &OldBase, const SDValue &OldOffset,
  271. unsigned Scale);
  272. bool tryBitfieldExtractOp(SDNode *N);
  273. bool tryBitfieldExtractOpFromSExt(SDNode *N);
  274. bool tryBitfieldInsertOp(SDNode *N);
  275. bool tryBitfieldInsertInZeroOp(SDNode *N);
  276. bool tryShiftAmountMod(SDNode *N);
  277. bool tryHighFPExt(SDNode *N);
  278. bool tryReadRegister(SDNode *N);
  279. bool tryWriteRegister(SDNode *N);
  280. // Include the pieces autogenerated from the target description.
  281. #include "AArch64GenDAGISel.inc"
  282. private:
  283. bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
  284. SDValue &Shift);
  285. bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
  286. SDValue &OffImm) {
  287. return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  288. }
  289. bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
  290. unsigned Size, SDValue &Base,
  291. SDValue &OffImm);
  292. bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
  293. SDValue &OffImm);
  294. bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
  295. SDValue &OffImm);
  296. bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
  297. SDValue &Offset, SDValue &SignExtend,
  298. SDValue &DoShift);
  299. bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
  300. SDValue &Offset, SDValue &SignExtend,
  301. SDValue &DoShift);
  302. bool isWorthFolding(SDValue V) const;
  303. bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
  304. SDValue &Offset, SDValue &SignExtend);
  305. template<unsigned RegWidth>
  306. bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
  307. return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  308. }
  309. bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
  310. bool SelectCMP_SWAP(SDNode *N);
  311. bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
  312. bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  313. bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
  314. bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  315. bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
  316. bool AllowSaturation, SDValue &Imm);
  317. bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  318. bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
  319. SDValue &Offset);
  320. bool SelectAllActivePredicate(SDValue N);
  321. };
  322. } // end anonymous namespace
  323. /// isIntImmediate - This method tests to see if the node is a constant
   324. /// operand. If so, Imm will receive the zero-extended value.
  325. static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  326. if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
  327. Imm = C->getZExtValue();
  328. return true;
  329. }
  330. return false;
  331. }
   332. // isIntImmediate - This method tests to see if the operand is a constant.
   333. // If so, Imm will receive the value.
  334. static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  335. return isIntImmediate(N.getNode(), Imm);
  336. }
  337. // isOpcWithIntImmediate - This method tests to see if the node is a specific
   338. // opcode and that it has an immediate integer right operand.
   339. // If so, Imm will receive the value.
  340. static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
  341. uint64_t &Imm) {
  342. return N->getOpcode() == Opc &&
  343. isIntImmediate(N->getOperand(1).getNode(), Imm);
  344. }
  345. bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
  346. const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  347. switch(ConstraintID) {
  348. default:
  349. llvm_unreachable("Unexpected asm memory constraint");
  350. case InlineAsm::Constraint_m:
  351. case InlineAsm::Constraint_o:
  352. case InlineAsm::Constraint_Q:
  353. // We need to make sure that this one operand does not end up in XZR, thus
  354. // require the address to be in a PointerRegClass register.
  355. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
  356. const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
  357. SDLoc dl(Op);
  358. SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
  359. SDValue NewOp =
  360. SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
  361. dl, Op.getValueType(),
  362. Op, RC), 0);
  363. OutOps.push_back(NewOp);
  364. return false;
  365. }
  366. return true;
  367. }
  368. /// SelectArithImmed - Select an immediate value that can be represented as
  369. /// a 12-bit value shifted left by either 0 or 12. If so, return true with
  370. /// Val set to the 12-bit value and Shift set to the shifter operand.
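   /// For example, 0x123 selects as (Val = 0x123, LSL #0) and 0x123000 as
   /// (Val = 0x123, LSL #12), while 0x123456 cannot be represented and is
   /// rejected.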
  371. bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
  372. SDValue &Shift) {
  373. // This function is called from the addsub_shifted_imm ComplexPattern,
   374. // which lists [imm] as the list of opcodes it is interested in; however,
  375. // we still need to check whether the operand is actually an immediate
  376. // here because the ComplexPattern opcode list is only used in
  377. // root-level opcode matching.
  378. if (!isa<ConstantSDNode>(N.getNode()))
  379. return false;
  380. uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  381. unsigned ShiftAmt;
  382. if (Immed >> 12 == 0) {
  383. ShiftAmt = 0;
  384. } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
  385. ShiftAmt = 12;
  386. Immed = Immed >> 12;
  387. } else
  388. return false;
  389. unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  390. SDLoc dl(N);
  391. Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  392. Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  393. return true;
  394. }
  395. /// SelectNegArithImmed - As above, but negates the value before trying to
  396. /// select it.
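   /// For example, an i32 operand of -5 (0xFFFFFFFB) is negated to 5 and then
   /// selected as (Val = 5, LSL #0); an operand of 0 is rejected because
   /// "cmp wN, #0" and "cmn wN, #0" differ in their effect on the C flag.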
  397. bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
  398. SDValue &Shift) {
  399. // This function is called from the addsub_shifted_imm ComplexPattern,
   400. // which lists [imm] as the list of opcodes it is interested in; however,
  401. // we still need to check whether the operand is actually an immediate
  402. // here because the ComplexPattern opcode list is only used in
  403. // root-level opcode matching.
  404. if (!isa<ConstantSDNode>(N.getNode()))
  405. return false;
  406. // The immediate operand must be a 24-bit zero-extended immediate.
  407. uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  408. // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  409. // have the opposite effect on the C flag, so this pattern mustn't match under
  410. // those circumstances.
  411. if (Immed == 0)
  412. return false;
  413. if (N.getValueType() == MVT::i32)
  414. Immed = ~((uint32_t)Immed) + 1;
  415. else
  416. Immed = ~Immed + 1ULL;
  417. if (Immed & 0xFFFFFFFFFF000000ULL)
  418. return false;
  419. Immed &= 0xFFFFFFULL;
  420. return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
  421. Shift);
  422. }
  423. /// getShiftTypeForNode - Translate a shift node to the corresponding
  424. /// ShiftType value.
  425. static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  426. switch (N.getOpcode()) {
  427. default:
  428. return AArch64_AM::InvalidShiftExtend;
  429. case ISD::SHL:
  430. return AArch64_AM::LSL;
  431. case ISD::SRL:
  432. return AArch64_AM::LSR;
  433. case ISD::SRA:
  434. return AArch64_AM::ASR;
  435. case ISD::ROTR:
  436. return AArch64_AM::ROR;
  437. }
  438. }
  439. /// Determine whether it is worth it to fold SHL into the addressing
  440. /// mode.
  441. static bool isWorthFoldingSHL(SDValue V) {
  442. assert(V.getOpcode() == ISD::SHL && "invalid opcode");
   443. // It is worth folding a logical shift of up to three places.
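   // (Shifts of 0-3 match the LSL scaling available in the register-offset
   // addressing modes for 1/2/4/8-byte accesses, e.g.
   // "ldr x0, [x1, x2, lsl #3]" for an 8-byte load.)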
  444. auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  445. if (!CSD)
  446. return false;
  447. unsigned ShiftVal = CSD->getZExtValue();
  448. if (ShiftVal > 3)
  449. return false;
  450. // Check if this particular node is reused in any non-memory related
  451. // operation. If yes, do not try to fold this node into the address
  452. // computation, since the computation will be kept.
  453. const SDNode *Node = V.getNode();
  454. for (SDNode *UI : Node->uses())
  455. if (!isa<MemSDNode>(*UI))
  456. for (SDNode *UII : UI->uses())
  457. if (!isa<MemSDNode>(*UII))
  458. return false;
  459. return true;
  460. }
   461. /// Determine whether it is worth it to fold V into an extended register.
  462. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  463. // Trivial if we are optimizing for code size or if there is only
  464. // one use of the value.
  465. if (CurDAG->shouldOptForSize() || V.hasOneUse())
  466. return true;
  467. // If a subtarget has a fastpath LSL we can fold a logical shift into
  468. // the addressing mode and save a cycle.
  469. if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
  470. isWorthFoldingSHL(V))
  471. return true;
  472. if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
  473. const SDValue LHS = V.getOperand(0);
  474. const SDValue RHS = V.getOperand(1);
  475. if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
  476. return true;
  477. if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
  478. return true;
  479. }
  480. // It hurts otherwise, since the value will be reused.
  481. return false;
  482. }
  483. /// SelectShiftedRegister - Select a "shifted register" operand. If the value
  484. /// is not shifted, set the Shift operand to default of "LSL 0". The logical
  485. /// instructions allow the shifted register to be rotated, but the arithmetic
  486. /// instructions do not. The AllowROR parameter specifies whether ROR is
  487. /// supported.
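   /// For example, in (add w0, (shl w1, 2)) the shift folds into the operand,
   /// giving Reg = w1 and Shift = LSL #2, i.e. "add wD, w0, w1, lsl #2".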
  488. bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
  489. SDValue &Reg, SDValue &Shift) {
  490. AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  491. if (ShType == AArch64_AM::InvalidShiftExtend)
  492. return false;
  493. if (!AllowROR && ShType == AArch64_AM::ROR)
  494. return false;
  495. if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
  496. unsigned BitSize = N.getValueSizeInBits();
  497. unsigned Val = RHS->getZExtValue() & (BitSize - 1);
  498. unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
  499. Reg = N.getOperand(0);
  500. Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
  501. return isWorthFolding(N);
  502. }
  503. return false;
  504. }
  505. /// getExtendTypeForNode - Translate an extend node to the corresponding
  506. /// ExtendType value.
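   /// For example, (and x, 0xFF) maps to UXTB and (sext_inreg x, i8) to SXTB
   /// when not selecting for a load/store, while (and x, 0xFFFFFFFF) maps to
   /// UXTW in either case.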
  507. static AArch64_AM::ShiftExtendType
  508. getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  509. if (N.getOpcode() == ISD::SIGN_EXTEND ||
  510. N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
  511. EVT SrcVT;
  512. if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
  513. SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
  514. else
  515. SrcVT = N.getOperand(0).getValueType();
  516. if (!IsLoadStore && SrcVT == MVT::i8)
  517. return AArch64_AM::SXTB;
  518. else if (!IsLoadStore && SrcVT == MVT::i16)
  519. return AArch64_AM::SXTH;
  520. else if (SrcVT == MVT::i32)
  521. return AArch64_AM::SXTW;
  522. assert(SrcVT != MVT::i64 && "extend from 64-bits?");
  523. return AArch64_AM::InvalidShiftExtend;
  524. } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
  525. N.getOpcode() == ISD::ANY_EXTEND) {
  526. EVT SrcVT = N.getOperand(0).getValueType();
  527. if (!IsLoadStore && SrcVT == MVT::i8)
  528. return AArch64_AM::UXTB;
  529. else if (!IsLoadStore && SrcVT == MVT::i16)
  530. return AArch64_AM::UXTH;
  531. else if (SrcVT == MVT::i32)
  532. return AArch64_AM::UXTW;
  533. assert(SrcVT != MVT::i64 && "extend from 64-bits?");
  534. return AArch64_AM::InvalidShiftExtend;
  535. } else if (N.getOpcode() == ISD::AND) {
  536. ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  537. if (!CSD)
  538. return AArch64_AM::InvalidShiftExtend;
  539. uint64_t AndMask = CSD->getZExtValue();
  540. switch (AndMask) {
  541. default:
  542. return AArch64_AM::InvalidShiftExtend;
  543. case 0xFF:
  544. return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  545. case 0xFFFF:
  546. return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  547. case 0xFFFFFFFF:
  548. return AArch64_AM::UXTW;
  549. }
  550. }
  551. return AArch64_AM::InvalidShiftExtend;
  552. }
  553. // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
  554. static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  555. if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
  556. DL->getOpcode() != AArch64ISD::DUPLANE32)
  557. return false;
  558. SDValue SV = DL->getOperand(0);
  559. if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
  560. return false;
  561. SDValue EV = SV.getOperand(1);
  562. if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
  563. return false;
  564. ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  565. ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  566. LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  567. LaneOp = EV.getOperand(0);
  568. return true;
  569. }
  570. // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
  571. // high lane extract.
  572. static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
  573. SDValue &LaneOp, int &LaneIdx) {
  574. if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
  575. std::swap(Op0, Op1);
  576. if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
  577. return false;
  578. }
  579. StdOp = Op1;
  580. return true;
  581. }
  582. /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
  583. /// is a lane in the upper half of a 128-bit vector. Recognize and select this
  584. /// so that we don't emit unnecessary lane extracts.
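   /// For example, an MLA whose second multiplicand is a DUPLANE of lane 3 of
   /// the high half of a 128-bit vector of 16-bit elements can be selected as
   /// an indexed MLA with lane index 7, avoiding an explicit subvector extract.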
  585. bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  586. SDLoc dl(N);
  587. SDValue Op0 = N->getOperand(0);
  588. SDValue Op1 = N->getOperand(1);
  589. SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
  590. SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
  591. int LaneIdx = -1; // Will hold the lane index.
  592. if (Op1.getOpcode() != ISD::MUL ||
  593. !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
  594. LaneIdx)) {
  595. std::swap(Op0, Op1);
  596. if (Op1.getOpcode() != ISD::MUL ||
  597. !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
  598. LaneIdx))
  599. return false;
  600. }
  601. SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
  602. SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
  603. unsigned MLAOpc = ~0U;
  604. switch (N->getSimpleValueType(0).SimpleTy) {
  605. default:
  606. llvm_unreachable("Unrecognized MLA.");
  607. case MVT::v4i16:
  608. MLAOpc = AArch64::MLAv4i16_indexed;
  609. break;
  610. case MVT::v8i16:
  611. MLAOpc = AArch64::MLAv8i16_indexed;
  612. break;
  613. case MVT::v2i32:
  614. MLAOpc = AArch64::MLAv2i32_indexed;
  615. break;
  616. case MVT::v4i32:
  617. MLAOpc = AArch64::MLAv4i32_indexed;
  618. break;
  619. }
  620. ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  621. return true;
  622. }
  623. bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  624. SDLoc dl(N);
  625. SDValue SMULLOp0;
  626. SDValue SMULLOp1;
  627. int LaneIdx;
  628. if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
  629. LaneIdx))
  630. return false;
  631. SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
  632. SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
  633. unsigned SMULLOpc = ~0U;
  634. if (IntNo == Intrinsic::aarch64_neon_smull) {
  635. switch (N->getSimpleValueType(0).SimpleTy) {
  636. default:
  637. llvm_unreachable("Unrecognized SMULL.");
  638. case MVT::v4i32:
  639. SMULLOpc = AArch64::SMULLv4i16_indexed;
  640. break;
  641. case MVT::v2i64:
  642. SMULLOpc = AArch64::SMULLv2i32_indexed;
  643. break;
  644. }
  645. } else if (IntNo == Intrinsic::aarch64_neon_umull) {
  646. switch (N->getSimpleValueType(0).SimpleTy) {
  647. default:
   648. llvm_unreachable("Unrecognized UMULL.");
  649. case MVT::v4i32:
  650. SMULLOpc = AArch64::UMULLv4i16_indexed;
  651. break;
  652. case MVT::v2i64:
  653. SMULLOpc = AArch64::UMULLv2i32_indexed;
  654. break;
  655. }
  656. } else
  657. llvm_unreachable("Unrecognized intrinsic.");
  658. ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  659. return true;
  660. }
  661. /// Instructions that accept extend modifiers like UXTW expect the register
  662. /// being extended to be a GPR32, but the incoming DAG might be acting on a
  663. /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
  664. /// this is the case.
  665. static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  666. if (N.getValueType() == MVT::i32)
  667. return N;
  668. SDLoc dl(N);
  669. SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  670. MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
  671. dl, MVT::i32, N, SubReg);
  672. return SDValue(Node, 0);
  673. }
  674. // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
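   // For example, with Low = -32, High = 31 and Scale = 16, a constant operand
   // of 64 selects the immediate 4 (since 64 = 4 * 16).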
  675. template<signed Low, signed High, signed Scale>
  676. bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  677. if (!isa<ConstantSDNode>(N))
  678. return false;
  679. int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  680. if ((MulImm % std::abs(Scale)) == 0) {
  681. int64_t RDVLImm = MulImm / Scale;
  682. if ((RDVLImm >= Low) && (RDVLImm <= High)) {
  683. Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
  684. return true;
  685. }
  686. }
  687. return false;
  688. }
   689. /// SelectArithExtendedRegister - Select an "extended register" operand. This
  690. /// operand folds in an extend followed by an optional left shift.
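   /// For example, (shl (and x1, 0xFF), 2) selects as Reg = the 32-bit
   /// sub-register of x1 and Shift = UXTB #2, matching "add x0, x2, w1, uxtb #2".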
  691. bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
  692. SDValue &Shift) {
  693. unsigned ShiftVal = 0;
  694. AArch64_AM::ShiftExtendType Ext;
  695. if (N.getOpcode() == ISD::SHL) {
  696. ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  697. if (!CSD)
  698. return false;
  699. ShiftVal = CSD->getZExtValue();
  700. if (ShiftVal > 4)
  701. return false;
  702. Ext = getExtendTypeForNode(N.getOperand(0));
  703. if (Ext == AArch64_AM::InvalidShiftExtend)
  704. return false;
  705. Reg = N.getOperand(0).getOperand(0);
  706. } else {
  707. Ext = getExtendTypeForNode(N);
  708. if (Ext == AArch64_AM::InvalidShiftExtend)
  709. return false;
  710. Reg = N.getOperand(0);
  711. // Don't match if free 32-bit -> 64-bit zext can be used instead.
  712. if (Ext == AArch64_AM::UXTW &&
  713. Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
  714. return false;
  715. }
  716. // AArch64 mandates that the RHS of the operation must use the smallest
  717. // register class that could contain the size being extended from. Thus,
  718. // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  719. // there might not be an actual 32-bit value in the program. We can
   720. // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  721. assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  722. Reg = narrowIfNeeded(CurDAG, Reg);
  723. Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
  724. MVT::i32);
  725. return isWorthFolding(N);
  726. }
  727. /// If there's a use of this ADDlow that's not itself a load/store then we'll
  728. /// need to create a real ADD instruction from it anyway and there's no point in
  729. /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
  730. /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
  731. /// leads to duplicated ADRP instructions.
  732. static bool isWorthFoldingADDlow(SDValue N) {
  733. for (auto Use : N->uses()) {
  734. if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
  735. Use->getOpcode() != ISD::ATOMIC_LOAD &&
  736. Use->getOpcode() != ISD::ATOMIC_STORE)
  737. return false;
  738. // ldar and stlr have much more restrictive addressing modes (just a
  739. // register).
  740. if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
  741. return false;
  742. }
  743. return true;
  744. }
  745. /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
  746. /// immediate" address. The "Size" argument is the size in bytes of the memory
  747. /// reference, which determines the scale.
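   /// For example, with a signed 7-bit immediate and Size = 8, 8-byte-aligned
   /// byte offsets in [-512, 504] are accepted and encoded as offset / 8;
   /// anything else falls through to the base-only form at the end.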
  748. bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
  749. unsigned BW, unsigned Size,
  750. SDValue &Base,
  751. SDValue &OffImm) {
  752. SDLoc dl(N);
  753. const DataLayout &DL = CurDAG->getDataLayout();
  754. const TargetLowering *TLI = getTargetLowering();
  755. if (N.getOpcode() == ISD::FrameIndex) {
  756. int FI = cast<FrameIndexSDNode>(N)->getIndex();
  757. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  758. OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  759. return true;
  760. }
  761. // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
   762. // addressing modes selected here don't support labels/immediates, only base+offset.
  763. if (CurDAG->isBaseWithConstantOffset(N)) {
  764. if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
  765. if (IsSignedImm) {
  766. int64_t RHSC = RHS->getSExtValue();
  767. unsigned Scale = Log2_32(Size);
  768. int64_t Range = 0x1LL << (BW - 1);
  769. if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
  770. RHSC < (Range << Scale)) {
  771. Base = N.getOperand(0);
  772. if (Base.getOpcode() == ISD::FrameIndex) {
  773. int FI = cast<FrameIndexSDNode>(Base)->getIndex();
  774. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  775. }
  776. OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
  777. return true;
  778. }
  779. } else {
  780. // unsigned Immediate
  781. uint64_t RHSC = RHS->getZExtValue();
  782. unsigned Scale = Log2_32(Size);
  783. uint64_t Range = 0x1ULL << BW;
  784. if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
  785. Base = N.getOperand(0);
  786. if (Base.getOpcode() == ISD::FrameIndex) {
  787. int FI = cast<FrameIndexSDNode>(Base)->getIndex();
  788. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  789. }
  790. OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
  791. return true;
  792. }
  793. }
  794. }
  795. }
  796. // Base only. The address will be materialized into a register before
  797. // the memory is accessed.
  798. // add x0, Xbase, #offset
  799. // stp x1, x2, [x0]
  800. Base = N;
  801. OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  802. return true;
  803. }
  804. /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
  805. /// immediate" address. The "Size" argument is the size in bytes of the memory
  806. /// reference, which determines the scale.
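   /// For example, with Size = 8 a base-plus-constant of (x0 + 32) selects as
   /// Base = x0 and OffImm = 4 (the byte offset divided by 8); unaligned or
   /// out-of-range offsets fall back to the unscaled or base-only forms.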
  807. bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
  808. SDValue &Base, SDValue &OffImm) {
  809. SDLoc dl(N);
  810. const DataLayout &DL = CurDAG->getDataLayout();
  811. const TargetLowering *TLI = getTargetLowering();
  812. if (N.getOpcode() == ISD::FrameIndex) {
  813. int FI = cast<FrameIndexSDNode>(N)->getIndex();
  814. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  815. OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  816. return true;
  817. }
  818. if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
  819. GlobalAddressSDNode *GAN =
  820. dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
  821. Base = N.getOperand(0);
  822. OffImm = N.getOperand(1);
  823. if (!GAN)
  824. return true;
  825. if (GAN->getOffset() % Size == 0 &&
  826. GAN->getGlobal()->getPointerAlignment(DL) >= Size)
  827. return true;
  828. }
  829. if (CurDAG->isBaseWithConstantOffset(N)) {
  830. if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
  831. int64_t RHSC = (int64_t)RHS->getZExtValue();
  832. unsigned Scale = Log2_32(Size);
  833. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
  834. Base = N.getOperand(0);
  835. if (Base.getOpcode() == ISD::FrameIndex) {
  836. int FI = cast<FrameIndexSDNode>(Base)->getIndex();
  837. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  838. }
  839. OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
  840. return true;
  841. }
  842. }
  843. }
  844. // Before falling back to our general case, check if the unscaled
  845. // instructions can handle this. If so, that's preferable.
  846. if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
  847. return false;
  848. // Base only. The address will be materialized into a register before
  849. // the memory is accessed.
  850. // add x0, Xbase, #offset
  851. // ldr x0, [x0]
  852. Base = N;
  853. OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  854. return true;
  855. }
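// Worked example for SelectAddrModeIndexed above: with Size == 8, Scale is 3,
// so offsets that are multiples of 8 in [0, 32760] are accepted and encoded as
// RHSC >> 3 (at most 4095, the unsigned 12-bit maximum), e.g.
//   ldr x1, [x0, #32760]      // encoded as 32760 >> 3 == 4095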
  856. /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
  857. /// immediate" address. This should only match when there is an offset that
  858. /// is not valid for a scaled immediate addressing mode. The "Size" argument
  859. /// is the size in bytes of the memory reference, which is needed here to know
  860. /// what is valid for a scaled immediate.
  861. bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
  862. SDValue &Base,
  863. SDValue &OffImm) {
  864. if (!CurDAG->isBaseWithConstantOffset(N))
  865. return false;
  866. if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
  867. int64_t RHSC = RHS->getSExtValue();
  868. // If the offset is valid as a scaled immediate, don't match here.
  869. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
  870. RHSC < (0x1000 << Log2_32(Size)))
  871. return false;
  872. if (RHSC >= -256 && RHSC < 256) {
  873. Base = N.getOperand(0);
  874. if (Base.getOpcode() == ISD::FrameIndex) {
  875. int FI = cast<FrameIndexSDNode>(Base)->getIndex();
  876. const TargetLowering *TLI = getTargetLowering();
  877. Base = CurDAG->getTargetFrameIndex(
  878. FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  879. }
  880. OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
  881. return true;
  882. }
  883. }
  884. return false;
  885. }
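// Worked example for SelectAddrModeUnscaled above: with Size == 8 and an
// offset of #12, the scaled form is rejected (12 is not a multiple of 8), but
// 12 lies in [-256, 256), so the offset is accepted here and ends up as an
// unscaled access, e.g.
//   ldur x1, [x0, #12]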
  886. static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  887. SDLoc dl(N);
  888. SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  889. SDValue ImpDef = SDValue(
  890. CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  891. MachineSDNode *Node = CurDAG->getMachineNode(
  892. TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  893. return SDValue(Node, 0);
  894. }
  895. /// Check if the given SHL node (\p N), can be used to form an
  896. /// extended register for an addressing mode.
  897. bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
  898. bool WantExtend, SDValue &Offset,
  899. SDValue &SignExtend) {
  900. assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  901. ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  902. if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
  903. return false;
  904. SDLoc dl(N);
  905. if (WantExtend) {
  906. AArch64_AM::ShiftExtendType Ext =
  907. getExtendTypeForNode(N.getOperand(0), true);
  908. if (Ext == AArch64_AM::InvalidShiftExtend)
  909. return false;
  910. Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
  911. SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
  912. MVT::i32);
  913. } else {
  914. Offset = N.getOperand(0);
  915. SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  916. }
  917. unsigned LegalShiftVal = Log2_32(Size);
  918. unsigned ShiftVal = CSD->getZExtValue();
  919. if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
  920. return false;
  921. return isWorthFolding(N);
  922. }
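// Illustration for SelectExtendedSHL above (example operands): for a Size == 8
// access, a node of the form (shl (sext i32 w1 to i64), 3) can be folded into
// the extended-register offset of
//   ldr x0, [x2, w1, sxtw #3]
// with SignExtend set for SXTW and cleared for UXTW; a shift amount other than
// 0 or log2(Size) is rejected.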
  923. bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
  924. SDValue &Base, SDValue &Offset,
  925. SDValue &SignExtend,
  926. SDValue &DoShift) {
  927. if (N.getOpcode() != ISD::ADD)
  928. return false;
  929. SDValue LHS = N.getOperand(0);
  930. SDValue RHS = N.getOperand(1);
  931. SDLoc dl(N);
  932. // We don't want to match immediate adds here, because they are better lowered
  933. // to the register-immediate addressing modes.
  934. if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
  935. return false;
  936. // Check if this particular node is reused in any non-memory related
  937. // operation. If yes, do not try to fold this node into the address
  938. // computation, since the computation will be kept.
  939. const SDNode *Node = N.getNode();
  940. for (SDNode *UI : Node->uses()) {
  941. if (!isa<MemSDNode>(*UI))
  942. return false;
  943. }
  944. // Remember if it is worth folding N when it produces extended register.
  945. bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
  946. // Try to match a shifted extend on the RHS.
  947. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
  948. SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
  949. Base = LHS;
  950. DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
  951. return true;
  952. }
  953. // Try to match a shifted extend on the LHS.
  954. if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
  955. SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
  956. Base = RHS;
  957. DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
  958. return true;
  959. }
  960. // There was no shift, whatever else we find.
  961. DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
  962. AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  963. // Try to match an unshifted extend on the LHS.
  964. if (IsExtendedRegisterWorthFolding &&
  965. (Ext = getExtendTypeForNode(LHS, true)) !=
  966. AArch64_AM::InvalidShiftExtend) {
  967. Base = RHS;
  968. Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
  969. SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
  970. MVT::i32);
  971. if (isWorthFolding(LHS))
  972. return true;
  973. }
  974. // Try to match an unshifted extend on the RHS.
  975. if (IsExtendedRegisterWorthFolding &&
  976. (Ext = getExtendTypeForNode(RHS, true)) !=
  977. AArch64_AM::InvalidShiftExtend) {
  978. Base = LHS;
  979. Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
  980. SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
  981. MVT::i32);
  982. if (isWorthFolding(RHS))
  983. return true;
  984. }
  985. return false;
  986. }
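// Illustration for SelectAddrModeWRO above (example registers): the
// W-register-offset forms selected here correspond to addresses such as
//   ldr x0, [x1, w2, sxtw]       // unshifted extend (DoShift == false)
//   ldr x0, [x1, w2, uxtw #3]    // shifted extend   (DoShift == true)
// where the offset comes from a sign- or zero-extended i32 value.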
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
  990. static bool isPreferredADD(int64_t ImmOff) {
  991. // Constant in [0x0, 0xfff] can be encoded in ADD.
  992. if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
  993. return true;
  994. // Check if it can be encoded in an "ADD LSL #12".
  995. if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
// As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
  997. return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
  998. (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  999. return false;
  1000. }
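// Worked examples for isPreferredADD above (illustrative constants):
//   0xfff    -> true:  fits the 12-bit ADD immediate.
//   0x123000 -> true:  needs "ADD ..., #0x123, lsl #12" and is not a single
//                      MOVZ.
//   0x3000   -> false: a single "MOVZ #0x3000" is cheaper.
//   0x120000 -> false: a single "MOVZ #0x12, lsl #16" is cheaper.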
  1001. bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
  1002. SDValue &Base, SDValue &Offset,
  1003. SDValue &SignExtend,
  1004. SDValue &DoShift) {
  1005. if (N.getOpcode() != ISD::ADD)
  1006. return false;
  1007. SDValue LHS = N.getOperand(0);
  1008. SDValue RHS = N.getOperand(1);
  1009. SDLoc DL(N);
  1010. // Check if this particular node is reused in any non-memory related
  1011. // operation. If yes, do not try to fold this node into the address
  1012. // computation, since the computation will be kept.
  1013. const SDNode *Node = N.getNode();
  1014. for (SDNode *UI : Node->uses()) {
  1015. if (!isa<MemSDNode>(*UI))
  1016. return false;
  1017. }
// Watch out if RHS is a wide immediate: it cannot be selected into the
// [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
// either. Instead it will use the [BaseReg + 0] address mode and generate
  1021. // instructions like:
  1022. // MOV X0, WideImmediate
  1023. // ADD X1, BaseReg, X0
  1024. // LDR X2, [X1, 0]
  1025. // For such situation, using [BaseReg, XReg] addressing mode can save one
  1026. // ADD/SUB:
  1027. // MOV X0, WideImmediate
  1028. // LDR X2, [BaseReg, X0]
  1029. if (isa<ConstantSDNode>(RHS)) {
  1030. int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
  1031. unsigned Scale = Log2_32(Size);
// Skip immediates that can be selected in the load/store addressing mode.
// Also skip immediates that can be encoded by a single ADD (SUB is also
// checked by using -ImmOff).
  1035. if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
  1036. isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
  1037. return false;
  1038. SDValue Ops[] = { RHS };
  1039. SDNode *MOVI =
  1040. CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
  1041. SDValue MOVIV = SDValue(MOVI, 0);
  1042. // This ADD of two X register will be selected into [Reg+Reg] mode.
  1043. N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  1044. }
  1045. // Remember if it is worth folding N when it produces extended register.
  1046. bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
  1047. // Try to match a shifted extend on the RHS.
  1048. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
  1049. SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
  1050. Base = LHS;
  1051. DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
  1052. return true;
  1053. }
  1054. // Try to match a shifted extend on the LHS.
  1055. if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
  1056. SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
  1057. Base = RHS;
  1058. DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
  1059. return true;
  1060. }
  1061. // Match any non-shifted, non-extend, non-immediate add expression.
  1062. Base = LHS;
  1063. Offset = RHS;
  1064. SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  1065. DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  1066. // Reg1 + Reg2 is free: no check needed.
  1067. return true;
  1068. }
  1069. SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  1070. static const unsigned RegClassIDs[] = {
  1071. AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  1072. static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
  1073. AArch64::dsub2, AArch64::dsub3};
  1074. return createTuple(Regs, RegClassIDs, SubRegs);
  1075. }
  1076. SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  1077. static const unsigned RegClassIDs[] = {
  1078. AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  1079. static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
  1080. AArch64::qsub2, AArch64::qsub3};
  1081. return createTuple(Regs, RegClassIDs, SubRegs);
  1082. }
  1083. SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  1084. static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
  1085. AArch64::ZPR3RegClassID,
  1086. AArch64::ZPR4RegClassID};
  1087. static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
  1088. AArch64::zsub2, AArch64::zsub3};
  1089. return createTuple(Regs, RegClassIDs, SubRegs);
  1090. }
  1091. SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
  1092. const unsigned RegClassIDs[],
  1093. const unsigned SubRegs[]) {
  1094. // There's no special register-class for a vector-list of 1 element: it's just
  1095. // a vector.
  1096. if (Regs.size() == 1)
  1097. return Regs[0];
  1098. assert(Regs.size() >= 2 && Regs.size() <= 4);
  1099. SDLoc DL(Regs[0]);
  1100. SmallVector<SDValue, 4> Ops;
  1101. // First operand of REG_SEQUENCE is the desired RegClass.
  1102. Ops.push_back(
  1103. CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
  1104. // Then we get pairs of source & subregister-position for the components.
  1105. for (unsigned i = 0; i < Regs.size(); ++i) {
  1106. Ops.push_back(Regs[i]);
  1107. Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  1108. }
  1109. SDNode *N =
  1110. CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  1111. return SDValue(N, 0);
  1112. }
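// Illustration for createTuple above (hypothetical operands): for three Q
// registers, e.g. via createQTuple, the resulting node is
//   REG_SEQUENCE QQQRegClassID, V0, qsub0, V1, qsub1, V2, qsub2
// i.e. the register class is picked by tuple size and each input is tagged
// with its sub-register index.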
  1113. void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
  1114. bool isExt) {
  1115. SDLoc dl(N);
  1116. EVT VT = N->getValueType(0);
  1117. unsigned ExtOff = isExt;
  1118. // Form a REG_SEQUENCE to force register allocation.
  1119. unsigned Vec0Off = ExtOff + 1;
  1120. SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
  1121. N->op_begin() + Vec0Off + NumVecs);
  1122. SDValue RegSeq = createQTuple(Regs);
  1123. SmallVector<SDValue, 6> Ops;
  1124. if (isExt)
  1125. Ops.push_back(N->getOperand(1));
  1126. Ops.push_back(RegSeq);
  1127. Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  1128. ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
  1129. }
  1130. bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  1131. LoadSDNode *LD = cast<LoadSDNode>(N);
  1132. if (LD->isUnindexed())
  1133. return false;
  1134. EVT VT = LD->getMemoryVT();
  1135. EVT DstVT = N->getValueType(0);
  1136. ISD::MemIndexedMode AM = LD->getAddressingMode();
  1137. bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  1138. // We're not doing validity checking here. That was done when checking
  1139. // if we should mark the load as indexed or not. We're just selecting
  1140. // the right instruction.
  1141. unsigned Opcode = 0;
  1142. ISD::LoadExtType ExtType = LD->getExtensionType();
  1143. bool InsertTo64 = false;
  1144. if (VT == MVT::i64)
  1145. Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  1146. else if (VT == MVT::i32) {
  1147. if (ExtType == ISD::NON_EXTLOAD)
  1148. Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
  1149. else if (ExtType == ISD::SEXTLOAD)
  1150. Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
  1151. else {
  1152. Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
  1153. InsertTo64 = true;
  1154. // The result of the load is only i32. It's the subreg_to_reg that makes
  1155. // it into an i64.
  1156. DstVT = MVT::i32;
  1157. }
  1158. } else if (VT == MVT::i16) {
  1159. if (ExtType == ISD::SEXTLOAD) {
  1160. if (DstVT == MVT::i64)
  1161. Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
  1162. else
  1163. Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
  1164. } else {
  1165. Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
  1166. InsertTo64 = DstVT == MVT::i64;
  1167. // The result of the load is only i32. It's the subreg_to_reg that makes
  1168. // it into an i64.
  1169. DstVT = MVT::i32;
  1170. }
  1171. } else if (VT == MVT::i8) {
  1172. if (ExtType == ISD::SEXTLOAD) {
  1173. if (DstVT == MVT::i64)
  1174. Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
  1175. else
  1176. Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
  1177. } else {
  1178. Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
  1179. InsertTo64 = DstVT == MVT::i64;
  1180. // The result of the load is only i32. It's the subreg_to_reg that makes
  1181. // it into an i64.
  1182. DstVT = MVT::i32;
  1183. }
  1184. } else if (VT == MVT::f16) {
  1185. Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  1186. } else if (VT == MVT::bf16) {
  1187. Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  1188. } else if (VT == MVT::f32) {
  1189. Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  1190. } else if (VT == MVT::f64 || VT.is64BitVector()) {
  1191. Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  1192. } else if (VT.is128BitVector()) {
  1193. Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  1194. } else
  1195. return false;
  1196. SDValue Chain = LD->getChain();
  1197. SDValue Base = LD->getBasePtr();
  1198. ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  1199. int OffsetVal = (int)OffsetOp->getZExtValue();
  1200. SDLoc dl(N);
  1201. SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  1202. SDValue Ops[] = { Base, Offset, Chain };
  1203. SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
  1204. MVT::Other, Ops);
  1205. // Transfer memoperands.
  1206. MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  1207. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
  1208. // Either way, we're replacing the node, so tell the caller that.
  1209. SDValue LoadedVal = SDValue(Res, 1);
  1210. if (InsertTo64) {
  1211. SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  1212. LoadedVal =
  1213. SDValue(CurDAG->getMachineNode(
  1214. AArch64::SUBREG_TO_REG, dl, MVT::i64,
  1215. CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
  1216. SubReg),
  1217. 0);
  1218. }
  1219. ReplaceUses(SDValue(N, 0), LoadedVal);
  1220. ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  1221. ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  1222. CurDAG->RemoveDeadNode(N);
  1223. return true;
  1224. }
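// Illustration for tryIndexedLoad above (example instruction): a post-indexed
// zero-extending i8 load whose result is used as i64 selects LDRBBpost, e.g.
//   ldrb w0, [x1], #1
// followed by a SUBREG_TO_REG so the user sees a 64-bit value; result 0 of the
// machine node is the written-back base and result 1 is the loaded value.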
  1225. void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
  1226. unsigned SubRegIdx) {
  1227. SDLoc dl(N);
  1228. EVT VT = N->getValueType(0);
  1229. SDValue Chain = N->getOperand(0);
  1230. SDValue Ops[] = {N->getOperand(2), // Mem operand;
  1231. Chain};
  1232. const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  1233. SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1234. SDValue SuperReg = SDValue(Ld, 0);
  1235. for (unsigned i = 0; i < NumVecs; ++i)
  1236. ReplaceUses(SDValue(N, i),
  1237. CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
  1238. ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  1239. // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  1240. // because it's too simple to have needed special treatment during lowering.
  1241. if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
  1242. MachineMemOperand *MemOp = MemIntr->getMemOperand();
  1243. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  1244. }
  1245. CurDAG->RemoveDeadNode(N);
  1246. }
  1247. void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
  1248. unsigned Opc, unsigned SubRegIdx) {
  1249. SDLoc dl(N);
  1250. EVT VT = N->getValueType(0);
  1251. SDValue Chain = N->getOperand(0);
  1252. SDValue Ops[] = {N->getOperand(1), // Mem operand
  1253. N->getOperand(2), // Incremental
  1254. Chain};
  1255. const EVT ResTys[] = {MVT::i64, // Type of the write back register
  1256. MVT::Untyped, MVT::Other};
  1257. SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1258. // Update uses of write back register
  1259. ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
  1260. // Update uses of vector list
  1261. SDValue SuperReg = SDValue(Ld, 1);
  1262. if (NumVecs == 1)
  1263. ReplaceUses(SDValue(N, 0), SuperReg);
  1264. else
  1265. for (unsigned i = 0; i < NumVecs; ++i)
  1266. ReplaceUses(SDValue(N, i),
  1267. CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
  1268. // Update the chain
  1269. ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  1270. CurDAG->RemoveDeadNode(N);
  1271. }
  1272. /// Optimize \param OldBase and \param OldOffset selecting the best addressing
  1273. /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
  1274. /// new Base and an SDValue representing the new offset.
  1275. std::tuple<unsigned, SDValue, SDValue>
  1276. AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
  1277. unsigned Opc_ri,
  1278. const SDValue &OldBase,
  1279. const SDValue &OldOffset,
  1280. unsigned Scale) {
  1281. SDValue NewBase = OldBase;
  1282. SDValue NewOffset = OldOffset;
  1283. // Detect a possible Reg+Imm addressing mode.
  1284. const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
  1285. N, OldBase, NewBase, NewOffset);
  1286. // Detect a possible reg+reg addressing mode, but only if we haven't already
  1287. // detected a Reg+Imm one.
  1288. const bool IsRegReg =
  1289. !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
  1290. // Select the instruction.
  1291. return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
  1292. }
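// Illustration for findAddrModeSVELoadStore above (example assembly): if the
// base folds into a reg+imm form, the _ri opcode is used, e.g.
//   ld1d { z0.d }, p0/z, [x0, #1, mul vl]
// otherwise, when a reg+reg form is found, the _rr opcode is used, e.g.
//   ld1d { z0.d }, p0/z, [x0, x1, lsl #3]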
  1293. void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
  1294. unsigned Scale, unsigned Opc_ri,
  1295. unsigned Opc_rr, bool IsIntr) {
  1296. assert(Scale < 4 && "Invalid scaling value.");
  1297. SDLoc DL(N);
  1298. EVT VT = N->getValueType(0);
  1299. SDValue Chain = N->getOperand(0);
  1300. // Optimize addressing mode.
  1301. SDValue Base, Offset;
  1302. unsigned Opc;
  1303. std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
  1304. N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
  1305. CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
  1306. SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
  1307. Base, // Memory operand
  1308. Offset, Chain};
  1309. const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  1310. SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  1311. SDValue SuperReg = SDValue(Load, 0);
  1312. for (unsigned i = 0; i < NumVecs; ++i)
  1313. ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
  1314. AArch64::zsub0 + i, DL, VT, SuperReg));
  1315. // Copy chain
  1316. unsigned ChainIdx = NumVecs;
  1317. ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  1318. CurDAG->RemoveDeadNode(N);
  1319. }
  1320. void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
  1321. unsigned Opc) {
  1322. SDLoc dl(N);
  1323. EVT VT = N->getOperand(2)->getValueType(0);
  1324. // Form a REG_SEQUENCE to force register allocation.
  1325. bool Is128Bit = VT.getSizeInBits() == 128;
  1326. SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  1327. SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
  1328. SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  1329. SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
  1330. // Transfer memoperands.
  1331. MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  1332. CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
  1333. ReplaceNode(N, St);
  1334. }
  1335. void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
  1336. unsigned Scale, unsigned Opc_rr,
  1337. unsigned Opc_ri) {
  1338. SDLoc dl(N);
  1339. // Form a REG_SEQUENCE to force register allocation.
  1340. SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  1341. SDValue RegSeq = createZTuple(Regs);
  1342. // Optimize addressing mode.
  1343. unsigned Opc;
  1344. SDValue Offset, Base;
  1345. std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
  1346. N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
  1347. CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
  1348. SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
  1349. Base, // address
  1350. Offset, // offset
  1351. N->getOperand(0)}; // chain
  1352. SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
  1353. ReplaceNode(N, St);
  1354. }
  1355. bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
  1356. SDValue &OffImm) {
  1357. SDLoc dl(N);
  1358. const DataLayout &DL = CurDAG->getDataLayout();
  1359. const TargetLowering *TLI = getTargetLowering();
  1360. // Try to match it for the frame address
  1361. if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
  1362. int FI = FINode->getIndex();
  1363. Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  1364. OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  1365. return true;
  1366. }
  1367. return false;
  1368. }
  1369. void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
  1370. unsigned Opc) {
  1371. SDLoc dl(N);
  1372. EVT VT = N->getOperand(2)->getValueType(0);
  1373. const EVT ResTys[] = {MVT::i64, // Type of the write back register
  1374. MVT::Other}; // Type for the Chain
  1375. // Form a REG_SEQUENCE to force register allocation.
  1376. bool Is128Bit = VT.getSizeInBits() == 128;
  1377. SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  1378. SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
  1379. SDValue Ops[] = {RegSeq,
  1380. N->getOperand(NumVecs + 1), // base register
  1381. N->getOperand(NumVecs + 2), // Incremental
  1382. N->getOperand(0)}; // Chain
  1383. SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1384. ReplaceNode(N, St);
  1385. }
  1386. namespace {
  1387. /// WidenVector - Given a value in the V64 register class, produce the
  1388. /// equivalent value in the V128 register class.
  1389. class WidenVector {
  1390. SelectionDAG &DAG;
  1391. public:
  1392. WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
  1393. SDValue operator()(SDValue V64Reg) {
  1394. EVT VT = V64Reg.getValueType();
  1395. unsigned NarrowSize = VT.getVectorNumElements();
  1396. MVT EltTy = VT.getVectorElementType().getSimpleVT();
  1397. MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
  1398. SDLoc DL(V64Reg);
  1399. SDValue Undef =
  1400. SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
  1401. return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  1402. }
  1403. };
  1404. } // namespace
  1405. /// NarrowVector - Given a value in the V128 register class, produce the
  1406. /// equivalent value in the V64 register class.
  1407. static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  1408. EVT VT = V128Reg.getValueType();
  1409. unsigned WideSize = VT.getVectorNumElements();
  1410. MVT EltTy = VT.getVectorElementType().getSimpleVT();
  1411. MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
  1412. return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
  1413. V128Reg);
  1414. }
  1415. void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
  1416. unsigned Opc) {
  1417. SDLoc dl(N);
  1418. EVT VT = N->getValueType(0);
  1419. bool Narrow = VT.getSizeInBits() == 64;
  1420. // Form a REG_SEQUENCE to force register allocation.
  1421. SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  1422. if (Narrow)
  1423. transform(Regs, Regs.begin(),
  1424. WidenVector(*CurDAG));
  1425. SDValue RegSeq = createQTuple(Regs);
  1426. const EVT ResTys[] = {MVT::Untyped, MVT::Other};
  1427. unsigned LaneNo =
  1428. cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
  1429. SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
  1430. N->getOperand(NumVecs + 3), N->getOperand(0)};
  1431. SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1432. SDValue SuperReg = SDValue(Ld, 0);
  1433. EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  1434. static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
  1435. AArch64::qsub2, AArch64::qsub3 };
  1436. for (unsigned i = 0; i < NumVecs; ++i) {
  1437. SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
  1438. if (Narrow)
  1439. NV = NarrowVector(NV, *CurDAG);
  1440. ReplaceUses(SDValue(N, i), NV);
  1441. }
  1442. ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  1443. CurDAG->RemoveDeadNode(N);
  1444. }
  1445. void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
  1446. unsigned Opc) {
  1447. SDLoc dl(N);
  1448. EVT VT = N->getValueType(0);
  1449. bool Narrow = VT.getSizeInBits() == 64;
  1450. // Form a REG_SEQUENCE to force register allocation.
  1451. SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  1452. if (Narrow)
  1453. transform(Regs, Regs.begin(),
  1454. WidenVector(*CurDAG));
  1455. SDValue RegSeq = createQTuple(Regs);
  1456. const EVT ResTys[] = {MVT::i64, // Type of the write back register
  1457. RegSeq->getValueType(0), MVT::Other};
  1458. unsigned LaneNo =
  1459. cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
  1460. SDValue Ops[] = {RegSeq,
  1461. CurDAG->getTargetConstant(LaneNo, dl,
  1462. MVT::i64), // Lane Number
  1463. N->getOperand(NumVecs + 2), // Base register
  1464. N->getOperand(NumVecs + 3), // Incremental
  1465. N->getOperand(0)};
  1466. SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1467. // Update uses of the write back register
  1468. ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
  1469. // Update uses of the vector list
  1470. SDValue SuperReg = SDValue(Ld, 1);
  1471. if (NumVecs == 1) {
  1472. ReplaceUses(SDValue(N, 0),
  1473. Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  1474. } else {
  1475. EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  1476. static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
  1477. AArch64::qsub2, AArch64::qsub3 };
  1478. for (unsigned i = 0; i < NumVecs; ++i) {
  1479. SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
  1480. SuperReg);
  1481. if (Narrow)
  1482. NV = NarrowVector(NV, *CurDAG);
  1483. ReplaceUses(SDValue(N, i), NV);
  1484. }
  1485. }
  1486. // Update the Chain
  1487. ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  1488. CurDAG->RemoveDeadNode(N);
  1489. }
  1490. void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
  1491. unsigned Opc) {
  1492. SDLoc dl(N);
  1493. EVT VT = N->getOperand(2)->getValueType(0);
  1494. bool Narrow = VT.getSizeInBits() == 64;
  1495. // Form a REG_SEQUENCE to force register allocation.
  1496. SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  1497. if (Narrow)
  1498. transform(Regs, Regs.begin(),
  1499. WidenVector(*CurDAG));
  1500. SDValue RegSeq = createQTuple(Regs);
  1501. unsigned LaneNo =
  1502. cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
  1503. SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
  1504. N->getOperand(NumVecs + 3), N->getOperand(0)};
  1505. SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
  1506. // Transfer memoperands.
  1507. MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  1508. CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
  1509. ReplaceNode(N, St);
  1510. }
  1511. void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
  1512. unsigned Opc) {
  1513. SDLoc dl(N);
  1514. EVT VT = N->getOperand(2)->getValueType(0);
  1515. bool Narrow = VT.getSizeInBits() == 64;
  1516. // Form a REG_SEQUENCE to force register allocation.
  1517. SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  1518. if (Narrow)
  1519. transform(Regs, Regs.begin(),
  1520. WidenVector(*CurDAG));
  1521. SDValue RegSeq = createQTuple(Regs);
  1522. const EVT ResTys[] = {MVT::i64, // Type of the write back register
  1523. MVT::Other};
  1524. unsigned LaneNo =
  1525. cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
  1526. SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
  1527. N->getOperand(NumVecs + 2), // Base Register
  1528. N->getOperand(NumVecs + 3), // Incremental
  1529. N->getOperand(0)};
  1530. SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  1531. // Transfer memoperands.
  1532. MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  1533. CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
  1534. ReplaceNode(N, St);
  1535. }
  1536. static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
  1537. unsigned &Opc, SDValue &Opd0,
  1538. unsigned &LSB, unsigned &MSB,
  1539. unsigned NumberOfIgnoredLowBits,
  1540. bool BiggerPattern) {
  1541. assert(N->getOpcode() == ISD::AND &&
  1542. "N must be a AND operation to call this function");
  1543. EVT VT = N->getValueType(0);
  1544. // Here we can test the type of VT and return false when the type does not
  1545. // match, but since it is done prior to that call in the current context
  1546. // we turned that into an assert to avoid redundant code.
  1547. assert((VT == MVT::i32 || VT == MVT::i64) &&
  1548. "Type checking must have been done before calling this function");
  1549. // FIXME: simplify-demanded-bits in DAGCombine will probably have
  1550. // changed the AND node to a 32-bit mask operation. We'll have to
  1551. // undo that as part of the transform here if we want to catch all
  1552. // the opportunities.
  1553. // Currently the NumberOfIgnoredLowBits argument helps to recover
// from these situations when matching the bigger pattern (bitfield insert).
  1555. // For unsigned extracts, check for a shift right and mask
  1556. uint64_t AndImm = 0;
  1557. if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
  1558. return false;
  1559. const SDNode *Op0 = N->getOperand(0).getNode();
  1560. // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  1561. // simplified. Try to undo that
  1562. AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
  1563. // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  1564. if (AndImm & (AndImm + 1))
  1565. return false;
  1566. bool ClampMSB = false;
  1567. uint64_t SrlImm = 0;
  1568. // Handle the SRL + ANY_EXTEND case.
  1569. if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
  1570. isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
  1571. // Extend the incoming operand of the SRL to 64-bit.
  1572. Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
  1573. // Make sure to clamp the MSB so that we preserve the semantics of the
  1574. // original operations.
  1575. ClampMSB = true;
  1576. } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
  1577. isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
  1578. SrlImm)) {
  1579. // If the shift result was truncated, we can still combine them.
  1580. Opd0 = Op0->getOperand(0).getOperand(0);
  1581. // Use the type of SRL node.
  1582. VT = Opd0->getValueType(0);
  1583. } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
  1584. Opd0 = Op0->getOperand(0);
  1585. } else if (BiggerPattern) {
  1586. // Let's pretend a 0 shift right has been performed.
// The resulting code will be at least as good as the original one, and it
// may expose more opportunities for the bitfield insert pattern.
  1589. // FIXME: Currently we limit this to the bigger pattern, because
  1590. // some optimizations expect AND and not UBFM.
  1591. Opd0 = N->getOperand(0);
  1592. } else
  1593. return false;
  1594. // Bail out on large immediates. This happens when no proper
  1595. // combining/constant folding was performed.
  1596. if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
  1597. LLVM_DEBUG(
  1598. (dbgs() << N
  1599. << ": Found large shift immediate, this should not happen\n"));
  1600. return false;
  1601. }
  1602. LSB = SrlImm;
  1603. MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
  1604. : countTrailingOnes<uint64_t>(AndImm)) -
  1605. 1;
  1606. if (ClampMSB)
  1607. // Since we're moving the extend before the right shift operation, we need
  1608. // to clamp the MSB to make sure we don't shift in undefined bits instead of
  1609. // the zeros which would get shifted in with the original right shift
  1610. // operation.
  1611. MSB = MSB > 31 ? 31 : MSB;
  1612. Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  1613. return true;
  1614. }
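// Worked example for isBitfieldExtractOpFromAnd above (illustrative
// constants): for an i32 node (and (srl x, 3), 0xff), SrlImm is 3 and the mask
// has 8 trailing ones, so LSB == 3 and MSB == 10, giving
//   UBFMWri x, #3, #10     // i.e. ubfx w0, wx, #3, #8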
  1615. static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
  1616. SDValue &Opd0, unsigned &Immr,
  1617. unsigned &Imms) {
  1618. assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
  1619. EVT VT = N->getValueType(0);
  1620. unsigned BitWidth = VT.getSizeInBits();
  1621. assert((VT == MVT::i32 || VT == MVT::i64) &&
  1622. "Type checking must have been done before calling this function");
  1623. SDValue Op = N->getOperand(0);
  1624. if (Op->getOpcode() == ISD::TRUNCATE) {
  1625. Op = Op->getOperand(0);
  1626. VT = Op->getValueType(0);
  1627. BitWidth = VT.getSizeInBits();
  1628. }
  1629. uint64_t ShiftImm;
  1630. if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
  1631. !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
  1632. return false;
  1633. unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  1634. if (ShiftImm + Width > BitWidth)
  1635. return false;
  1636. Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  1637. Opd0 = Op.getOperand(0);
  1638. Immr = ShiftImm;
  1639. Imms = ShiftImm + Width - 1;
  1640. return true;
  1641. }
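// Worked example for isBitfieldExtractOpFromSExtInReg above: for an i32 node
// (sign_extend_inreg (srl x, 4), i8), ShiftImm is 4 and Width is 8, so
// Immr == 4 and Imms == 11, giving
//   SBFMWri x, #4, #11     // i.e. sbfx w0, wx, #4, #8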
  1642. static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
  1643. SDValue &Opd0, unsigned &LSB,
  1644. unsigned &MSB) {
// We are looking for the following pattern, which extracts several
// contiguous bits from the source value and places them at the LSB of the
// destination value; all other bits of the destination value are set to
// zero:
//
// Value2 = AND Value, MaskImm
// SRL Value2, ShiftImm
//
// where MaskImm >> ShiftImm determines the bit width.
  1653. //
  1654. // This gets selected into a single UBFM:
  1655. //
  1656. // UBFM Value, ShiftImm, BitWide + SrlImm -1
  1657. //
  1658. if (N->getOpcode() != ISD::SRL)
  1659. return false;
  1660. uint64_t AndMask = 0;
  1661. if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
  1662. return false;
  1663. Opd0 = N->getOperand(0).getOperand(0);
  1664. uint64_t SrlImm = 0;
  1665. if (!isIntImmediate(N->getOperand(1), SrlImm))
  1666. return false;
  1667. // Check whether we really have several bits extract here.
  1668. unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
  1669. if (BitWide && isMask_64(AndMask >> SrlImm)) {
  1670. if (N->getValueType(0) == MVT::i32)
  1671. Opc = AArch64::UBFMWri;
  1672. else
  1673. Opc = AArch64::UBFMXri;
  1674. LSB = SrlImm;
  1675. MSB = BitWide + SrlImm - 1;
  1676. return true;
  1677. }
  1678. return false;
  1679. }
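// Worked example for isSeveralBitsExtractOpFromShr above: for
// (srl (and x, 0xff0), 4), AndMask >> SrlImm == 0xff is a mask of 8 bits, so
// LSB == 4 and MSB == 11, giving
//   UBFM x, #4, #11        // i.e. ubfx #4, #8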
  1680. static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
  1681. unsigned &Immr, unsigned &Imms,
  1682. bool BiggerPattern) {
  1683. assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
  1684. "N must be a SHR/SRA operation to call this function");
  1685. EVT VT = N->getValueType(0);
  1686. // Here we can test the type of VT and return false when the type does not
  1687. // match, but since it is done prior to that call in the current context
  1688. // we turned that into an assert to avoid redundant code.
  1689. assert((VT == MVT::i32 || VT == MVT::i64) &&
  1690. "Type checking must have been done before calling this function");
  1691. // Check for AND + SRL doing several bits extract.
  1692. if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
  1693. return true;
  1694. // We're looking for a shift of a shift.
  1695. uint64_t ShlImm = 0;
  1696. uint64_t TruncBits = 0;
  1697. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
  1698. Opd0 = N->getOperand(0).getOperand(0);
  1699. } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
  1700. N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
// We are looking for a shift of a truncate. A truncate from i64 to i32 can
// be considered as setting the high 32 bits to zero. Our strategy here is
// to always generate a 64-bit UBFM. This consistency will help the CSE
// pass later find more redundancy.
  1705. Opd0 = N->getOperand(0).getOperand(0);
  1706. TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
  1707. VT = Opd0.getValueType();
  1708. assert(VT == MVT::i64 && "the promoted type should be i64");
  1709. } else if (BiggerPattern) {
  1710. // Let's pretend a 0 shift left has been performed.
  1711. // FIXME: Currently we limit this to the bigger pattern case,
  1712. // because some optimizations expect AND and not UBFM
  1713. Opd0 = N->getOperand(0);
  1714. } else
  1715. return false;
  1716. // Missing combines/constant folding may have left us with strange
  1717. // constants.
  1718. if (ShlImm >= VT.getSizeInBits()) {
  1719. LLVM_DEBUG(
  1720. (dbgs() << N
  1721. << ": Found large shift immediate, this should not happen\n"));
  1722. return false;
  1723. }
  1724. uint64_t SrlImm = 0;
  1725. if (!isIntImmediate(N->getOperand(1), SrlImm))
  1726. return false;
  1727. assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
  1728. "bad amount in shift node!");
  1729. int immr = SrlImm - ShlImm;
  1730. Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  1731. Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  1732. // SRA requires a signed extraction
  1733. if (VT == MVT::i32)
  1734. Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  1735. else
  1736. Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  1737. return true;
  1738. }
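// Worked example for isBitfieldExtractOpFromShr above: for an i32 node
// (srl (shl x, 24), 28), ShlImm == 24 and SrlImm == 28, so Immr == 4 and
// Imms == 7, giving
//   UBFMWri x, #4, #7      // i.e. ubfx w0, wx, #4, #4
// An SRA root would pick the signed SBFM form instead.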
  1739. bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  1740. assert(N->getOpcode() == ISD::SIGN_EXTEND);
  1741. EVT VT = N->getValueType(0);
  1742. EVT NarrowVT = N->getOperand(0)->getValueType(0);
  1743. if (VT != MVT::i64 || NarrowVT != MVT::i32)
  1744. return false;
  1745. uint64_t ShiftImm;
  1746. SDValue Op = N->getOperand(0);
  1747. if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
  1748. return false;
  1749. SDLoc dl(N);
  1750. // Extend the incoming operand of the shift to 64-bits.
  1751. SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
  1752. unsigned Immr = ShiftImm;
  1753. unsigned Imms = NarrowVT.getSizeInBits() - 1;
  1754. SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
  1755. CurDAG->getTargetConstant(Imms, dl, VT)};
  1756. CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
  1757. return true;
  1758. }
  1759. /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
  1760. /// extract of a subvector.
  1761. bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
  1762. assert(N->getOpcode() == ISD::FP_EXTEND);
  1763. // There are 2 forms of fcvtl2 - extend to double or extend to float.
  1764. SDValue Extract = N->getOperand(0);
  1765. EVT VT = N->getValueType(0);
  1766. EVT NarrowVT = Extract.getValueType();
  1767. if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
  1768. (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
  1769. return false;
  1770. // Optionally look past a bitcast.
  1771. Extract = peekThroughBitcasts(Extract);
  1772. if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
  1773. return false;
  1774. // Match extract from start of high half index.
  1775. // Example: v8i16 -> v4i16 means the extract must begin at index 4.
  1776. unsigned ExtractIndex = Extract.getConstantOperandVal(1);
  1777. if (ExtractIndex != Extract.getValueType().getVectorNumElements())
  1778. return false;
  1779. auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
  1780. CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
  1781. return true;
  1782. }
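// Illustration for tryHighFPExt above (example types): extending the high half
// of a v4f32 -- (fp_extend (extract_subvector V, 2)) -- to v2f64 selects
// FCVTLv4i32, i.e.
//   fcvtl2 v0.2d, v1.4s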
  1783. static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
  1784. SDValue &Opd0, unsigned &Immr, unsigned &Imms,
  1785. unsigned NumberOfIgnoredLowBits = 0,
  1786. bool BiggerPattern = false) {
  1787. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
  1788. return false;
  1789. switch (N->getOpcode()) {
  1790. default:
  1791. if (!N->isMachineOpcode())
  1792. return false;
  1793. break;
  1794. case ISD::AND:
  1795. return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
  1796. NumberOfIgnoredLowBits, BiggerPattern);
  1797. case ISD::SRL:
  1798. case ISD::SRA:
  1799. return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
  1800. case ISD::SIGN_EXTEND_INREG:
  1801. return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  1802. }
  1803. unsigned NOpc = N->getMachineOpcode();
  1804. switch (NOpc) {
  1805. default:
  1806. return false;
  1807. case AArch64::SBFMWri:
  1808. case AArch64::UBFMWri:
  1809. case AArch64::SBFMXri:
  1810. case AArch64::UBFMXri:
  1811. Opc = NOpc;
  1812. Opd0 = N->getOperand(0);
  1813. Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
  1814. Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
  1815. return true;
  1816. }
  1817. // Unreachable
  1818. return false;
  1819. }
  1820. bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  1821. unsigned Opc, Immr, Imms;
  1822. SDValue Opd0;
  1823. if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
  1824. return false;
  1825. EVT VT = N->getValueType(0);
  1826. SDLoc dl(N);
  1827. // If the bit extract operation is 64bit but the original type is 32bit, we
  1828. // need to add one EXTRACT_SUBREG.
  1829. if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
  1830. SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
  1831. CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
  1832. SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
  1833. SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  1834. ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
  1835. MVT::i32, SDValue(BFM, 0), SubReg));
  1836. return true;
  1837. }
  1838. SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
  1839. CurDAG->getTargetConstant(Imms, dl, VT)};
  1840. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  1841. return true;
  1842. }
/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
  1845. /// this asks whether DstMask zeroes precisely those bits that will be set by
  1846. /// the other half.
  1847. static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
  1848. unsigned NumberOfIgnoredHighBits, EVT VT) {
  1849. assert((VT == MVT::i32 || VT == MVT::i64) &&
  1850. "i32 or i64 mask type expected!");
  1851. unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
  1852. APInt SignificantDstMask = APInt(BitWidth, DstMask);
  1853. APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
  1854. return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
  1855. (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
  1856. }
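// Worked example for isBitfieldDstMask above (illustrative masks): for an i32
// value with no ignored high bits, DstMask == 0xffff0000 and inserted bits
// 0x0000ffff are complementary (their AND is 0 and their OR is all ones), so a
// BFI can combine the two halves.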
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7, and they live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
  1872. static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
  1873. static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
  1874. unsigned Depth) {
  1875. uint64_t Imm =
  1876. cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  1877. Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  1878. UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  1879. getUsefulBits(Op, UsefulBits, Depth + 1);
  1880. }
  1881. static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
  1882. uint64_t Imm, uint64_t MSB,
  1883. unsigned Depth) {
  1884. // inherit the bitwidth value
  1885. APInt OpUsefulBits(UsefulBits);
  1886. OpUsefulBits = 1;
  1887. if (MSB >= Imm) {
  1888. OpUsefulBits <<= MSB - Imm + 1;
  1889. --OpUsefulBits;
  1890. // The interesting part will be in the lower part of the result
  1891. getUsefulBits(Op, OpUsefulBits, Depth + 1);
  1892. // The interesting part was starting at Imm in the argument
  1893. OpUsefulBits <<= Imm;
  1894. } else {
  1895. OpUsefulBits <<= MSB + 1;
  1896. --OpUsefulBits;
  1897. // The interesting part will be shifted in the result
  1898. OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
  1899. getUsefulBits(Op, OpUsefulBits, Depth + 1);
  1900. // The interesting part was at zero in the argument
  1901. OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  1902. }
  1903. UsefulBits &= OpUsefulBits;
  1904. }
  1905. static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
  1906. unsigned Depth) {
  1907. uint64_t Imm =
  1908. cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  1909. uint64_t MSB =
  1910. cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  1911. getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
  1912. }
  1913. static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
  1914. unsigned Depth) {
  1915. uint64_t ShiftTypeAndValue =
  1916. cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  1917. APInt Mask(UsefulBits);
  1918. Mask.clearAllBits();
  1919. Mask.flipAllBits();
  1920. if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
  1921. // Shift Left
  1922. uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
  1923. Mask <<= ShiftAmt;
  1924. getUsefulBits(Op, Mask, Depth + 1);
  1925. Mask.lshrInPlace(ShiftAmt);
  1926. } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
  1927. // Shift Right
  1928. // We do not handle AArch64_AM::ASR, because the sign will change the
  1929. // number of useful bits
  1930. uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
  1931. Mask.lshrInPlace(ShiftAmt);
  1932. getUsefulBits(Op, Mask, Depth + 1);
  1933. Mask <<= ShiftAmt;
  1934. } else
  1935. return;
  1936. UsefulBits &= Mask;
  1937. }
  1938. static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
  1939. unsigned Depth) {
  1940. uint64_t Imm =
  1941. cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  1942. uint64_t MSB =
  1943. cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
  1944. APInt OpUsefulBits(UsefulBits);
  1945. OpUsefulBits = 1;
  1946. APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  1947. ResultUsefulBits.flipAllBits();
  1948. APInt Mask(UsefulBits.getBitWidth(), 0);
  1949. getUsefulBits(Op, ResultUsefulBits, Depth + 1);
  1950. if (MSB >= Imm) {
  1951. // The instruction is a BFXIL.
  1952. uint64_t Width = MSB - Imm + 1;
  1953. uint64_t LSB = Imm;
  1954. OpUsefulBits <<= Width;
  1955. --OpUsefulBits;
  1956. if (Op.getOperand(1) == Orig) {
  1957. // Copy the low bits from the result to bits starting from LSB.
  1958. Mask = ResultUsefulBits & OpUsefulBits;
  1959. Mask <<= LSB;
  1960. }
  1961. if (Op.getOperand(0) == Orig)
  1962. // Bits starting from LSB in the input contribute to the result.
  1963. Mask |= (ResultUsefulBits & ~OpUsefulBits);
  1964. } else {
  1965. // The instruction is a BFI.
  1966. uint64_t Width = MSB + 1;
  1967. uint64_t LSB = UsefulBits.getBitWidth() - Imm;
  1968. OpUsefulBits <<= Width;
  1969. --OpUsefulBits;
  1970. OpUsefulBits <<= LSB;
  1971. if (Op.getOperand(1) == Orig) {
  1972. // Copy the bits from the result to the zero bits.
  1973. Mask = ResultUsefulBits & OpUsefulBits;
  1974. Mask.lshrInPlace(LSB);
  1975. }
  1976. if (Op.getOperand(0) == Orig)
  1977. Mask |= (ResultUsefulBits & ~OpUsefulBits);
  1978. }
  1979. UsefulBits &= Mask;
  1980. }
  1981. static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
  1982. SDValue Orig, unsigned Depth) {
  1983. // Users of this node should have already been instruction selected
  1984. // FIXME: Can we turn that into an assert?
  1985. if (!UserNode->isMachineOpcode())
  1986. return;
  1987. switch (UserNode->getMachineOpcode()) {
  1988. default:
  1989. return;
  1990. case AArch64::ANDSWri:
  1991. case AArch64::ANDSXri:
  1992. case AArch64::ANDWri:
  1993. case AArch64::ANDXri:
  1994. // We increment Depth only when we call the getUsefulBits
  1995. return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
  1996. Depth);
  1997. case AArch64::UBFMWri:
  1998. case AArch64::UBFMXri:
  1999. return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
  2000. case AArch64::ORRWrs:
  2001. case AArch64::ORRXrs:
  2002. if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
  2003. getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
  2004. Depth);
  2005. return;
  2006. case AArch64::BFMWri:
  2007. case AArch64::BFMXri:
  2008. return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
  2009. case AArch64::STRBBui:
  2010. case AArch64::STURBBi:
  2011. if (UserNode->getOperand(0) != Orig)
  2012. return;
  2013. UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
  2014. return;
  2015. case AArch64::STRHHui:
  2016. case AArch64::STURHHi:
  2017. if (UserNode->getOperand(0) != Orig)
  2018. return;
  2019. UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
  2020. return;
  2021. }
  2022. }
  2023. static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  2024. if (Depth >= SelectionDAG::MaxRecursionDepth)
  2025. return;
  2026. // Initialize UsefulBits
  2027. if (!Depth) {
  2028. unsigned Bitwidth = Op.getScalarValueSizeInBits();
// At the beginning, assume every produced bit is useful
  2030. UsefulBits = APInt(Bitwidth, 0);
  2031. UsefulBits.flipAllBits();
  2032. }
  2033. APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
  2034. for (SDNode *Node : Op.getNode()->uses()) {
  2035. // A use cannot produce useful bits
  2036. APInt UsefulBitsForUse = APInt(UsefulBits);
  2037. getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
  2038. UsersUsefulBits |= UsefulBitsForUse;
  2039. }
  2040. // UsefulBits contains the produced bits that are meaningful for the
  2041. // current definition, thus a user cannot make a bit meaningful at
  2042. // this point
  2043. UsefulBits &= UsersUsefulBits;
  2044. }
  2045. /// Create a machine node performing a notional SHL of Op by ShlAmount. If
  2046. /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
  2047. /// 0, return Op unchanged.
  2048. static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  2049. if (ShlAmount == 0)
  2050. return Op;
  2051. EVT VT = Op.getValueType();
  2052. SDLoc dl(Op);
  2053. unsigned BitWidth = VT.getSizeInBits();
  2054. unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  2055. SDNode *ShiftNode;
  2056. if (ShlAmount > 0) {
  2057. // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
  2058. ShiftNode = CurDAG->getMachineNode(
  2059. UBFMOpc, dl, VT, Op,
  2060. CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
  2061. CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  2062. } else {
  2063. // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
  2064. assert(ShlAmount < 0 && "expected right shift");
  2065. int ShrAmount = -ShlAmount;
  2066. ShiftNode = CurDAG->getMachineNode(
  2067. UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
  2068. CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  2069. }
  2070. return SDValue(ShiftNode, 0);
  2071. }
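// Worked examples for getLeftShift above: on an i32 value, ShlAmount == 3
// emits UBFMWri Op, #29, #28 (lsl #3), while ShlAmount == -3 emits
// UBFMWri Op, #3, #31 (lsr #3).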
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)"?
  2074. static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
  2075. bool BiggerPattern,
  2076. SDValue &Src, int &ShiftAmount,
  2077. int &MaskWidth) {
  2078. EVT VT = Op.getValueType();
  2079. unsigned BitWidth = VT.getSizeInBits();
  2080. (void)BitWidth;
  2081. assert(BitWidth == 32 || BitWidth == 64);
  2082. KnownBits Known = CurDAG->computeKnownBits(Op);
  2083. // Non-zero in the sense that they're not provably zero, which is the key
  2084. // point if we want to use this value
  2085. uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
  2086. // Discard a constant AND mask if present. It's safe because the node will
  2087. // already have been factored into the computeKnownBits calculation above.
  2088. uint64_t AndImm;
  2089. if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
  2090. assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
  2091. Op = Op.getOperand(0);
  2092. }
  2093. // Don't match if the SHL has more than one use, since then we'll end up
  2094. // generating SHL+UBFIZ instead of just keeping SHL+AND.
  2095. if (!BiggerPattern && !Op.hasOneUse())
  2096. return false;
  2097. uint64_t ShlImm;
  2098. if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
  2099. return false;
  2100. Op = Op.getOperand(0);
  2101. if (!isShiftedMask_64(NonZeroBits))
  2102. return false;
  2103. ShiftAmount = countTrailingZeros(NonZeroBits);
  2104. MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
  2105. // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  2106. // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  2107. // amount. BiggerPattern is true when this pattern is being matched for BFI,
  2108. // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  2109. // which case it is not profitable to insert an extra shift.
  2110. if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
  2111. return false;
  2112. Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
  2113. return true;
  2114. }
  2115. static bool isShiftedMask(uint64_t Mask, EVT VT) {
  2116. assert(VT == MVT::i32 || VT == MVT::i64);
  2117. if (VT == MVT::i32)
  2118. return isShiftedMask_32(Mask);
  2119. return isShiftedMask_64(Mask);
  2120. }
  2121. // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
  2122. // inserted only sets known zero bits.
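// A rough sketch of one case this handles: in
//   (or (and X, 0xffffff00), 0x5a)
// 0x5a is not a valid logical immediate, but it only sets bits the AND has
// already proven zero, so it can be materialized with MOVi32imm and inserted
// with a BFXIL (BFM with ImmR = 0, ImmS = 7).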
  2123. static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2124. assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
  2125. EVT VT = N->getValueType(0);
  2126. if (VT != MVT::i32 && VT != MVT::i64)
  2127. return false;
  2128. unsigned BitWidth = VT.getSizeInBits();
  2129. uint64_t OrImm;
  2130. if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
  2131. return false;
2132. // Skip this transformation if the ORR immediate can be encoded directly in
2133. // the ORR; in that case we would only trade an AND+ORR for an ORR+BFI/BFXIL,
2134. // which is most likely performance neutral.
  2135. if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
  2136. return false;
  2137. uint64_t MaskImm;
  2138. SDValue And = N->getOperand(0);
  2139. // Must be a single use AND with an immediate operand.
  2140. if (!And.hasOneUse() ||
  2141. !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
  2142. return false;
  2143. // Compute the Known Zero for the AND as this allows us to catch more general
  2144. // cases than just looking for AND with imm.
  2145. KnownBits Known = CurDAG->computeKnownBits(And);
  2146. // Non-zero in the sense that they're not provably zero, which is the key
  2147. // point if we want to use this value.
  2148. uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
  2149. // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  2150. if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
  2151. return false;
  2152. // The bits being inserted must only set those bits that are known to be zero.
  2153. if ((OrImm & NotKnownZero) != 0) {
  2154. // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
  2155. // currently handle this case.
  2156. return false;
  2157. }
  2158. // BFI/BFXIL dst, src, #lsb, #width.
  2159. int LSB = countTrailingOnes(NotKnownZero);
  2160. int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
  2161. // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  2162. unsigned ImmR = (BitWidth - LSB) % BitWidth;
  2163. unsigned ImmS = Width - 1;
  2164. // If we're creating a BFI instruction avoid cases where we need more
  2165. // instructions to materialize the BFI constant as compared to the original
  2166. // ORR. A BFXIL will use the same constant as the original ORR, so the code
  2167. // should be no worse in this case.
  2168. bool IsBFI = LSB != 0;
  2169. uint64_t BFIImm = OrImm >> LSB;
  2170. if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
  2171. // We have a BFI instruction and we know the constant can't be materialized
2172. // with an ORR-immediate using the zero register.
  2173. unsigned OrChunks = 0, BFIChunks = 0;
  2174. for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
  2175. if (((OrImm >> Shift) & 0xFFFF) != 0)
  2176. ++OrChunks;
  2177. if (((BFIImm >> Shift) & 0xFFFF) != 0)
  2178. ++BFIChunks;
  2179. }
  2180. if (BFIChunks > OrChunks)
  2181. return false;
  2182. }
  2183. // Materialize the constant to be inserted.
  2184. SDLoc DL(N);
  2185. unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  2186. SDNode *MOVI = CurDAG->getMachineNode(
  2187. MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
  2188. // Create the BFI/BFXIL instruction.
  2189. SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
  2190. CurDAG->getTargetConstant(ImmR, DL, VT),
  2191. CurDAG->getTargetConstant(ImmS, DL, VT)};
  2192. unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  2193. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  2194. return true;
  2195. }
  2196. static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
  2197. SelectionDAG *CurDAG) {
2198. assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
  2199. EVT VT = N->getValueType(0);
  2200. if (VT != MVT::i32 && VT != MVT::i64)
  2201. return false;
  2202. unsigned BitWidth = VT.getSizeInBits();
  2203. // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  2204. // have the expected shape. Try to undo that.
  2205. unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
  2206. unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2207. // Given an OR operation, check if we have the following pattern
  2208. // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  2209. // isBitfieldExtractOp)
2210. // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  2211. // countTrailingZeros(mask2) == imm2 - imm + 1
  2212. // f = d | c
  2213. // if yes, replace the OR instruction with:
  2214. // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
  2215. // OR is commutative, check all combinations of operand order and values of
  2216. // BiggerPattern, i.e.
  2217. // Opd0, Opd1, BiggerPattern=false
  2218. // Opd1, Opd0, BiggerPattern=false
  2219. // Opd0, Opd1, BiggerPattern=true
  2220. // Opd1, Opd0, BiggerPattern=true
  2221. // Several of these combinations may match, so check with BiggerPattern=false
  2222. // first since that will produce better results by matching more instructions
  2223. // and/or inserting fewer extra instructions.
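// A typical case (illustrative): d = (x & 0xfffffff0) | ((y >> 4) & 0xf)
// matches with the shifted operand as the extract and is selected as a BFM
// with ImmR = 4 and ImmS = 7, i.e. BFXIL dst, y, #4, #4.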
  2224. for (int I = 0; I < 4; ++I) {
  2225. SDValue Dst, Src;
  2226. unsigned ImmR, ImmS;
  2227. bool BiggerPattern = I / 2;
  2228. SDValue OrOpd0Val = N->getOperand(I % 2);
  2229. SDNode *OrOpd0 = OrOpd0Val.getNode();
  2230. SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
  2231. SDNode *OrOpd1 = OrOpd1Val.getNode();
  2232. unsigned BFXOpc;
  2233. int DstLSB, Width;
  2234. if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
  2235. NumberOfIgnoredLowBits, BiggerPattern)) {
  2236. // Check that the returned opcode is compatible with the pattern,
  2237. // i.e., same type and zero extended (U and not S)
  2238. if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
  2239. (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
  2240. continue;
  2241. // Compute the width of the bitfield insertion
  2242. DstLSB = 0;
  2243. Width = ImmS - ImmR + 1;
2244. // FIXME: This constraint is here to catch bitfield insertion; we may
2245. // want to widen the pattern if we want to handle the general bitfield
2246. // move case.
  2247. if (Width <= 0)
  2248. continue;
  2249. // If the mask on the insertee is correct, we have a BFXIL operation. We
  2250. // can share the ImmR and ImmS values from the already-computed UBFM.
  2251. } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
  2252. BiggerPattern,
  2253. Src, DstLSB, Width)) {
  2254. ImmR = (BitWidth - DstLSB) % BitWidth;
  2255. ImmS = Width - 1;
  2256. } else
  2257. continue;
  2258. // Check the second part of the pattern
  2259. EVT VT = OrOpd1Val.getValueType();
  2260. assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
  2261. // Compute the Known Zero for the candidate of the first operand.
2262. // This allows us to catch more general cases than just looking for an
2263. // AND with an immediate. Indeed, simplify-demanded-bits may have removed
2264. // the AND instruction because it proved it was useless.
  2265. KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
  2266. // Check if there is enough room for the second operand to appear
  2267. // in the first one
  2268. APInt BitsToBeInserted =
  2269. APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
  2270. if ((BitsToBeInserted & ~Known.Zero) != 0)
  2271. continue;
  2272. // Set the first operand
  2273. uint64_t Imm;
  2274. if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
  2275. isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
  2276. // In that case, we can eliminate the AND
  2277. Dst = OrOpd1->getOperand(0);
  2278. else
  2279. // Maybe the AND has been removed by simplify-demanded-bits
  2280. // or is useful because it discards more bits
  2281. Dst = OrOpd1Val;
  2282. // both parts match
  2283. SDLoc DL(N);
  2284. SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
  2285. CurDAG->getTargetConstant(ImmS, DL, VT)};
  2286. unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  2287. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  2288. return true;
  2289. }
  2290. // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  2291. // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  2292. // mask (e.g., 0x000ffff0).
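// For instance (illustrative), with Mask0Imm == 0xffff000f and
// Mask1Imm == 0x0000fff0, bits [4, 16) of Y are shifted down by an LSR and
// then inserted into X by the resulting BFM (a BFI/BFXIL alias).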
  2293. uint64_t Mask0Imm, Mask1Imm;
  2294. SDValue And0 = N->getOperand(0);
  2295. SDValue And1 = N->getOperand(1);
  2296. if (And0.hasOneUse() && And1.hasOneUse() &&
  2297. isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
  2298. isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
  2299. APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
  2300. (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
  2301. // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
  2302. // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
  2303. // bits to be inserted.
  2304. if (isShiftedMask(Mask0Imm, VT)) {
  2305. std::swap(And0, And1);
  2306. std::swap(Mask0Imm, Mask1Imm);
  2307. }
  2308. SDValue Src = And1->getOperand(0);
  2309. SDValue Dst = And0->getOperand(0);
  2310. unsigned LSB = countTrailingZeros(Mask1Imm);
  2311. int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
  2312. // The BFXIL inserts the low-order bits from a source register, so right
  2313. // shift the needed bits into place.
  2314. SDLoc DL(N);
  2315. unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  2316. SDNode *LSR = CurDAG->getMachineNode(
  2317. ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
  2318. CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
  2319. // BFXIL is an alias of BFM, so translate to BFM operands.
  2320. unsigned ImmR = (BitWidth - LSB) % BitWidth;
  2321. unsigned ImmS = Width - 1;
  2322. // Create the BFXIL instruction.
  2323. SDValue Ops[] = {Dst, SDValue(LSR, 0),
  2324. CurDAG->getTargetConstant(ImmR, DL, VT),
  2325. CurDAG->getTargetConstant(ImmS, DL, VT)};
  2326. unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  2327. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  2328. return true;
  2329. }
  2330. return false;
  2331. }
  2332. bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
  2333. if (N->getOpcode() != ISD::OR)
  2334. return false;
  2335. APInt NUsefulBits;
  2336. getUsefulBits(SDValue(N, 0), NUsefulBits);
2337. // If none of the bits are useful, just return UNDEF.
  2338. if (!NUsefulBits) {
  2339. CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
  2340. return true;
  2341. }
  2342. if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
  2343. return true;
  2344. return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
  2345. }
2346. /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
  2347. /// equivalent of a left shift by a constant amount followed by an and masking
  2348. /// out a contiguous set of bits.
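/// For example, (and (shl x, 4), 0xff0) on an i32 is selected as
/// UBFM wD, wN, #28, #7, i.e. UBFIZ wD, wN, #4, #8.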
  2349. bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
  2350. if (N->getOpcode() != ISD::AND)
  2351. return false;
  2352. EVT VT = N->getValueType(0);
  2353. if (VT != MVT::i32 && VT != MVT::i64)
  2354. return false;
  2355. SDValue Op0;
  2356. int DstLSB, Width;
  2357. if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
  2358. Op0, DstLSB, Width))
  2359. return false;
  2360. // ImmR is the rotate right amount.
  2361. unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
  2362. // ImmS is the most significant bit of the source to be moved.
  2363. unsigned ImmS = Width - 1;
  2364. SDLoc DL(N);
  2365. SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
  2366. CurDAG->getTargetConstant(ImmS, DL, VT)};
  2367. unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  2368. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  2369. return true;
  2370. }
  2371. /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
  2372. /// variable shift/rotate instructions.
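/// For example, (shl x, (and y, 63)) on i64 can use LSLV directly because
/// LSLV only reads the low six bits of the shift amount, and
/// (shl x, (sub 64, y)) can be selected as a NEG followed by LSLV.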
  2373. bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  2374. EVT VT = N->getValueType(0);
  2375. unsigned Opc;
  2376. switch (N->getOpcode()) {
  2377. case ISD::ROTR:
  2378. Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
  2379. break;
  2380. case ISD::SHL:
  2381. Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
  2382. break;
  2383. case ISD::SRL:
  2384. Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
  2385. break;
  2386. case ISD::SRA:
  2387. Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
  2388. break;
  2389. default:
  2390. return false;
  2391. }
  2392. uint64_t Size;
  2393. uint64_t Bits;
  2394. if (VT == MVT::i32) {
  2395. Bits = 5;
  2396. Size = 32;
  2397. } else if (VT == MVT::i64) {
  2398. Bits = 6;
  2399. Size = 64;
  2400. } else
  2401. return false;
  2402. SDValue ShiftAmt = N->getOperand(1);
  2403. SDLoc DL(N);
  2404. SDValue NewShiftAmt;
  2405. // Skip over an extend of the shift amount.
  2406. if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
  2407. ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
  2408. ShiftAmt = ShiftAmt->getOperand(0);
  2409. if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
  2410. SDValue Add0 = ShiftAmt->getOperand(0);
  2411. SDValue Add1 = ShiftAmt->getOperand(1);
  2412. uint64_t Add0Imm;
  2413. uint64_t Add1Imm;
  2414. // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
  2415. // to avoid the ADD/SUB.
  2416. if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
  2417. NewShiftAmt = Add0;
  2418. // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
  2419. // generate a NEG instead of a SUB of a constant.
  2420. else if (ShiftAmt->getOpcode() == ISD::SUB &&
  2421. isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
  2422. (Add0Imm % Size == 0)) {
  2423. unsigned NegOpc;
  2424. unsigned ZeroReg;
  2425. EVT SubVT = ShiftAmt->getValueType(0);
  2426. if (SubVT == MVT::i32) {
  2427. NegOpc = AArch64::SUBWrr;
  2428. ZeroReg = AArch64::WZR;
  2429. } else {
  2430. assert(SubVT == MVT::i64);
  2431. NegOpc = AArch64::SUBXrr;
  2432. ZeroReg = AArch64::XZR;
  2433. }
  2434. SDValue Zero =
  2435. CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
  2436. MachineSDNode *Neg =
  2437. CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
  2438. NewShiftAmt = SDValue(Neg, 0);
  2439. } else
  2440. return false;
  2441. } else {
  2442. // If the shift amount is masked with an AND, check that the mask covers the
  2443. // bits that are implicitly ANDed off by the above opcodes and if so, skip
  2444. // the AND.
  2445. uint64_t MaskImm;
  2446. if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
  2447. !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
  2448. return false;
  2449. if (countTrailingOnes(MaskImm) < Bits)
  2450. return false;
  2451. NewShiftAmt = ShiftAmt->getOperand(0);
  2452. }
  2453. // Narrow/widen the shift amount to match the size of the shift operation.
  2454. if (VT == MVT::i32)
  2455. NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  2456. else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
  2457. SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
  2458. MachineSDNode *Ext = CurDAG->getMachineNode(
  2459. AArch64::SUBREG_TO_REG, DL, VT,
  2460. CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
  2461. NewShiftAmt = SDValue(Ext, 0);
  2462. }
  2463. SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  2464. CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  2465. return true;
  2466. }
  2467. bool
  2468. AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
  2469. unsigned RegWidth) {
  2470. APFloat FVal(0.0);
  2471. if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
  2472. FVal = CN->getValueAPF();
  2473. else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
  2474. // Some otherwise illegal constants are allowed in this case.
  2475. if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
  2476. !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
  2477. return false;
  2478. ConstantPoolSDNode *CN =
  2479. dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
  2480. FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  2481. } else
  2482. return false;
  2483. // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  2484. // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  2485. // x-register.
  2486. //
  2487. // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  2488. // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  2489. // integers.
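// For example, a multiplier of 65536.0 corresponds to fbits == 16.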
  2490. bool IsExact;
  2491. // fbits is between 1 and 64 in the worst-case, which means the fmul
  2492. // could have 2^64 as an actual operand. Need 65 bits of precision.
  2493. APSInt IntVal(65, true);
  2494. FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
  2495. // N.b. isPowerOf2 also checks for > 0.
  2496. if (!IsExact || !IntVal.isPowerOf2()) return false;
  2497. unsigned FBits = IntVal.logBase2();
  2498. // Checks above should have guaranteed that we haven't lost information in
  2499. // finding FBits, but it must still be in range.
  2500. if (FBits == 0 || FBits > RegWidth) return false;
  2501. FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  2502. return true;
  2503. }
2504. // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2505. // integer value of each field, and combines them into the single value used
2506. // as the operand of the MRS/MSR instruction.
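// For example, the string "3:3:14:0:2" combines to
// (3 << 14) | (3 << 11) | (14 << 7) | (0 << 3) | 2 == 0xdf02.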
  2507. static int getIntOperandFromRegisterString(StringRef RegString) {
  2508. SmallVector<StringRef, 5> Fields;
  2509. RegString.split(Fields, ':');
  2510. if (Fields.size() == 1)
  2511. return -1;
  2512. assert(Fields.size() == 5
  2513. && "Invalid number of fields in read register string");
  2514. SmallVector<int, 5> Ops;
  2515. bool AllIntFields = true;
  2516. for (StringRef Field : Fields) {
  2517. unsigned IntField;
  2518. AllIntFields &= !Field.getAsInteger(10, IntField);
  2519. Ops.push_back(IntField);
  2520. }
  2521. assert(AllIntFields &&
  2522. "Unexpected non-integer value in special register string.");
  2523. (void)AllIntFields;
2524. // Combine the integer fields of the string into a single value based on
2525. // the bit encoding of the MRS/MSR instruction.
  2526. return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
  2527. (Ops[3] << 3) | (Ops[4]);
  2528. }
  2529. // Lower the read_register intrinsic to an MRS instruction node if the special
2530. // register string argument is either of the form detailed in the ACLE (the
2531. // form described in getIntOperandFromRegisterString) or is a named register
  2532. // known by the MRS SysReg mapper.
  2533. bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  2534. const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  2535. const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  2536. SDLoc DL(N);
  2537. int Reg = getIntOperandFromRegisterString(RegString->getString());
  2538. if (Reg != -1) {
  2539. ReplaceNode(N, CurDAG->getMachineNode(
  2540. AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
  2541. CurDAG->getTargetConstant(Reg, DL, MVT::i32),
  2542. N->getOperand(0)));
  2543. return true;
  2544. }
  2545. // Use the sysreg mapper to map the remaining possible strings to the
  2546. // value for the register to be used for the instruction operand.
  2547. auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
  2548. if (TheReg && TheReg->Readable &&
  2549. TheReg->haveFeatures(Subtarget->getFeatureBits()))
  2550. Reg = TheReg->Encoding;
  2551. else
  2552. Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
  2553. if (Reg != -1) {
  2554. ReplaceNode(N, CurDAG->getMachineNode(
  2555. AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
  2556. CurDAG->getTargetConstant(Reg, DL, MVT::i32),
  2557. N->getOperand(0)));
  2558. return true;
  2559. }
  2560. if (RegString->getString() == "pc") {
  2561. ReplaceNode(N, CurDAG->getMachineNode(
  2562. AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
  2563. CurDAG->getTargetConstant(0, DL, MVT::i32),
  2564. N->getOperand(0)));
  2565. return true;
  2566. }
  2567. return false;
  2568. }
  2569. // Lower the write_register intrinsic to an MSR instruction node if the special
2570. // register string argument is either of the form detailed in the ACLE (the
2571. // form described in getIntOperandFromRegisterString) or is a named register
  2572. // known by the MSR SysReg mapper.
  2573. bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  2574. const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  2575. const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  2576. SDLoc DL(N);
  2577. int Reg = getIntOperandFromRegisterString(RegString->getString());
  2578. if (Reg != -1) {
  2579. ReplaceNode(
  2580. N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
  2581. CurDAG->getTargetConstant(Reg, DL, MVT::i32),
  2582. N->getOperand(2), N->getOperand(0)));
  2583. return true;
  2584. }
  2585. // Check if the register was one of those allowed as the pstatefield value in
  2586. // the MSR (immediate) instruction. To accept the values allowed in the
  2587. // pstatefield for the MSR (immediate) instruction, we also require that an
2588. // immediate value has been provided as an argument; we know this is the
2589. // case because it has been ensured by semantic checking.
  2590. auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
  2591. if (PMapper) {
  2592. assert (isa<ConstantSDNode>(N->getOperand(2))
  2593. && "Expected a constant integer expression.");
  2594. unsigned Reg = PMapper->Encoding;
  2595. uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  2596. unsigned State;
  2597. if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
  2598. assert(Immed < 2 && "Bad imm");
  2599. State = AArch64::MSRpstateImm1;
  2600. } else {
  2601. assert(Immed < 16 && "Bad imm");
  2602. State = AArch64::MSRpstateImm4;
  2603. }
  2604. ReplaceNode(N, CurDAG->getMachineNode(
  2605. State, DL, MVT::Other,
  2606. CurDAG->getTargetConstant(Reg, DL, MVT::i32),
  2607. CurDAG->getTargetConstant(Immed, DL, MVT::i16),
  2608. N->getOperand(0)));
  2609. return true;
  2610. }
  2611. // Use the sysreg mapper to attempt to map the remaining possible strings
  2612. // to the value for the register to be used for the MSR (register)
  2613. // instruction operand.
  2614. auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
  2615. if (TheReg && TheReg->Writeable &&
  2616. TheReg->haveFeatures(Subtarget->getFeatureBits()))
  2617. Reg = TheReg->Encoding;
  2618. else
  2619. Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
  2620. if (Reg != -1) {
  2621. ReplaceNode(N, CurDAG->getMachineNode(
  2622. AArch64::MSR, DL, MVT::Other,
  2623. CurDAG->getTargetConstant(Reg, DL, MVT::i32),
  2624. N->getOperand(2), N->getOperand(0)));
  2625. return true;
  2626. }
  2627. return false;
  2628. }
  2629. /// We've got special pseudo-instructions for these
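/// For example, an i32 ATOMIC_CMP_SWAP without LSE is selected as the
/// CMP_SWAP_32 pseudo, which is later expanded into a load-exclusive /
/// store-exclusive retry loop.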
  2630. bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  2631. unsigned Opcode;
  2632. EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  2633. // Leave IR for LSE if subtarget supports it.
  2634. if (Subtarget->hasLSE()) return false;
  2635. if (MemTy == MVT::i8)
  2636. Opcode = AArch64::CMP_SWAP_8;
  2637. else if (MemTy == MVT::i16)
  2638. Opcode = AArch64::CMP_SWAP_16;
  2639. else if (MemTy == MVT::i32)
  2640. Opcode = AArch64::CMP_SWAP_32;
  2641. else if (MemTy == MVT::i64)
  2642. Opcode = AArch64::CMP_SWAP_64;
  2643. else
  2644. llvm_unreachable("Unknown AtomicCmpSwap type");
  2645. MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  2646. SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
  2647. N->getOperand(0)};
  2648. SDNode *CmpSwap = CurDAG->getMachineNode(
  2649. Opcode, SDLoc(N),
  2650. CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
  2651. MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  2652. CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
  2653. ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  2654. ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  2655. CurDAG->RemoveDeadNode(N);
  2656. return true;
  2657. }
  2658. bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
  2659. SDValue &Offset) {
  2660. auto C = dyn_cast<ConstantSDNode>(N);
  2661. if (!C)
  2662. return false;
  2663. auto Ty = N->getValueType(0);
  2664. int64_t Imm = C->getSExtValue();
  2665. SDLoc DL(N);
  2666. if ((Imm >= -128) && (Imm <= 127)) {
  2667. Base = CurDAG->getTargetConstant(Imm, DL, Ty);
  2668. Offset = CurDAG->getTargetConstant(0, DL, Ty);
  2669. return true;
  2670. }
  2671. if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
  2672. Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
  2673. Offset = CurDAG->getTargetConstant(8, DL, Ty);
  2674. return true;
  2675. }
  2676. return false;
  2677. }
  2678. bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
  2679. if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
  2680. const int64_t ImmVal = CNode->getSExtValue();
  2681. SDLoc DL(N);
  2682. switch (VT.SimpleTy) {
  2683. case MVT::i8:
2684. // i8 values can always be selected with no shift; mask the immediate to
2685. // deal with the sign-extended value produced by lowering.
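// For example, an i8 immediate of -1 arrives here sign-extended and is
// selected as #255 with a shift of 0.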
  2686. Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
  2687. Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32);
  2688. return true;
  2689. case MVT::i16:
  2690. // i16 values get sign-extended to 32-bits during lowering.
  2691. if ((ImmVal & 0xFF) == ImmVal) {
  2692. Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
  2693. Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
  2694. return true;
  2695. } else if ((ImmVal & 0xFF) == 0) {
  2696. Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
  2697. Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32);
  2698. return true;
  2699. }
  2700. break;
  2701. case MVT::i32:
  2702. case MVT::i64:
  2703. // Range of immediate won't trigger signedness problems for 32/64b.
  2704. if ((ImmVal & 0xFF) == ImmVal) {
  2705. Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
  2706. Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
  2707. return true;
  2708. } else if ((ImmVal & 0xFF00) == ImmVal) {
  2709. Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
  2710. Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
  2711. return true;
  2712. }
  2713. break;
  2714. default:
  2715. break;
  2716. }
  2717. }
  2718. return false;
  2719. }
  2720. bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  2721. if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
  2722. int64_t ImmVal = CNode->getSExtValue();
  2723. SDLoc DL(N);
  2724. if (ImmVal >= -128 && ImmVal < 128) {
  2725. Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
  2726. return true;
  2727. }
  2728. }
  2729. return false;
  2730. }
  2731. bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
  2732. if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
  2733. uint64_t ImmVal = CNode->getZExtValue();
  2734. switch (VT.SimpleTy) {
  2735. case MVT::i8:
  2736. ImmVal &= 0xFF;
  2737. break;
  2738. case MVT::i16:
  2739. ImmVal &= 0xFFFF;
  2740. break;
  2741. case MVT::i32:
  2742. ImmVal &= 0xFFFFFFFF;
  2743. break;
  2744. case MVT::i64:
  2745. break;
  2746. default:
  2747. llvm_unreachable("Unexpected type");
  2748. }
  2749. if (ImmVal < 256) {
  2750. Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
  2751. return true;
  2752. }
  2753. }
  2754. return false;
  2755. }
  2756. bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
  2757. bool Invert) {
  2758. if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
  2759. uint64_t ImmVal = CNode->getZExtValue();
  2760. SDLoc DL(N);
  2761. if (Invert)
  2762. ImmVal = ~ImmVal;
2763. // Replicate the element-sized immediate across 64 bits before encoding.
  2764. switch (VT.SimpleTy) {
  2765. case MVT::i8:
  2766. ImmVal &= 0xFF;
  2767. ImmVal |= ImmVal << 8;
  2768. ImmVal |= ImmVal << 16;
  2769. ImmVal |= ImmVal << 32;
  2770. break;
  2771. case MVT::i16:
  2772. ImmVal &= 0xFFFF;
  2773. ImmVal |= ImmVal << 16;
  2774. ImmVal |= ImmVal << 32;
  2775. break;
  2776. case MVT::i32:
  2777. ImmVal &= 0xFFFFFFFF;
  2778. ImmVal |= ImmVal << 32;
  2779. break;
  2780. case MVT::i64:
  2781. break;
  2782. default:
  2783. llvm_unreachable("Unexpected type");
  2784. }
  2785. uint64_t encoding;
  2786. if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
  2787. Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
  2788. return true;
  2789. }
  2790. }
  2791. return false;
  2792. }
  2793. // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
2794. // Rather than attempt to normalise everything, we can sometimes saturate the
  2795. // shift amount during selection. This function also allows for consistent
  2796. // isel patterns by ensuring the resulting "Imm" node is of the i32 type
  2797. // required by the instructions.
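// For example, with Low == 1, High == 8 and AllowSaturation set, a constant
// shift amount of 27 is selected as 8; without AllowSaturation it is
// rejected.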
  2798. bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
  2799. uint64_t High, bool AllowSaturation,
  2800. SDValue &Imm) {
  2801. if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
  2802. uint64_t ImmVal = CN->getZExtValue();
  2803. // Reject shift amounts that are too small.
  2804. if (ImmVal < Low)
  2805. return false;
  2806. // Reject or saturate shift amounts that are too big.
  2807. if (ImmVal > High) {
  2808. if (!AllowSaturation)
  2809. return false;
  2810. ImmVal = High;
  2811. }
  2812. Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
  2813. return true;
  2814. }
  2815. return false;
  2816. }
  2817. bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  2818. // tagp(FrameIndex, IRGstack, tag_offset):
  2819. // since the offset between FrameIndex and IRGstack is a compile-time
  2820. // constant, this can be lowered to a single ADDG instruction.
  2821. if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
  2822. return false;
  2823. }
  2824. SDValue IRG_SP = N->getOperand(2);
  2825. if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
  2826. cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
  2827. Intrinsic::aarch64_irg_sp) {
  2828. return false;
  2829. }
  2830. const TargetLowering *TLI = getTargetLowering();
  2831. SDLoc DL(N);
  2832. int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  2833. SDValue FiOp = CurDAG->getTargetFrameIndex(
  2834. FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  2835. int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  2836. SDNode *Out = CurDAG->getMachineNode(
  2837. AArch64::TAGPstack, DL, MVT::i64,
  2838. {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
  2839. CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  2840. ReplaceNode(N, Out);
  2841. return true;
  2842. }
  2843. void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  2844. assert(isa<ConstantSDNode>(N->getOperand(3)) &&
  2845. "llvm.aarch64.tagp third argument must be an immediate");
  2846. if (trySelectStackSlotTagP(N))
  2847. return;
2848. // FIXME: the above applies whenever the offset between Op1 and Op2 is a
  2849. // compile-time constant, not just for stack allocations.
  2850. // General case for unrelated pointers in Op1 and Op2.
  2851. SDLoc DL(N);
  2852. int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  2853. SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
  2854. {N->getOperand(1), N->getOperand(2)});
  2855. SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
  2856. {SDValue(N1, 0), N->getOperand(2)});
  2857. SDNode *N3 = CurDAG->getMachineNode(
  2858. AArch64::ADDG, DL, MVT::i64,
  2859. {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
  2860. CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  2861. ReplaceNode(N, N3);
  2862. }
  2863. // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
  2864. // vector types larger than NEON don't have a matching SubRegIndex.
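// For example, a v4i32 (128-bit) result is extracted via the zsub
// subregister, while wider fixed length types fall back to
// COPY_TO_REGCLASS.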
  2865. static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  2866. assert(V.getValueType().isScalableVector() &&
  2867. V.getValueType().getSizeInBits().getKnownMinSize() ==
  2868. AArch64::SVEBitsPerBlock &&
  2869. "Expected to extract from a packed scalable vector!");
  2870. assert(VT.isFixedLengthVector() &&
  2871. "Expected to extract a fixed length vector!");
  2872. SDLoc DL(V);
  2873. switch (VT.getSizeInBits()) {
  2874. case 64: {
  2875. auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
  2876. return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  2877. }
  2878. case 128: {
  2879. auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
  2880. return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  2881. }
  2882. default: {
  2883. auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  2884. return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  2885. }
  2886. }
  2887. }
  2888. // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
  2889. // vector types larger than NEON don't have a matching SubRegIndex.
  2890. static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  2891. assert(VT.isScalableVector() &&
  2892. VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
  2893. "Expected to insert into a packed scalable vector!");
  2894. assert(V.getValueType().isFixedLengthVector() &&
  2895. "Expected to insert a fixed length vector!");
  2896. SDLoc DL(V);
  2897. switch (V.getValueType().getSizeInBits()) {
  2898. case 64: {
  2899. auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
  2900. auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  2901. return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
  2902. SDValue(Container, 0), V, SubReg);
  2903. }
  2904. case 128: {
  2905. auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
  2906. auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  2907. return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
  2908. SDValue(Container, 0), V, SubReg);
  2909. }
  2910. default: {
  2911. auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  2912. return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  2913. }
  2914. }
  2915. }
  2916. void AArch64DAGToDAGISel::Select(SDNode *Node) {
  2917. // If we have a custom node, we already have selected!
  2918. if (Node->isMachineOpcode()) {
  2919. LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
  2920. Node->setNodeId(-1);
  2921. return;
  2922. }
2923. // A few custom selection cases.
  2924. EVT VT = Node->getValueType(0);
  2925. switch (Node->getOpcode()) {
  2926. default:
  2927. break;
  2928. case ISD::ATOMIC_CMP_SWAP:
  2929. if (SelectCMP_SWAP(Node))
  2930. return;
  2931. break;
  2932. case ISD::READ_REGISTER:
  2933. if (tryReadRegister(Node))
  2934. return;
  2935. break;
  2936. case ISD::WRITE_REGISTER:
  2937. if (tryWriteRegister(Node))
  2938. return;
  2939. break;
  2940. case ISD::ADD:
  2941. if (tryMLAV64LaneV128(Node))
  2942. return;
  2943. break;
  2944. case ISD::LOAD: {
  2945. // Try to select as an indexed load. Fall through to normal processing
  2946. // if we can't.
  2947. if (tryIndexedLoad(Node))
  2948. return;
  2949. break;
  2950. }
  2951. case ISD::SRL:
  2952. case ISD::AND:
  2953. case ISD::SRA:
  2954. case ISD::SIGN_EXTEND_INREG:
  2955. if (tryBitfieldExtractOp(Node))
  2956. return;
  2957. if (tryBitfieldInsertInZeroOp(Node))
  2958. return;
  2959. LLVM_FALLTHROUGH;
  2960. case ISD::ROTR:
  2961. case ISD::SHL:
  2962. if (tryShiftAmountMod(Node))
  2963. return;
  2964. break;
  2965. case ISD::SIGN_EXTEND:
  2966. if (tryBitfieldExtractOpFromSExt(Node))
  2967. return;
  2968. break;
  2969. case ISD::FP_EXTEND:
  2970. if (tryHighFPExt(Node))
  2971. return;
  2972. break;
  2973. case ISD::OR:
  2974. if (tryBitfieldInsertOp(Node))
  2975. return;
  2976. break;
  2977. case ISD::EXTRACT_SUBVECTOR: {
  2978. // Bail when not a "cast" like extract_subvector.
  2979. if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
  2980. break;
  2981. // Bail when normal isel can do the job.
  2982. EVT InVT = Node->getOperand(0).getValueType();
  2983. if (VT.isScalableVector() || InVT.isFixedLengthVector())
  2984. break;
  2985. // NOTE: We can only get here when doing fixed length SVE code generation.
  2986. // We do manual selection because the types involved are not linked to real
  2987. // registers (despite being legal) and must be coerced into SVE registers.
  2988. //
  2989. // NOTE: If the above changes, be aware that selection will still not work
  2990. // because the td definition of extract_vector does not support extracting
  2991. // a fixed length vector from a scalable vector.
  2992. ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
  2993. return;
  2994. }
  2995. case ISD::INSERT_SUBVECTOR: {
  2996. // Bail when not a "cast" like insert_subvector.
  2997. if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
  2998. break;
  2999. if (!Node->getOperand(0).isUndef())
  3000. break;
  3001. // Bail when normal isel should do the job.
  3002. EVT InVT = Node->getOperand(1).getValueType();
  3003. if (VT.isFixedLengthVector() || InVT.isScalableVector())
  3004. break;
  3005. // NOTE: We can only get here when doing fixed length SVE code generation.
  3006. // We do manual selection because the types involved are not linked to real
  3007. // registers (despite being legal) and must be coerced into SVE registers.
  3008. //
  3009. // NOTE: If the above changes, be aware that selection will still not work
  3010. // because the td definition of insert_vector does not support inserting a
  3011. // fixed length vector into a scalable vector.
  3012. ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
  3013. return;
  3014. }
  3015. case ISD::Constant: {
  3016. // Materialize zero constants as copies from WZR/XZR. This allows
  3017. // the coalescer to propagate these into other instructions.
  3018. ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
  3019. if (ConstNode->isZero()) {
  3020. if (VT == MVT::i32) {
  3021. SDValue New = CurDAG->getCopyFromReg(
  3022. CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
  3023. ReplaceNode(Node, New.getNode());
  3024. return;
  3025. } else if (VT == MVT::i64) {
  3026. SDValue New = CurDAG->getCopyFromReg(
  3027. CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
  3028. ReplaceNode(Node, New.getNode());
  3029. return;
  3030. }
  3031. }
  3032. break;
  3033. }
  3034. case ISD::FrameIndex: {
  3035. // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
  3036. int FI = cast<FrameIndexSDNode>(Node)->getIndex();
  3037. unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
  3038. const TargetLowering *TLI = getTargetLowering();
  3039. SDValue TFI = CurDAG->getTargetFrameIndex(
  3040. FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  3041. SDLoc DL(Node);
  3042. SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
  3043. CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
  3044. CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
  3045. return;
  3046. }
  3047. case ISD::INTRINSIC_W_CHAIN: {
  3048. unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
  3049. switch (IntNo) {
  3050. default:
  3051. break;
  3052. case Intrinsic::aarch64_ldaxp:
  3053. case Intrinsic::aarch64_ldxp: {
  3054. unsigned Op =
  3055. IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
  3056. SDValue MemAddr = Node->getOperand(2);
  3057. SDLoc DL(Node);
  3058. SDValue Chain = Node->getOperand(0);
  3059. SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
  3060. MVT::Other, MemAddr, Chain);
  3061. // Transfer memoperands.
  3062. MachineMemOperand *MemOp =
  3063. cast<MemIntrinsicSDNode>(Node)->getMemOperand();
  3064. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  3065. ReplaceNode(Node, Ld);
  3066. return;
  3067. }
  3068. case Intrinsic::aarch64_stlxp:
  3069. case Intrinsic::aarch64_stxp: {
  3070. unsigned Op =
  3071. IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
  3072. SDLoc DL(Node);
  3073. SDValue Chain = Node->getOperand(0);
  3074. SDValue ValLo = Node->getOperand(2);
  3075. SDValue ValHi = Node->getOperand(3);
  3076. SDValue MemAddr = Node->getOperand(4);
  3077. // Place arguments in the right order.
  3078. SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
  3079. SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
  3080. // Transfer memoperands.
  3081. MachineMemOperand *MemOp =
  3082. cast<MemIntrinsicSDNode>(Node)->getMemOperand();
  3083. CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
  3084. ReplaceNode(Node, St);
  3085. return;
  3086. }
  3087. case Intrinsic::aarch64_neon_ld1x2:
  3088. if (VT == MVT::v8i8) {
  3089. SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
  3090. return;
  3091. } else if (VT == MVT::v16i8) {
  3092. SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
  3093. return;
  3094. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3095. SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
  3096. return;
  3097. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3098. SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
  3099. return;
  3100. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3101. SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
  3102. return;
  3103. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3104. SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
  3105. return;
  3106. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3107. SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
  3108. return;
  3109. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3110. SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
  3111. return;
  3112. }
  3113. break;
  3114. case Intrinsic::aarch64_neon_ld1x3:
  3115. if (VT == MVT::v8i8) {
  3116. SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
  3117. return;
  3118. } else if (VT == MVT::v16i8) {
  3119. SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
  3120. return;
  3121. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3122. SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
  3123. return;
  3124. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3125. SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
  3126. return;
  3127. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3128. SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
  3129. return;
  3130. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3131. SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
  3132. return;
  3133. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3134. SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
  3135. return;
  3136. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3137. SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
  3138. return;
  3139. }
  3140. break;
  3141. case Intrinsic::aarch64_neon_ld1x4:
  3142. if (VT == MVT::v8i8) {
  3143. SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
  3144. return;
  3145. } else if (VT == MVT::v16i8) {
  3146. SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
  3147. return;
  3148. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3149. SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
  3150. return;
  3151. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3152. SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
  3153. return;
  3154. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3155. SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
  3156. return;
  3157. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3158. SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
  3159. return;
  3160. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3161. SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
  3162. return;
  3163. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3164. SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
  3165. return;
  3166. }
  3167. break;
  3168. case Intrinsic::aarch64_neon_ld2:
  3169. if (VT == MVT::v8i8) {
  3170. SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
  3171. return;
  3172. } else if (VT == MVT::v16i8) {
  3173. SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
  3174. return;
  3175. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3176. SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
  3177. return;
  3178. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3179. SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
  3180. return;
  3181. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3182. SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
  3183. return;
  3184. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3185. SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
  3186. return;
  3187. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3188. SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
  3189. return;
  3190. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3191. SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
  3192. return;
  3193. }
  3194. break;
  3195. case Intrinsic::aarch64_neon_ld3:
  3196. if (VT == MVT::v8i8) {
  3197. SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
  3198. return;
  3199. } else if (VT == MVT::v16i8) {
  3200. SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
  3201. return;
  3202. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3203. SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
  3204. return;
  3205. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3206. SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
  3207. return;
  3208. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3209. SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
  3210. return;
  3211. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3212. SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
  3213. return;
  3214. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3215. SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
  3216. return;
  3217. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3218. SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
  3219. return;
  3220. }
  3221. break;
  3222. case Intrinsic::aarch64_neon_ld4:
  3223. if (VT == MVT::v8i8) {
  3224. SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
  3225. return;
  3226. } else if (VT == MVT::v16i8) {
  3227. SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
  3228. return;
  3229. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3230. SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
  3231. return;
  3232. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3233. SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
  3234. return;
  3235. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3236. SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
  3237. return;
  3238. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3239. SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
  3240. return;
  3241. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3242. SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
  3243. return;
  3244. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3245. SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
  3246. return;
  3247. }
  3248. break;
  3249. case Intrinsic::aarch64_neon_ld2r:
  3250. if (VT == MVT::v8i8) {
  3251. SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
  3252. return;
  3253. } else if (VT == MVT::v16i8) {
  3254. SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
  3255. return;
  3256. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3257. SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
  3258. return;
  3259. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3260. SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
  3261. return;
  3262. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3263. SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
  3264. return;
  3265. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3266. SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
  3267. return;
  3268. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3269. SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
  3270. return;
  3271. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3272. SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
  3273. return;
  3274. }
  3275. break;
  3276. case Intrinsic::aarch64_neon_ld3r:
  3277. if (VT == MVT::v8i8) {
  3278. SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
  3279. return;
  3280. } else if (VT == MVT::v16i8) {
  3281. SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
  3282. return;
  3283. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3284. SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
  3285. return;
  3286. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3287. SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
  3288. return;
  3289. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3290. SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
  3291. return;
  3292. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3293. SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
  3294. return;
  3295. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3296. SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
  3297. return;
  3298. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3299. SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
  3300. return;
  3301. }
  3302. break;
  3303. case Intrinsic::aarch64_neon_ld4r:
  3304. if (VT == MVT::v8i8) {
  3305. SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
  3306. return;
  3307. } else if (VT == MVT::v16i8) {
  3308. SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
  3309. return;
  3310. } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
  3311. SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
  3312. return;
  3313. } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
  3314. SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
  3315. return;
  3316. } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
  3317. SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
  3318. return;
  3319. } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
  3320. SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
  3321. return;
  3322. } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
  3323. SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
  3324. return;
  3325. } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
  3326. SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
  3327. return;
  3328. }
  3329. break;
  3330. case Intrinsic::aarch64_neon_ld2lane:
  3331. if (VT == MVT::v16i8 || VT == MVT::v8i8) {
  3332. SelectLoadLane(Node, 2, AArch64::LD2i8);
  3333. return;
  3334. } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
  3335. VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
  3336. SelectLoadLane(Node, 2, AArch64::LD2i16);
  3337. return;
  3338. } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
  3339. VT == MVT::v2f32) {
  3340. SelectLoadLane(Node, 2, AArch64::LD2i32);
  3341. return;
  3342. } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
  3343. VT == MVT::v1f64) {
  3344. SelectLoadLane(Node, 2, AArch64::LD2i64);
  3345. return;
  3346. }
  3347. break;
  3348. case Intrinsic::aarch64_neon_ld3lane:
  3349. if (VT == MVT::v16i8 || VT == MVT::v8i8) {
  3350. SelectLoadLane(Node, 3, AArch64::LD3i8);
  3351. return;
  3352. } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
  3353. VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
  3354. SelectLoadLane(Node, 3, AArch64::LD3i16);
  3355. return;
  3356. } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
  3357. VT == MVT::v2f32) {
  3358. SelectLoadLane(Node, 3, AArch64::LD3i32);
  3359. return;
  3360. } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
  3361. VT == MVT::v1f64) {
  3362. SelectLoadLane(Node, 3, AArch64::LD3i64);
  3363. return;
  3364. }
  3365. break;
  3366. case Intrinsic::aarch64_neon_ld4lane:
  3367. if (VT == MVT::v16i8 || VT == MVT::v8i8) {
  3368. SelectLoadLane(Node, 4, AArch64::LD4i8);
  3369. return;
  3370. } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
  3371. VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
  3372. SelectLoadLane(Node, 4, AArch64::LD4i16);
  3373. return;
  3374. } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
  3375. VT == MVT::v2f32) {
  3376. SelectLoadLane(Node, 4, AArch64::LD4i32);
  3377. return;
  3378. } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
  3379. VT == MVT::v1f64) {
  3380. SelectLoadLane(Node, 4, AArch64::LD4i64);
  3381. return;
  3382. }
  3383. break;
  3384. case Intrinsic::aarch64_ld64b:
  3385. SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
  3386. return;
    case Intrinsic::aarch64_sve_ld2_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
                             true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
                             true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
                             true);
        return;
      }
      break;
    }
    }
  } break;
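  // Intrinsics that compute a value and carry no chain: MTE pointer tagging
  // (aarch64_tagp), NEON table lookups (tbl/tbx), widening multiplies, and the
  // Swift async context address.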
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;
    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                           : AArch64::TBLv16i8Three,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl4:
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                           : AArch64::TBLv16i8Four,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbx2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                           : AArch64::TBXv16i8Three,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx4:
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
                                           : AArch64::TBXv16i8Four,
                  true);
      return;
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_umull:
      if (tryMULLV64LaneV128(IntNo, Node))
        return;
      break;
    case Intrinsic::swift_async_context_addr: {
      SDLoc DL(Node);
      CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
                           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                                  AArch64::FP, MVT::i64),
                           CurDAG->getTargetConstant(8, DL, MVT::i32),
                           CurDAG->getTargetConstant(0, DL, MVT::i32));
      auto &MF = CurDAG->getMachineFunction();
      MF.getFrameInfo().setFrameAddressIsTaken(true);
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
      return;
    }
    }
    break;
  }
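  // Side-effecting intrinsics with no results: NEON multi-vector stores
  // (st1x2..st1x4, st2..st4, and the single-lane st2lane..st4lane forms) and
  // SVE predicated structured stores (st2/st3/st4). The stored value type is
  // taken from operand 2.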
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST1Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST1Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST1Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST1Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST1Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST1Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST1Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST1Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST1Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST1Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST2Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST2Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST2Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST2Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST2Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST3Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST3Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST3Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST3Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST3Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
                 VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST4Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 2, AArch64::ST2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 2, AArch64::ST2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 2, AArch64::ST2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 2, AArch64::ST2i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 3, AArch64::ST3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
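  // Post-incrementing structured loads (AArch64ISD::LDnpost and the LD1xN
  // variants): each node selects the write-back (_POST) form of the
  // corresponding instruction and additionally produces the updated base
  // address.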
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
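  // Post-incrementing load-and-replicate forms (LD1R..LD4R with write-back):
  // a single structure is loaded and broadcast to every lane of each result
  // vector.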
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
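  // Post-incrementing single-lane loads (LD1..LD4 to one lane with
  // write-back): the loaded element(s) are inserted into one lane of the
  // incoming vector(s).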
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
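  // Post-incrementing stores. Unlike the load nodes above, the value type is
  // taken from the stored vector (operand 1) rather than from the node's
  // results.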
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
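  // SVE structured loads with merge-zero semantics: active lanes come from
  // memory, inactive lanes of each result vector are zero.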
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
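/// For example, MVT::nxv4i1 with NumVec == 2 yields MVT::nxv8i32
/// (four 32-bit elements per 128-bit block, two vectors' worth of data).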
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
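/// For example, with a 16-byte MemVT, an address of the form
/// (add Base, (vscale 32)) selects Base with OffImm == 2, corresponding to
/// the "[Base, #2, mul vl]" addressing form.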
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}