//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
               AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
               AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
               AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
               AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
               AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
               AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
               AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
               AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
               AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
               AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
                 AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
             AssemblerPredicate<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
            AssemblerPredicate<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
             AssemblerPredicate<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
               AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
def HasJS : Predicate<"Subtarget->hasJS()">,
            AssemblerPredicate<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
               AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">;
def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
                   AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
            AssemblerPredicate<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
              AssemblerPredicate<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
             AssemblerPredicate<(all_of FeatureDIT), "dit">;
def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
                   AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
            AssemblerPredicate<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
              AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
                 AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
               AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
                   AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicate<(all_of FeatureNEON), "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
                AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
             AssemblerPredicate<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
              AssemblerPredicate<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
              AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
             AssemblerPredicate<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
                 AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicate<(all_of FeatureCRC), "crc">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
             AssemblerPredicate<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
             AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
             AssemblerPredicate<(all_of FeatureRDM), "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
                 AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicate<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
                 AssemblerPredicate<(all_of FeatureFuseAES), "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
             AssemblerPredicate<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
              AssemblerPredicate<(all_of FeatureSVE2), "sve2">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
                 AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
                 AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
                  AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
                     AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasSME : Predicate<"Subtarget->hasSME()">,
             AssemblerPredicate<(all_of FeatureSME), "sme">;
def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">,
                AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">,
                AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">,
                      AssemblerPredicate<(all_of FeatureStreamingSVE), "sme">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorStreamingSVE
    : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
      AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
                         "sve or sme">;
def HasSVE2orStreamingSVE
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
      AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
                         "sve2 or sme">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorStreamingSVE
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
      AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
                         "neon or sme">;
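
// A minimal usage sketch (illustrative, not part of the original file): an
// instruction that is legal in both normal and streaming mode lists the
// composite predicate in its Predicates field. The pseudo name below is
// hypothetical and used only for illustration.
//
//   let Predicates = [HasSVEorStreamingSVE] in
//   def EXAMPLE_SVE_PSEUDO : Pseudo<(outs ZPR8:$Zd), (ins ZPR8:$Zn), []>;
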
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
              AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
                 AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
                   AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
            AssemblerPredicate<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
                 AssemblerPredicate<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
              AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
             AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
             AssemblerPredicate<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
             AssemblerPredicate<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
             AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
              AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
              AssemblerPredicate<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
                    AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
                    AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
                    AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
            AssemblerPredicate<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
              AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
              AssemblerPredicate<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
              AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
                 AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
             AssemblerPredicate<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
              AssemblerPredicate<(all_of FeatureMOPS), "mops">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
def UseNegativeImmediates
    : Predicate<"false">,
      AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                         "NegativeImmediates">;
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
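
// Usage sketch (illustrative, not part of the original file): a Predicate's
// C++ string is evaluated against the subtarget when matching selection
// patterns, while its AssemblerPredicate feature list gates assembly and
// disassembly. Instructions opt in through their Predicates field; the name
// EXAMPLE_LSE_PSEUDO below is hypothetical.
//
//   let Predicates = [HasLSE] in
//   def EXAMPLE_LSE_PSEUDO : Pseudo<(outs GPR64:$Rold),
//                                   (ins GPR64:$Rnew, GPR64sp:$Rn), []>;
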
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;

//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
// SDTBinaryArithWithFlagsOut - RES, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;
// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;
// SDTBinaryArithWithFlagsInOut - RES, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;
def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisVec<2>, SDTCisSameAs<2,3>]>;
def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;
def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The single operand is the thread-local variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
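
// Usage sketch (illustrative, not part of the original file): a profile like
// this is paired with a chained, glued SDNode. The node flag list shown here
// is an assumption for illustration, not a quotation of this file.
//
//   def example_tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
//                                        SDT_AArch64TLSDescCallSeq,
//                                        [SDNPHasChain, SDNPOutGlue]>;
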
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;
def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
// Non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Sign-extending masked load fragments.
def asext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def asext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def asext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def asext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Zero-extending masked load fragments.
def zext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def zext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def zext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def zext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Non-temporal (unindexed, non-extending) masked load fragment.
def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
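
// Usage sketch (illustrative, not part of the original file): these fragments
// are matched in selection patterns like ordinary loads. The SVE pattern below
// is simplified and hypothetical; real patterns also handle addressing modes.
//
//   def : Pat<(nxv16i8 (nonext_masked_load GPR64sp:$base, (nxv16i1 PPR:$pg),
//                                          (nxv16i8 undef))),
//             (LD1B_IMM PPR:$pg, GPR64sp:$base, 0)>;
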
// Non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// Truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Non-temporal (unindexed, non-truncating) masked store fragment.
def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
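
// Usage sketch (illustrative, not part of the original file): a truncating
// fragment selects a narrowing store. Simplified and hypothetical, mirroring
// the load sketch above; the instruction operands are an assumption.
//
//   def : Pat<(trunc_masked_store_i8 (nxv8i16 ZPR:$Zt), GPR64sp:$base,
//                                    (nxv8i1 PPR:$pg)),
//             (ST1B_H_IMM ZPR:$Zt, PPR:$pg, GPR64sp:$base, 0)>;
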
// top16Zero - return true if the upper 16 bits of $src are known to be zero.
def top16Zero : PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
}]>;
// top32Zero - return true if the upper 32 bits of $src are known to be zero.
def top32Zero : PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
}]>;
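
// Usage sketch (illustrative, not part of the original file): PatLeafs like
// these let a pattern fire only when known-bits analysis proves the top half
// of a value is zero, e.g. narrowing a 64-bit multiply to a widening UMADDL.
// The exact pattern below is an assumption, not a quotation of this file.
//
//   def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
//             (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
//                        (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
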

// Node definitions.
def AArch64adrp    : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr     : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow  : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                  SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
                         SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                          SDNPVariadic]>;
def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                           [SDNPHasChain]>;
def AArch64cbz    : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                           [SDNPHasChain]>;
def AArch64cbnz   : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                           [SDNPHasChain]>;
def AArch64tbz    : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                           [SDNPHasChain]>;
def AArch64tbnz   : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                           [SDNPHasChain]>;
def AArch64csel  : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
def AArch64ccmp  : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn  : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
                               [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                (AArch64fcmp node:$lhs, node:$rhs)]>;
def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
def AArch64movi_edit  : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl   : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl   : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl  : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli  : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli  : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri  : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri  : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bit : SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp : SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
def AArch64cmeq : SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge : SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt : SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi : SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs : SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
def AArch64fcmeq : SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge : SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt : SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
def AArch64cmeqz : SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez : SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz : SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez : SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz : SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
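// Illustrative note (not in the original source): there is no CMTST ISD node;
// the PatFrag above expresses its semantics directly, since an element of
// cmtst(LHS, RHS) is all-ones iff (LHS & RHS) != 0, i.e. NOT(CMEQz(LHS & RHS)).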
def AArch64fcmeqz : SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez : SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz : SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez : SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz : SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
def AArch64bici : SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri : SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;
def AArch64sitof : SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof : SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
def AArch64frecpe  : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps  : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
def AArch64shadd  : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
def AArch64uhadd  : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abdu node:$lhs, node:$rhs),
                            (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abds node:$lhs, node:$rhs),
                            (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg   : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg  : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g  : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp  : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp  : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;

def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize    : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI    : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation   : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to empty list because we expect these instructions to simply get
// removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                       Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                     Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
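// Illustrative sketch (an assumption, not the authoritative pseudo-expansion):
// the JumpTableDest32 pseudo below expands roughly to
//   ldrsw $scratch, [$table, $entry, lsl #2]   // load the 32-bit offset
//   add   $dst, $table, $scratch               // table base + offset
// with the table address itself materialized separately (e.g. via MOVaddrJT).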
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;
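
// Illustrative MIR usage (hedged; syntax as seen in typical branch-relaxation
// tests, not taken from this file):
//   $x0 = SPACE 1024, undef $x0
// reserves 1024 bytes of code-layout space at this point.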
let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
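// Illustrative example (not in the original source): with +bti the printer
// uses the BTI mnemonic, e.g. "bti c" for HINT #34; without it the same
// encoding prints as "hint #34", which any assembler accepts.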

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm = 0b0010;
  let Inst{12} = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

} // mayLoad = ?, mayStore = ?

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;

// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN  : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA  : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA  : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def  USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT  : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
    : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
                                      lhs_kind, rhs_kind, RegType, AccumType,
                                      InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0, 0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
    : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
          (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

class EOR3_pattern<ValueType VecTy>
    : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
          (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
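
// Illustrative note (not in the original source): per 64-bit lane, RAX1
// computes Vd = Vn ^ rol(Vm, 1) and XAR computes Vd = ror(Vn ^ Vm, imm6),
// matching the rotate-and-xor steps of the SHA-3/Keccak rounds.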
} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
    : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
          (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
    : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm))),
          (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;

class SM4_pattern<Instruction INST, Intrinsic OpNode>
    : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
          (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
  def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here, that is done
// inside the multiclass as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;

let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}

let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}

multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}

multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}

let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  defm : FCMLA_PATS<v4f16, V64>;
  defm : FCMLA_PATS<v8f16, V128>;

  defm : FCMLA_LANE_PATS<v4f16, V64,
                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
  defm : FCMLA_LANE_PATS<v8f16, V128,
                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
  defm : FCMLA_PATS<v2f32, V64>;
  defm : FCMLA_PATS<v4f32, V128>;
  defm : FCMLA_PATS<v2f64, V128>;

  defm : FCMLA_LANE_PATS<v4f32, V128,
                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
}

// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software compatible with both CPUs that do or don't implement PA.
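// Illustrative note (not in the original source): PACIASP, defined below as
// "hint #25", executes as a NOP on cores without pointer authentication, so
// the same binary runs correctly on both old and new CPUs.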
let Uses = [LR], Defs = [LR] in {
  def PACIAZ : SystemNoOperands<0b000, "hint\t#24">;
  def PACIBZ : SystemNoOperands<0b010, "hint\t#26">;
  let isAuthenticated = 1 in {
    def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">;
    def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">;
  }
}
let Uses = [LR, SP], Defs = [LR] in {
  def PACIASP : SystemNoOperands<0b001, "hint\t#25">;
  def PACIBSP : SystemNoOperands<0b011, "hint\t#27">;
  let isAuthenticated = 1 in {
    def AUTIASP : SystemNoOperands<0b101, "hint\t#29">;
    def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">;
  }
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
  def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">;
  def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">;
  let isAuthenticated = 1 in {
    def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">;
    def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">;
  }
}

let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
  def XPACLRI : SystemNoOperands<0b111, "hint\t#7">;
}

// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use pointer authentication mnemonics, even with PA disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless PA is enabled.
def : InstAlias<"paciaz", (PACIAZ), 0>;
def : InstAlias<"pacibz", (PACIBZ), 0>;
def : InstAlias<"autiaz", (AUTIAZ), 0>;
def : InstAlias<"autibz", (AUTIBZ), 0>;
def : InstAlias<"paciasp", (PACIASP), 0>;
def : InstAlias<"pacibsp", (PACIBSP), 0>;
def : InstAlias<"autiasp", (AUTIASP), 0>;
def : InstAlias<"autibsp", (AUTIBSP), 0>;
def : InstAlias<"pacia1716", (PACIA1716), 0>;
def : InstAlias<"pacib1716", (PACIB1716), 0>;
def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;

// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {

  // When PA is enabled, a better mnemonic should be emitted.
  def : InstAlias<"paciaz", (PACIAZ), 1>;
  def : InstAlias<"pacibz", (PACIBZ), 1>;
  def : InstAlias<"autiaz", (AUTIAZ), 1>;
  def : InstAlias<"autibz", (AUTIBZ), 1>;
  def : InstAlias<"paciasp", (PACIASP), 1>;
  def : InstAlias<"pacibsp", (PACIBSP), 1>;
  def : InstAlias<"autiasp", (AUTIASP), 1>;
  def : InstAlias<"autibsp", (AUTIBSP), 1>;
  def : InstAlias<"pacia1716", (PACIA1716), 1>;
  def : InstAlias<"pacib1716", (PACIB1716), 1>;
  def : InstAlias<"autia1716", (AUTIA1716), 1>;
  def : InstAlias<"autib1716", (AUTIB1716), 1>;
  def : InstAlias<"xpaclri", (XPACLRI), 1>;

  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
                      SDPatternOperator op> {
    def IA  : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
    def IB  : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
    def DA  : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
    def DB  : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
    def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
    def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
    def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
    def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
  }

  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;

  def XPACI : ClearAuth<0, "xpaci">;
  def XPACD : ClearAuth<1, "xpacd">;

  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;

  // Combined Instructions
  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAA  : AuthBranchTwoOperands<0, 0, "braa">;
    def BRAB  : AuthBranchTwoOperands<0, 1, "brab">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
    def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
  }

  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAAZ  : AuthOneOperand<0b000, 0, "braaz">;
    def BRABZ  : AuthOneOperand<0b000, 1, "brabz">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
    def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
  }

  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
    def RETAA  : AuthReturn<0b010, 0, "retaa">;
    def RETAB  : AuthReturn<0b010, 1, "retab">;
    def ERETAA : AuthReturn<0b100, 0, "eretaa">;
    def ERETAB : AuthReturn<0b100, 1, "eretab">;
  }

  defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
  defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
}

// v8.3a floating point conversion for javascript
let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
                                      "fjcvtzs",
                                      [(set GPR32:$Rd,
                                        (int_aarch64_fjcvtzs FPR64:$Rn))]> {
  let Inst{31} = 0;
} // HasJS, HasFPARMv8

// v8.4 Flag manipulation instructions
let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
  let Inst{20-5} = 0b0000001000000000;
}
def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
                        "{\t$Rn, $imm, $mask}">;
} // HasFlagM

// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {

def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b001;
}

def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b010;
}
} // HasAltNZCV

// Armv8.5-A speculation barrier
def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
  let Inst{20-5} = 0b0001100110000111;
  let Unpredictable{11-8} = 0b1111;
  let Predicates = [HasSB];
  let hasSideEffects = 1;
}

def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;
def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;

def MRS : MRSI;
def MSR : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;

def : Pat<(AArch64mrs imm:$id),
          (MRS imm:$id)>;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                        [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;

let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
    (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
    [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
    Sched<[]>;
}

let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
    (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
    [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
    Sched<[]>;
}

// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;

// FPCR register
def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>;
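
// Illustrative note (not in the original source): these MRS/MSR immediates
// follow the system-register encoding op0<<14 | op1<<11 | CRn<<7 | CRm<<3 | op2;
// 0xdce8 decodes to PMCCNTR_EL0 (3, 3, c9, c13, 0) and 0xda20 to FPCR
// (3, 3, c4, c4, 0).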

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                       sys_cr_op:$Cm, imm0_7:$op2, XZR)>;

let Predicates = [HasTME] in {

def TSTART : TMSystemI<0b0000, "tstart",
                       [(set GPR64:$Rt, (int_aarch64_tstart))]>;

def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;

def TCANCEL : TMSystemException<0b011, "tcancel",
                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;

def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
  let mayLoad = 0;
  let mayStore = 0;
}
} // HasTME

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;
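
// Illustrative examples (not in the original source): 0x00000000deadbeef is
// i64imm_32bit (upper half zero) but not s64imm_32bit (it exceeds INT32_MAX),
// while -1 is s64imm_32bit but not i64imm_32bit.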

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
                   GISDNodeXFormEquiv<trunc_imm>;

let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
      N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
      N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

def : Pat<(f32 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
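
// Illustrative example (not in the original source): f32 1.0 bit-casts to
// 0x3f800000, so it becomes (MOVi32imm 0x3f800000) followed by a copy into
// FPR32, i.e. a GPR->FPR move after register allocation.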
  1322. // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
  1323. // sequences.
  1324. def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
  1325. tglobaladdr:$g1, tglobaladdr:$g0),
  1326. (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
  1327. tglobaladdr:$g1, 16),
  1328. tglobaladdr:$g2, 32),
  1329. tglobaladdr:$g3, 48)>;
  1330. def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
  1331. tblockaddress:$g1, tblockaddress:$g0),
  1332. (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
  1333. tblockaddress:$g1, 16),
  1334. tblockaddress:$g2, 32),
  1335. tblockaddress:$g3, 48)>;
  1336. def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
  1337. tconstpool:$g1, tconstpool:$g0),
  1338. (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
  1339. tconstpool:$g1, 16),
  1340. tconstpool:$g2, 32),
  1341. tconstpool:$g3, 48)>;
  1342. def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
  1343. tjumptable:$g1, tjumptable:$g0),
  1344. (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
  1345. tjumptable:$g1, 16),
  1346. tjumptable:$g2, 32),
  1347. tjumptable:$g3, 48)>;
  1348. //===----------------------------------------------------------------------===//
  1349. // Arithmetic instructions.
  1350. //===----------------------------------------------------------------------===//
  1351. // Add/subtract with carry.
  1352. defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
  1353. defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
  1354. def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
  1355. def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
  1356. def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
  1357. def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
  1358. // Add/subtract
  1359. defm ADD : AddSub<0, "add", "sub", add>;
  1360. defm SUB : AddSub<1, "sub", "add">;
  1361. def : InstAlias<"mov $dst, $src",
  1362. (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
  1363. def : InstAlias<"mov $dst, $src",
  1364. (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
  1365. def : InstAlias<"mov $dst, $src",
  1366. (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
  1367. def : InstAlias<"mov $dst, $src",
  1368. (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
  1369. defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
  1370. defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
  1371. // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
  1372. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
  1373. (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
  1374. def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
  1375. (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
  1376. def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
  1377. (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
  1378. def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
  1379. (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
  1380. def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
  1381. (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
  1382. def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
  1383. (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
  1384. let AddedComplexity = 1 in {
  1385. def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
  1386. (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
  1387. def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
  1388. (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
  1389. }
  1390. // Because of the immediate format for add/sub-imm instructions, the
  1391. // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
  1392. // These patterns capture that transformation.
  1393. let AddedComplexity = 1 in {
  1394. def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
  1395. (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  1396. def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
  1397. (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  1398. def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
  1399. (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  1400. def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
  1401. (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  1402. }
  1403. // Because of the immediate format for add/sub-imm instructions, the
  1404. // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
  1405. // These patterns capture that transformation.
  1406. let AddedComplexity = 1 in {
  1407. def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
  1408. (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  1409. def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
  1410. (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  1411. def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
  1412. (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  1413. def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
  1414. (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  1415. }
  1416. def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
  1417. def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
  1418. def : InstAlias<"neg $dst, $src$shift",
  1419. (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
  1420. def : InstAlias<"neg $dst, $src$shift",
  1421. (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
  1422. def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
  1423. def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
  1424. def : InstAlias<"negs $dst, $src$shift",
  1425. (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
  1426. def : InstAlias<"negs $dst, $src$shift",
  1427. (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
  1428. // Unsigned/Signed divide
  1429. defm UDIV : Div<0, "udiv", udiv>;
  1430. defm SDIV : Div<1, "sdiv", sdiv>;
  1431. def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
  1432. def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
  1433. def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
  1434. def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;
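// A plain multiply is selected as MADD with the zero register as the
// accumulator; a negated multiply is MSUB with a zero accumulator,
// computing 0 - Rn*Rm.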
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
} // AddedComplexity = 5
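// The aliases below map the accumulator-free assembler mnemonics onto the
// accumulating instructions with a zero accumulator, e.g.
// "smull x0, w1, w2" is "smaddl x0, w1, w2, xzr".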
def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;
// CRC32
def CRC32Brr  : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b,  "crc32b">;
def CRC32Hrr  : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h,  "crc32h">;
def CRC32Wrr  : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w,  "crc32w">;
def CRC32Xrr  : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x,  "crc32x">;
def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
// v8.1 atomic CAS
defm CAS   : CompareAndSwap<0, 0, "">;
defm CASA  : CompareAndSwap<1, 0, "a">;
defm CASL  : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;
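// In the CAS/CASP/SWP/LD<OP> mnemonics, the "a" suffix selects acquire
// semantics, "l" release, and "al" acquire-release.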
// v8.1 atomic CASP
defm CASP   : CompareAndSwapPair<0, 0, "">;
defm CASPA  : CompareAndSwapPair<1, 0, "a">;
defm CASPL  : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;

// v8.1 atomic SWP
defm SWP   : Swap<0, 0, "">;
defm SWPA  : Swap<1, 0, "a">;
defm SWPL  : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;
// v8.1 atomic LD<OP>(register): atomically loads the old value into Rt and
// then performs the ST<OP>(register) operation.
defm LDADD    : LDOPregister<0b000, "add",  0, 0, "">;
defm LDADDA   : LDOPregister<0b000, "add",  1, 0, "a">;
defm LDADDL   : LDOPregister<0b000, "add",  0, 1, "l">;
defm LDADDAL  : LDOPregister<0b000, "add",  1, 1, "al">;
defm LDCLR    : LDOPregister<0b001, "clr",  0, 0, "">;
defm LDCLRA   : LDOPregister<0b001, "clr",  1, 0, "a">;
defm LDCLRL   : LDOPregister<0b001, "clr",  0, 1, "l">;
defm LDCLRAL  : LDOPregister<0b001, "clr",  1, 1, "al">;
defm LDEOR    : LDOPregister<0b010, "eor",  0, 0, "">;
defm LDEORA   : LDOPregister<0b010, "eor",  1, 0, "a">;
defm LDEORL   : LDOPregister<0b010, "eor",  0, 1, "l">;
defm LDEORAL  : LDOPregister<0b010, "eor",  1, 1, "al">;
defm LDSET    : LDOPregister<0b011, "set",  0, 0, "">;
defm LDSETA   : LDOPregister<0b011, "set",  1, 0, "a">;
defm LDSETL   : LDOPregister<0b011, "set",  0, 1, "l">;
defm LDSETAL  : LDOPregister<0b011, "set",  1, 1, "al">;
defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
// v8.1 atomic ST<OP>(register), defined as aliases of LD<OP>(register) with
// Rt = WZR/XZR (the loaded value is discarded).
defm : STOPregister<"stadd",  "LDADD">;  // STADDx
defm : STOPregister<"stclr",  "LDCLR">;  // STCLRx
defm : STOPregister<"steor",  "LDEOR">;  // STEORx
defm : STOPregister<"stset",  "LDSET">;  // STSETx
defm : STOPregister<"stsmax", "LDSMAX">; // STSMAXx
defm : STOPregister<"stsmin", "LDSMIN">; // STSMINx
defm : STOPregister<"stumax", "LDUMAX">; // STUMAXx
defm : STOPregister<"stumin", "LDUMIN">; // STUMINx
// v8.5 Memory Tagging Extension
let Predicates = [HasMTE] in {

def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp,
                         GPR64>, Sched<[]> {
  let Inst{31} = 1;
}
def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>,
          Sched<[]> {
  let Inst{31} = 1;
  let isNotDuplicable = 1;
}
def ADDG : AddSubG<0, "addg", null_frag>;
def SUBG : AddSubG<1, "subg", null_frag>;
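// ADDG/SUBG adjust both parts of a tagged pointer at once: a 16-byte-scaled
// unsigned 6-bit offset is applied to the address, and the 4-bit tag offset
// is added to the pointer's logical address tag (modulo 16).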
def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;

def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]> {
  let Defs = [NZCV];
}

def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;

def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;

def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6),
            imm0_15:$imm4),
          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
def : Pat<(int_aarch64_ldg GPR64:$Rt,
            (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;

def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;

def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
                        (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
                        (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
                         (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
  let Inst{23} = 0;
}
defm STG   : MemTagStore<0b00, "stg">;
defm STZG  : MemTagStore<0b01, "stzg">;
defm ST2G  : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2GOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2GOffset $Rn, $Rm, $imm)>;

defm STGP     : StorePairOffset<0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpre  : StorePairPreIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

def : Pat<(int_aarch64_stg GPR64:$Rt,
            (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm),
            GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;
def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd),
             (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
// Large STG to be expanded into a loop. $sz is the size, $Rn is the start
// address. $Rn_wback is one past the end of the range. $Rm is the loop
// counter.
let isCodeGenOnly = 1, mayStore = 1 in {
def STGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

// Variants of the above where $Rn2 is an independent register, not tied to
// the input register $Rn. Their purpose is to allow a FrameIndex operand as
// $Rn (which of course cannot be written back).
def STGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;
}

} // Predicates = [HasMTE]
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;
// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;

def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS  : OneOperandData<0b101, "cls">;
defm CLZ  : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit", bitreverse>;

def REV16Wr : OneWRegData<0b001, "rev16",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
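// There is no dedicated count-trailing-zeros instruction; cttz(x) is
// implemented as clz(rbit(x)), i.e. bit-reverse and count leading zeros.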
def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
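// CLS counts leading sign bits (excluding the sign bit itself). The DAG
// below is the canonical expansion ctlz((((x >>s msb) ^ x) << 1) | 1),
// which computes the same value, so it is matched back to a single CLS.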
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b010, "rev", bswap>;
def REVXr   : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//

let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//

let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
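// These two transforms encode a left shift as UBFM: "lsl Wd, Wn, #sh" is
// "ubfm Wd, Wn, #((32 - sh) % 32), #(31 - sh)". For example,
// "lsl w0, w1, #3" becomes "ubfm w0, w1, #29, #28".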
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                   (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_b imm0_63:$imm)))>;

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//

defm CSEL : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;

def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val,
               (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
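// For example, "cset w0, eq" assembles to "csinc w0, wzr, wzr, ne":
// w0 = (NE ? wzr : wzr + 1), i.e. 1 exactly when EQ holds.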
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel,
               [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1
// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
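// ADR materializes the byte address of the label; ADRP materializes the
// address of its 4KiB page, with the low 12 bits supplied separately (e.g.
// by a :lo12: relocation on a following ADD or load/store offset).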
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", []>;
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
              Sched<[WriteBrReg]>,
              PseudoInstExpansion<(BLR GPR64:$Rn)>;
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                   Sched<[WriteBrReg]>;
def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
              Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}
// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//

def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//

defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//

defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//

let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall

def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//

let isTrap = 1 in {
def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW  : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX  : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS  : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD  : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ  : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre  : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre  : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre  : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre  : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre  : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost  : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost  : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost  : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost  : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost  : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", untyped, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
// For regular loads we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
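// Each ScalToVecROLoadPat instantiation produces two patterns: one for the
// W-register (32-bit) index form and one for the X-register (64-bit) index
// form of the register-offset addressing mode.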
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}
let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;
defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;

def : Pat <(v1i64 (scalar_to_vector (i64
                     (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                          ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                     (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                          ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
// Match all loads of 64-bit width whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {
  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;

// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}
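// SUBREG_TO_REG with a 0 immediate asserts that the upper 32 bits of the
// 64-bit result are zero. That holds here because writing a W register
// implicitly zeroes the top half of the corresponding X register, so the
// 32-bit load already performs the zero-extension.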
let AddedComplexity = 10 in {
defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

// zextloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;

// extload -> zextload
defm : ExtLoadTo64ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

// extloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}
// zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// extload -> zextload
defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

// zextloadi1 -> zextloadi8
defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
// For regular loads we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
// Match all loads of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                    [(set GPR64:$Rt,
                          (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                     uimm12s4:$offset)))]>;
// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch imm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;
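// The alignment check above exists because LDR (literal) encodes its target
// as a signed 19-bit *word* offset from the PC, so it can only address
// 4-byte-aligned locations within +/-1MiB.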
def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                          [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
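// An extending load leaves the high bits of the result unspecified, so it is
// always legal to select the zero-extending form.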
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes, since we only want these to match when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
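// For example, "ldr x0, [x1, #-8]" and "ldr x0, [x1, #1]" cannot be encoded
// with LDRXui's unsigned, 8-byte-scaled immediate, so both assemble to LDURXi.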
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                              [(AArch64Prefetch imm:$Rt,
                                                (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
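// LDTR* perform the access with EL0 (unprivileged) permissions when executed
// at a higher exception level; at EL0 they behave like normal loads.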
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op,  "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16,     store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32,     store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
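// A truncating store of an i64 only uses the low bits of the source, so it
// can store the 32-bit sub-register (sub_32) with the matching W-form store.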
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {
  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,   FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
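// e.g. storing lane 0 of a v4f32 is just a scalar store of the f32 held in
// the ssub sub-register, so it can use STRS instead of an ST1 lane store.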
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, dsub, STRDroW, STRDroX>;
}
//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                     [(store GPR64z:$Rt,
                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                     [(store GPR32z:$Rt,
                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                      [(truncstorei16 GPR32z:$Rt,
                                      (am_indexed16 GPR64sp:$Rn,
                                                    uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                      [(truncstorei8 GPR32z:$Rt,
                                     (am_indexed8 GPR64sp:$Rn,
                                                  uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}
let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, dsub, uimm12s8, STRDui>;
}
//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                           [(store GPR64z:$Rt,
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                           [(store GPR32z:$Rt,
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                           [(store FPR8Op:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                           [(store (f16 FPR16Op:$Rt),
                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                           [(store (f32 FPR32Op:$Rt),
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                           [(store (f64 FPR64Op:$Rt),
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                           [(store (f128 FPR128Op:$Rt),
                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                            [(truncstorei16 GPR32z:$Rt,
                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                            [(truncstorei8 GPR32z:$Rt,
                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
// Armv8.4 Weaker Release Consistency enhancements
// LDAPR & STLR with Immediate Offset instructions
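// Unlike the base LDAPR/STLR forms, which only address [Xn] with no offset,
// these variants take a 9-bit signed byte offset.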
let Predicates = [HasRCPC_IMMO] in {
  defm STLURB    : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
  defm STLURH    : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
  defm STLURW    : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
  defm STLURX    : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
  defm LDAPURB   : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
  defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
  defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
  defm LDAPURH   : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
  defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
  defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
  defm LDAPUR    : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
  defm LDAPURSW  : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
  defm LDAPURX   : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10
// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
}
//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                   simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8,  i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;

def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//
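// LDXR/LDAXR mark an exclusive monitor on the accessed address; the paired
// STXR/STLXR succeeds only if the monitor is still valid, writing 0 to its
// status register on success and 1 on failure.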
def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW : StoreRelease<0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX : StoreRelease<0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB : StoreRelease<0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH : StoreRelease<0b01, 1, 0, 0, 0, GPR32, "stllrh">;
}
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
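// e.g. FCVTZS of an out-of-range f32 clamps to INT32_MIN/INT32_MAX and
// converts NaN to 0, matching the fp_to_sint_sat/fp_to_uint_sat semantics.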
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
              (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
    def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
              (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
              (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
    def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
              (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
    def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
              (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
    def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
              (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
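// Fold an explicit rounding step into the matching directed-rounding convert,
// e.g. fp_to_sint(fceil(x)) is a single FCVTPS (round toward +infinity).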
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
              (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
    def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
              (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (lround f16:$Rn)),
            (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
  def : Pat<(i64 (lround f16:$Rn)),
            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
  def : Pat<(i64 (llround f16:$Rn)),
            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
}
def : Pat<(i32 (lround f32:$Rn)),
          (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
def : Pat<(i32 (lround f64:$Rn)),
          (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
def : Pat<(i64 (lround f32:$Rn)),
          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (lround f64:$Rn)),
          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
def : Pat<(i64 (llround f32:$Rn)),
          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (llround f64:$Rn)),
          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
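// Rematerializable means the register allocator can re-emit the zeroing fmov
// at a use point instead of spilling and reloading the constant.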
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
             Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
}

// Similarly, add assembler aliases for "fmov $Rd, #0.0" via the zero register.
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
      Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
  3390. //===----------------------------------------------------------------------===//
  3391. // Floating point conversion instruction.
  3392. //===----------------------------------------------------------------------===//
  3393. defm FCVT : FPConversion<"fcvt">;
  3394. //===----------------------------------------------------------------------===//
  3395. // Floating point single operand instructions.
  3396. //===----------------------------------------------------------------------===//
  3397. defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
  3398. defm FMOV : SingleOperandFPData<0b0000, "fmov">;
  3399. defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
  3400. defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
  3401. defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
  3402. defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
  3403. defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
  3404. defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
  3405. defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
  3406. defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
  3407. let SchedRW = [WriteFDiv] in {
  3408. defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
  3409. }
  3410. let Predicates = [HasFRInt3264] in {
  3411. defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  3412. defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  3413. defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  3414. defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
  3415. } // HasFRInt3264
  3416. let Predicates = [HasFullFP16] in {
  3417. def : Pat<(i32 (lrint f16:$Rn)),
  3418. (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
  3419. def : Pat<(i64 (lrint f16:$Rn)),
  3420. (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
  3421. def : Pat<(i64 (llrint f16:$Rn)),
  3422. (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
  3423. }
  3424. def : Pat<(i32 (lrint f32:$Rn)),
  3425. (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
  3426. def : Pat<(i32 (lrint f64:$Rn)),
  3427. (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
  3428. def : Pat<(i64 (lrint f32:$Rn)),
  3429. (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
  3430. def : Pat<(i64 (lrint f64:$Rn)),
  3431. (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
  3432. def : Pat<(i64 (llrint f32:$Rn)),
  3433. (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
  3434. def : Pat<(i64 (llrint f64:$Rn)),
  3435. (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
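// Note: lrint/llrint must honour the current rounding mode, which FCVTZS
// alone would not (it always truncates). Rounding to an integral value in
// the current mode first (frintx) makes the subsequent truncating convert
// exact, e.g.:
//   lrint(x)  ==>  frintx d0, d0 ; fcvtzs x0, d0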
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", fsub>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
// the NEON variant.

// Here we handle first a + (-b)*c (i.e. a - b*c), which maps to FMSUB:
let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c"
let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
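// Informally, the algebra behind the two pattern groups above:
//   a + (-b)*c     ==  a - b*c      ==>  FMSUB  (computes Ra - Rn*Rm)
//   (-a) + (-b)*c  ==  -(a + b*c)   ==>  FNMADD (computes -(Ra + Rn*Rm))
// so both forms with a negated multiplicand fold into one instruction.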
//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}
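// For illustration (a sketch, not the literal emitted MIR): the custom
// inserter lowers this to a conditional branch over a copy, joined by a
// PHI, because there is no 128-bit register form of CSEL:
//   b.<cond> %bb.true
//   %bb.false:            ; fall-through, keeps $Rm
//   %bb.join: $Rd = PHI($Rn from %bb.true, $Rm from %bb.false)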
//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//

let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
}
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
  def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
  let usesCustomInserter = 1 in
    def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                   Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
  // Save CSRs in order, {FPOffset}
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
  // Restore CSRs in order
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                        AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
                           (zext (extract_high_v16i8 V128:$opB))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
                                (zext (extract_high_v16i8 V128:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
                           (zext (extract_high_v8i16 V128:$opB))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
                           (zext (extract_high_v4i32 V128:$opB))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
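// The xor/add forms above recognise abs() after it has been expanded into
// shift arithmetic: with s = x >>s 15 (all-ones when x is negative),
//   (x + s) ^ s  ==  abs(x)
// which is how mid-level passes commonly lower abs on 16-bit lanes.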
defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
          (CMLTv8i8rz V64:$Rn)>;
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
          (CMLTv4i16rz V64:$Rn)>;
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
          (CMLTv2i32rz V64:$Rn)>;
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
          (CMLTv16i8rz V128:$Rn)>;
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
          (CMLTv8i16rz V128:$Rn)>;
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
          (CMLTv4i32rz V128:$Rn)>;
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
          (CMLTv2i64rz V128:$Rn)>;
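// An arithmetic shift right by (lane width - 1) broadcasts the sign bit,
// producing all-ones for negative lanes and all-zeros otherwise -- exactly
// what CMLT #0 computes.  So e.g. sshr v0.4s, v1.4s, #31 can be selected
// as cmlt v0.4s, v1.4s, #0 instead.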
defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                                (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
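// E.g. fcvtzs v0.4s, v1.4s on a lane holding 3.0e9 produces 0x7fffffff
// (INT32_MAX) rather than wrapping, so the fp_to_sint_sat/fp_to_uint_sat
// nodes (which demand exactly this clamping) need no extra code.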
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
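// E.g. shll v0.8h, v1.8b, #8 widens each byte and shifts it into the high
// half of the new 16-bit lane.  After shifting by the full element width
// the low bits are all zero either way, so the result is identical whether
// the widening was zext, sext or anyext -- which is why one instruction
// covers all three pattern flavours.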
// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
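// Decoding the immediates above, informally:
//   movi_edit 85  = 0b01010101, bit-expanded to bytes FF,00,FF,00,...
//                                             -> 0x00FF (255) per i16 lane
//   movi_edit 51  = 0b00110011, bit-expanded to bytes FF,FF,00,00,...
//                                             -> 0x0000FFFF (65535) per i32 lane
//   movi_shift 127, LSL #0                    -> 0x007F (127) per i16 lane
//   mvni_shift 127, LSL #0  = ~0x007F         -> 0xFF80 (-128) per i16 lane
//   movi_msl 127 (MSL #8)   = (127<<8)|0xFF   -> 0x7FFF (32767) per i32 lane
//   mvni_msl 127 (MSL #8)   = ~0x00007FFF     -> 0xFFFF8000 (-32768) per i32 lane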
// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
// with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
// with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;
// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                    (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                    (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                     (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                     (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA    : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
               TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS    : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
               TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
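// Concretely: fma(a, b, acc) carries the accumulator last, while the tied
// FMLA destination register *is* the accumulator, so the fragment places
// the fma addend in the node:$LHS slot that ends up tied to Rd.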
defm FMULX   : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL    : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS  : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB    : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;

// MLA and MLS are generated in MachineCombine
defm MLA     : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS     : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL     : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL    : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA    : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
               TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD    : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD   : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
defm SHSUB   : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP   : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX    : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP   : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN    : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD   : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL  : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL   : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB   : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD  : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
defm SRSHL   : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL    : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB     : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA    : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
               TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD    : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD   : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
defm UHSUB   : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP   : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX    : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP   : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN    : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD   : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL  : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL   : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB   : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD  : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL   : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL    : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturating patterns, beyond the intrinsic matches above.
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
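// Rationale (informal): BSL, BIT and BIF all compute the same bitwise
// select (mask & b) | (~mask & c), but each ties a different operand to the
// destination register.  Keeping the untied BSP pseudo until after register
// allocation lets the allocator pick whichever of the three encodings
// happens to have its tied operand already in the result register.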
  3928. def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
  3929. (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
  3930. def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
  3931. (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
  3932. def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
  3933. (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
  3934. def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
  3935. (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
  3936. def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
  3937. (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
  3938. def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
  3939. (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
  3940. def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
  3941. (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
  3942. def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
  3943. (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
  3944. def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
  3945. (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
  3946. def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
  3947. (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
  3948. def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
  3949. (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
  3950. def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
  3951. (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
  3952. def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
  3953. (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
  3954. def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
  3955. (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
  3956. def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
  3957. (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
  3958. def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
  3959. (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
  3960. def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
  3961. "|cmls.8b\t$dst, $src1, $src2}",
  3962. (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
  3963. def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
  3964. "|cmls.16b\t$dst, $src1, $src2}",
  3965. (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
  3966. def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
  3967. "|cmls.4h\t$dst, $src1, $src2}",
  3968. (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
  3969. def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
  3970. "|cmls.8h\t$dst, $src1, $src2}",
  3971. (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
  3972. def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
  3973. "|cmls.2s\t$dst, $src1, $src2}",
  3974. (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
  3975. def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
  3976. "|cmls.4s\t$dst, $src1, $src2}",
  3977. (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
  3978. def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
  3979. "|cmls.2d\t$dst, $src1, $src2}",
  3980. (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
  3981. def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
  3982. "|cmlo.8b\t$dst, $src1, $src2}",
  3983. (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
  3984. def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
  3985. "|cmlo.16b\t$dst, $src1, $src2}",
  3986. (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
  3987. def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
  3988. "|cmlo.4h\t$dst, $src1, $src2}",
  3989. (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
  3990. def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
  3991. "|cmlo.8h\t$dst, $src1, $src2}",
  3992. (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
  3993. def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
  3994. "|cmlo.2s\t$dst, $src1, $src2}",
  3995. (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
  3996. def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
  3997. "|cmlo.4s\t$dst, $src1, $src2}",
  3998. (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
  3999. def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
  4000. "|cmlo.2d\t$dst, $src1, $src2}",
  4001. (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4002. def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
  4003. "|cmle.8b\t$dst, $src1, $src2}",
  4004. (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
  4005. def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
  4006. "|cmle.16b\t$dst, $src1, $src2}",
  4007. (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
  4008. def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
  4009. "|cmle.4h\t$dst, $src1, $src2}",
  4010. (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4011. def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
  4012. "|cmle.8h\t$dst, $src1, $src2}",
  4013. (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4014. def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
  4015. "|cmle.2s\t$dst, $src1, $src2}",
  4016. (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4017. def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
  4018. "|cmle.4s\t$dst, $src1, $src2}",
  4019. (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4020. def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
  4021. "|cmle.2d\t$dst, $src1, $src2}",
  4022. (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4023. def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
  4024. "|cmlt.8b\t$dst, $src1, $src2}",
  4025. (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
  4026. def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
  4027. "|cmlt.16b\t$dst, $src1, $src2}",
  4028. (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
  4029. def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
  4030. "|cmlt.4h\t$dst, $src1, $src2}",
  4031. (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4032. def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
  4033. "|cmlt.8h\t$dst, $src1, $src2}",
  4034. (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4035. def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
  4036. "|cmlt.2s\t$dst, $src1, $src2}",
  4037. (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4038. def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
  4039. "|cmlt.4s\t$dst, $src1, $src2}",
  4040. (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4041. def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
  4042. "|cmlt.2d\t$dst, $src1, $src2}",
  4043. (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4044. let Predicates = [HasNEON, HasFullFP16] in {
  4045. def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
  4046. "|fcmle.4h\t$dst, $src1, $src2}",
  4047. (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4048. def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
  4049. "|fcmle.8h\t$dst, $src1, $src2}",
  4050. (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4051. }
  4052. def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
  4053. "|fcmle.2s\t$dst, $src1, $src2}",
  4054. (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4055. def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
  4056. "|fcmle.4s\t$dst, $src1, $src2}",
  4057. (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4058. def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
  4059. "|fcmle.2d\t$dst, $src1, $src2}",
  4060. (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4061. let Predicates = [HasNEON, HasFullFP16] in {
  4062. def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
  4063. "|fcmlt.4h\t$dst, $src1, $src2}",
  4064. (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4065. def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
  4066. "|fcmlt.8h\t$dst, $src1, $src2}",
  4067. (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4068. }
  4069. def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
  4070. "|fcmlt.2s\t$dst, $src1, $src2}",
  4071. (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4072. def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
  4073. "|fcmlt.4s\t$dst, $src1, $src2}",
  4074. (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4075. def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
  4076. "|fcmlt.2d\t$dst, $src1, $src2}",
  4077. (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4078. let Predicates = [HasNEON, HasFullFP16] in {
  4079. def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
  4080. "|facle.4h\t$dst, $src1, $src2}",
  4081. (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4082. def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
  4083. "|facle.8h\t$dst, $src1, $src2}",
  4084. (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4085. }
  4086. def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
  4087. "|facle.2s\t$dst, $src1, $src2}",
  4088. (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4089. def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
  4090. "|facle.4s\t$dst, $src1, $src2}",
  4091. (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4092. def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
  4093. "|facle.2d\t$dst, $src1, $src2}",
  4094. (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4095. let Predicates = [HasNEON, HasFullFP16] in {
  4096. def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
  4097. "|faclt.4h\t$dst, $src1, $src2}",
  4098. (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  4099. def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
  4100. "|faclt.8h\t$dst, $src1, $src2}",
  4101. (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
  4102. }
  4103. def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
  4104. "|faclt.2s\t$dst, $src1, $src2}",
  4105. (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
  4106. def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
  4107. "|faclt.4s\t$dst, $src1, $src2}",
  4108. (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
  4109. def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
  4110. "|faclt.2d\t$dst, $src1, $src2}",
  4111. (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
  4112. //===----------------------------------------------------------------------===//
  4113. // Advanced SIMD three scalar instructions.
  4114. //===----------------------------------------------------------------------===//
  4115. defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
  4116. defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
  4117. defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
  4118. defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
  4119. defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
  4120. defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
  4121. defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
  4122. defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
  4123. def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
  4124. (FABD64 FPR64:$Rn, FPR64:$Rm)>;
  4125. let Predicates = [HasFullFP16] in {
  4126. def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
  4127. }
  4128. def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
  4129. def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
  4130. defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
  4131. int_aarch64_neon_facge>;
  4132. defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
  4133. int_aarch64_neon_facgt>;
  4134. defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
  4135. defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
  4136. defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
  4137. defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
  4138. defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
  4139. defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
  4140. defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
  4141. defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
  4142. defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
  4143. defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
  4144. defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
  4145. defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
  4146. defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
  4147. defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
  4148. defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
  4149. defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
  4150. defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
  4151. defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
  4152. defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
  4153. defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
  4154. defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
  4155. let Predicates = [HasRDM] in {
  4156. defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  4157. defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  4158. def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
  4159. (i32 FPR32:$Rm))),
  4160. (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  4161. def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
  4162. (i32 FPR32:$Rm))),
  4163. (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  4164. }
  4165. def : InstAlias<"cmls $dst, $src1, $src2",
  4166. (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4167. def : InstAlias<"cmle $dst, $src1, $src2",
  4168. (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4169. def : InstAlias<"cmlo $dst, $src1, $src2",
  4170. (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4171. def : InstAlias<"cmlt $dst, $src1, $src2",
  4172. (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4173. def : InstAlias<"fcmle $dst, $src1, $src2",
  4174. (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
  4175. def : InstAlias<"fcmle $dst, $src1, $src2",
  4176. (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4177. def : InstAlias<"fcmlt $dst, $src1, $src2",
  4178. (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
  4179. def : InstAlias<"fcmlt $dst, $src1, $src2",
  4180. (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4181. def : InstAlias<"facle $dst, $src1, $src2",
  4182. (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
  4183. def : InstAlias<"facle $dst, $src1, $src2",
  4184. (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4185. def : InstAlias<"faclt $dst, $src1, $src2",
  4186. (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
  4187. def : InstAlias<"faclt $dst, $src1, $src2",
  4188. (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
  4189. //===----------------------------------------------------------------------===//
  4190. // Advanced SIMD three scalar instructions (mixed operands).
  4191. //===----------------------------------------------------------------------===//
  4192. defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
  4193. int_aarch64_neon_sqdmulls_scalar>;
  4194. defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
  4195. defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
  4196. def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
  4197. (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
  4198. (i32 FPR32:$Rm))))),
  4199. (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  4200. def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
  4201. (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
  4202. (i32 FPR32:$Rm))))),
  4203. (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  4204. //===----------------------------------------------------------------------===//
  4205. // Advanced SIMD two scalar instructions.
  4206. //===----------------------------------------------------------------------===//
  4207. defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>;
  4208. defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
  4209. defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
  4210. defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
  4211. defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
  4212. defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
  4213. defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
  4214. defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
  4215. defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
  4216. defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
  4217. defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
  4218. defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
  4219. defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
  4220. defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
  4221. defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
  4222. defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
  4223. defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
  4224. defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
  4225. defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
  4226. def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
  4227. defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
  4228. defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
  4229. defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
  4230. defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
  4231. defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
  4232. defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
  4233. UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
  4234. defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
  4235. defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
  4236. defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
  4237. defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
  4238. defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
  4239. defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
  4240. int_aarch64_neon_suqadd>;
  4241. defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
  4242. defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
  4243. defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
  4244. int_aarch64_neon_usqadd>;
  4245. def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
  4246. (CMLTv1i64rz V64:$Rn)>;
  4247. def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
  4248. (FCVTASv1i64 FPR64:$Rn)>;
  4249. def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
  4250. (FCVTAUv1i64 FPR64:$Rn)>;
  4251. def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
  4252. (FCVTMSv1i64 FPR64:$Rn)>;
  4253. def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
  4254. (FCVTMUv1i64 FPR64:$Rn)>;
  4255. def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
  4256. (FCVTNSv1i64 FPR64:$Rn)>;
  4257. def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
  4258. (FCVTNUv1i64 FPR64:$Rn)>;
  4259. def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;
def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
}
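// Note: the f16 variants round-trip through i32 rather than i16 because i16
// is not a legal type on AArch64; the FCVTZSv1f16/SCVTFv1i16 (and the
// unsigned FCVTZUv1f16/UCVTFv1i16) pairs still keep the whole round trip in
// FP registers.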
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8- and 16-bit integers to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Xext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}
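// Each instantiation of this multiclass produces both register-offset forms:
// one pattern for a 32-bit (W) index register and one for a 64-bit (X) index
// register; the loaded byte/halfword lands in an undefined FP register via
// INSERT_SUBREG before the in-register UCVTF.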
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
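// Each source width is covered for all three addressing modes: the
// register-offset forms come from UIntToFPROLoadPat, while the
// scaled-immediate (LDRxui) and unscaled-immediate (LDURxi) forms are spelled
// out explicitly above.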
// 32-bit loads are handled in the target-specific DAG combine
// (performIntToFpCombine).
// Converting a 64-bit integer to a 32-bit floating point value is not
// possible with UCVTF on floating point registers (source and destination
// must have the same size).

// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double is handled in the target-specific DAG combine
// (performIntToFpCombine).
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                         AArch64sabd>;
defm SADDL  : SIMDLongThreeVectorBHS<0, 0b0000, "saddl",
                BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
                BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                             AArch64uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS<1, 0b0011, "usubw",
                BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                      V64:$Ra,
                      (v4i16 (extract_subvector
                                (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v8i16 (INST8B
                              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                      V64:$Ra,
                      (v2i32 (extract_subvector
                                (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v4i32 (INST4H
                              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                      V64:$Ra,
                      (v1i64 (extract_subvector
                                (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v2i64 (INST2S
                              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
}
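// The instantiations below let a 64-bit add/sub of the low half of a widening
// multiply use the full 128-bit [SU]ML[AS]L: the 64-bit accumulator is placed
// in the low half of an undefined 128-bit register, the multiply-accumulate
// runs on the whole register, and the low 64 bits are extracted again; the
// high-half result lanes are never read.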
defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;

// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V64:$Rn, V64:$Rm)>;
}

defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
                               SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
                               UMULLv4i16_v4i32, UMULLv2i32_v2i64>;

// Patterns for smull2/umull2.
multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
                           (extract_high_v16i8 V128:$Rm))),
            (INST8B V128:$Rn, V128:$Rm)>;
  def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
                           (extract_high_v8i16 V128:$Rm))),
            (INST4H V128:$Rn, V128:$Rm)>;
  def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
                           (extract_high_v4i32 V128:$Rm))),
            (INST2S V128:$Rn, V128:$Rm)>;
}

defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
                              SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
                              UMULLv8i16_v4i32, UMULLv4i32_v2i64>;

// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
}

defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;

// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
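// pmull2 multiplies the top (lane 1) doubleword of each source, so when both
// operands are lane-1 extracts of 128-bit registers the registers can be fed
// to PMULLv2i64 directly, with no explicit lane extraction.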
def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
                                    (extractelt (v2i64 V128:$Rm), (i64 1))),
          (PMULLv2i64 V128:$Rn, V128:$Rm)>;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;
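// AdjustExtImm rebases an EXT immediate from the 64-bit form onto the 128-bit
// form: when the 64-bit source is really the high half of a 128-bit register,
// the byte index into the full register is 8 larger.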
def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64 bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                              (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                              (AdjustExtImm imm:$imm)), dsub)>;
}
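// In each instantiation below, N is the element count of the 64-bit half,
// i.e. the index of the first element of the high half of the 128-bit type.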
defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
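// A concatenation of two truncated vectors keeps the low sub-element of every
// source element, which is exactly what uzp1 produces when given the two
// untruncated registers, so no separate narrowing instructions are needed.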
def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;
def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                                       (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                                        (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
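// fadd reductions are lowered to a tree of pairwise FADDP operations: faddp
// adds adjacent element pairs of the concatenated operands, so each step
// halves the number of live lanes, and the lanes contributed by the
// IMPLICIT_DEF operand are never read by the final scalar FADDP.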
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
               (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
             dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
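// For v2i64, an across-lanes add is just the scalar pairwise ADDP of the two
// lanes, placed back in the low half of a 128-bit register.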
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
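// These XForms rescale a lane index on wide elements to the index of the low
// narrow sub-element of the same lane: with little-endian lane numbering,
// lane i of a .4s register occupies lanes 2*i and 2*i+1 of the same register
// viewed as .8h, so a truncating dup can read the narrow lane directly.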
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                       imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                       imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                        VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                        VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                      VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                      VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;
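// Inserting +0.0 into a lane needs no FP register at all: fpimm0 has an
// all-zero bit pattern, so INS from WZR/XZR writes the right bits directly.
// Note this only fires for +0.0, not -0.0, whose sign bit is set.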
def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
                         (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
                         (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
                         (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}
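// INS can only address lanes of the full 128-bit register, so every 64-bit
// operand above is first widened with SUBREG_TO_REG and, for a 64-bit
// destination, the low half of the result is extracted again afterwards.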
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which may just as well
// be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;

// Patterns for uaddv(uaddlp(x)) ==> uaddlv
def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
            (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
            (i64 0))), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (UADDLVv8i8v V64:$op), hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
            (v16i8 V128:$op))))), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (UADDLVv16i8v V128:$op), hsub), ssub)>;
def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
// Patterns for addp(uaddlp(x)) ==> uaddlv
def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
          (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
// If none did, fall back to the explicit patterns, consuming the
// vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;
}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
  5188. def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
  5189. (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
  5190. (i32 (SMOVvi8to32
  5191. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5192. (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
  5193. (i64 0)))>;
  5194. def : Pat<(i32 (sext_inreg (i32 (vector_extract
  5195. (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
  5196. (i32 (SMOVvi8to32
  5197. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5198. (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
  5199. (i64 0)))>;
  5200. def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
  5201. (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
  5202. (i32 (SMOVvi16to32
  5203. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5204. (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
  5205. (i64 0)))>;
  5206. def : Pat<(i32 (sext_inreg (i32 (vector_extract
  5207. (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
  5208. (i32 (SMOVvi16to32
  5209. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5210. (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
  5211. (i64 0)))>;
  5212. }
  5213. multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
  5214. SDPatternOperator opNode>
  5215. : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
  5216. // If there is a masking operation keeping only what has been actually
  5217. // generated, consume it.
  5218. def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
  5219. (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
  5220. (i32 (EXTRACT_SUBREG
  5221. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5222. (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
  5223. ssub))>;
  5224. def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
  5225. maski8_or_more)),
  5226. (i32 (EXTRACT_SUBREG
  5227. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5228. (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
  5229. ssub))>;
  5230. def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
  5231. (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
  5232. (i32 (EXTRACT_SUBREG
  5233. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5234. (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
  5235. ssub))>;
  5236. def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
  5237. maski16_or_more)),
  5238. (i32 (EXTRACT_SUBREG
  5239. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5240. (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
  5241. ssub))>;
  5242. }
  5243. defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
  5244. // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
  5245. def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
  5246. (ADDPv2i32 V64:$Rn, V64:$Rn)>;
  5247. defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
  5248. // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
  5249. def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
  5250. (ADDPv2i32 V64:$Rn, V64:$Rn)>;
  5251. defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
  5252. def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
  5253. (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
  5254. defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
  5255. def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
  5256. (SMINPv2i32 V64:$Rn, V64:$Rn)>;
  5257. defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
  5258. def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
  5259. (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
  5260. defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
  5261. def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
  5262. (UMINPv2i32 V64:$Rn, V64:$Rn)>;
  5263. multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  5264. def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
  5265. (i32 (SMOVvi16to32
  5266. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5267. (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
  5268. (i64 0)))>;
  5269. def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
  5270. (i32 (SMOVvi16to32
  5271. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5272. (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
  5273. (i64 0)))>;
  5274. def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
  5275. (i32 (EXTRACT_SUBREG
  5276. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5277. (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
  5278. ssub))>;
  5279. def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
  5280. (i32 (EXTRACT_SUBREG
  5281. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5282. (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
  5283. ssub))>;
  5284. def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
  5285. (i64 (EXTRACT_SUBREG
  5286. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5287. (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
  5288. dsub))>;
  5289. }
  5290. multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
  5291. Intrinsic intOp> {
  5292. def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
  5293. (i32 (EXTRACT_SUBREG
  5294. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5295. (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
  5296. ssub))>;
  5297. def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
  5298. (i32 (EXTRACT_SUBREG
  5299. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5300. (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
  5301. ssub))>;
  5302. def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
  5303. (i32 (EXTRACT_SUBREG
  5304. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5305. (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
  5306. ssub))>;
  5307. def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
  5308. (i32 (EXTRACT_SUBREG
  5309. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5310. (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
  5311. ssub))>;
  5312. def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
  5313. (i64 (EXTRACT_SUBREG
  5314. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5315. (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
  5316. dsub))>;
  5317. }
  5318. defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
  5319. defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
  5320. // The vaddlv_s32 intrinsic gets mapped to SADDLP.
  5321. def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
  5322. (i64 (EXTRACT_SUBREG
  5323. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5324. (SADDLPv2i32_v1i64 V64:$Rn), dsub),
  5325. dsub))>;
  5326. // The vaddlv_u32 intrinsic gets mapped to UADDLP.
  5327. def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
  5328. (i64 (EXTRACT_SUBREG
  5329. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5330. (UADDLPv2i32_v1i64 V64:$Rn), dsub),
  5331. dsub))>;
  5332. //------------------------------------------------------------------------------
  5333. // AdvSIMD modified immediate instructions
  5334. //------------------------------------------------------------------------------
  5335. // AdvSIMD BIC
  5336. defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
  5337. // AdvSIMD ORR
  5338. defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
  5339. def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
  5340. def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
  5341. def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
  5342. def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
  5343. def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
  5344. def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
  5345. def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
  5346. def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
  5347. def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
  5348. def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
  5349. def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
  5350. def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
  5351. def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
  5352. def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
  5353. def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
  5354. def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
  5355. // AdvSIMD FMOV
  5356. def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
  5357. "fmov", ".2d",
  5358. [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  5359. def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
  5360. "fmov", ".2s",
  5361. [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  5362. def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
  5363. "fmov", ".4s",
  5364. [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  5365. let Predicates = [HasNEON, HasFullFP16] in {
  5366. def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
  5367. "fmov", ".4h",
  5368. [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  5369. def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
  5370. "fmov", ".8h",
  5371. [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  5372. } // Predicates = [HasNEON, HasFullFP16]
// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;
// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
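// In the .2d form each bit of the 8-bit immediate is replicated across a
// whole byte, so, for example, imm8 == 0 materializes all-zeros and
// imm8 == 255 materializes all-ones ("movi v0.2d, #0xffffffffffffffff").
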
// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns  : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                               "movi", ".8b",
                      [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                               "movi", ".16b",
                      [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}
// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}
//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
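// A source-level expression like "acc - x * y[lane]", for example, can reach
// instruction selection as any of the commuted (fma ..., (fneg ...), acc)
// forms above, so each variant needs its own pattern to reliably select the
// indexed FMLS.
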
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
}
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;

def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH  : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH  : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                      int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;
// Generated by MachineCombine
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
                                     int_aarch64_neon_smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
                                     int_aarch64_neon_umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                            (vector_extract (v4i32 V128:$Vm),
                                                            VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
// Match the add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME: build the capability to compute
     // whether the operands of a G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}
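// For example, in "(or (shl x, (i64 8)), (and y, (i64 0xff)))" the operand
// bit ranges are disjoint, so the 'or' behaves exactly like an 'add' and may
// feed the accumulating shift patterns (SSRA/USRA) below.
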
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;

// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
// Patterns for FP16 intrinsics - a register copy to/from the h subregister
// is required, as i16 is not a legal scalar type.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                   (and FPR32:$Rn, (i32 65535)),
                   vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
                   (i32 (IMPLICIT_DEF)),
                   (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
                   hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
                   (i64 (IMPLICIT_DEF)),
                   (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
                   hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
                   (i32 (IMPLICIT_DEF)),
                   (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
                   hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
                   (i64 (IMPLICIT_DEF)),
                   (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
                   hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
                   (i32 (IMPLICIT_DEF)),
                   (FACGE16 FPR16:$Rn, FPR16:$Rm),
                   hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
                   (i32 (IMPLICIT_DEF)),
                   (FACGT16 FPR16:$Rn, FPR16:$Rm),
                   hsub))>;
defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
                                     int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
                                     int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
                                     int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
                                     int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
                                     int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
                                     int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;
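// For example, "(add acc, (AArch64vashr x, #3))" on a 64-bit scalar selects
// a single "ssra d0, d1, #3", folding the shift and the accumulate;
// add_and_or_is_add lets a disjoint 'or' match the same accumulating form.
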
//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                                        int_aarch64_neon_rshrn>;
defm SHL    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;

defm SHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;

defm SLI    : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;

defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                          int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                          int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                          int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                          int_aarch64_neon_sqshrun>;

defm SRI    : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;

defm SRSHR  : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA  : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL  : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR   : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA   : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
       TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF  : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                   int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                         int_aarch64_neon_uqshrn>;
defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                  TriOpFrag<(add node:$LHS,
                                 (AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                  BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
       TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;

// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                    (v8i8 V64:$Vd),
                    (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
          (RADDHNv8i16_v16i8
                    (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                    (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                    (v4i16 V64:$Vd),
                    (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
          (RADDHNv4i32_v8i16
                    (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                    (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i32 (concat_vectors
                    (v2i32 V64:$Vd),
                    (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
          (RADDHNv2i64_v4i32
                    (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                    (v2i64 (MOVIv2d_ns (i32 0))))>;
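// These are equivalent because, for example, "rshrn v0.2s, v1.2d, #32"
// computes (x + (1 << 31)) >> 32 per 64-bit element, which is exactly
// "raddhn v0.2s, v1.2d, <zeros>": RADDHN's rounding constant matches RSHRN's
// when the other addend is zero.
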
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
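// For example, "(v8i16 (sext (v8i8 ...)))" becomes "sshll v0.8h, v1.8b, #0":
// a widening shift by zero performs the extension with no dedicated extend
// instruction needed.
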
// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
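// For example, the 8-bit-to-float pattern below expands to roughly:
//   ldr   b0, [x0]            // load the byte straight into a vector reg
//   sshll v0.8h, v0.8b, #0    // sign extend  8 -> 16 bits
//   sshll v0.4s, v0.4h, #0    // sign extend 16 -> 32 bits
//   scvtf s0, s0              // FPR -> FPR convert
// instead of an integer load, a sign extend, and a GPR -> FPR scvtf.
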
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                        INST,
                                        bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                  INST,
                                  hsub),
                              0),
                            ssub)))>, Requires<[NotForCodeSize]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (f64
                                 (EXTRACT_SUBREG
                                   (SSHLLv4i16_shift
                                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                         INST,
                                         hsub),
                                     0),
                                   dsub)),
                               0),
                             dsub)))>,
     Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                   INST,
                                   ssub),
                               0),
                             dsub)))>, Requires<[NotForCodeSize]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8,  LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8,  ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;
//---
// Single-element
//---

defm LD1R  : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R  : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R  : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R  : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
}
def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;

// Grab the floating point versions too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
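// For example, "(v4i32 (AArch64dup (i32 (load ...))))" selects
// "ld1r { v0.4s }, [x0]", which loads the scalar once and replicates it
// into every lane in a single instruction.
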
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8,  i32,  LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16,  i32,  LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4i32,  i32,  LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4f32,  f32,  LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2i64,  i64,  LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2f64,  f64,  LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8f16,  f16,  LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;

// Generate LD1 for extload if memory type does not match the
// destination type, for example:
//
//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
//
// In this case, the index must be adjusted to match LD1 type.
//
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
                         VecIndex, ValueType VTy, ValueType STy,
                         Instruction LD1, SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;

def VectorIndexStoH : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def VectorIndexStoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
}]>;
def VectorIndexHtoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;

def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
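// For example, an i8 extload inserted into lane 1 of a v4i32 becomes LD1i8
// into byte lane 1 * 4 == 4, the least significant byte of 32-bit lane 1.
// Only that byte is written, which is fine because the upper bits of an
// anyext load are undefined anyway.
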
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                         SDPatternOperator ExtLoad, Instruction LD1>
  : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
        (ResultTy (EXTRACT_SUBREG
                     (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;

def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;

class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,   i32,  LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16,  i32,  LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2i32,  i32,  LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2f32,  f32,  LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4f16,  f16,  LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;

defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
// Stores

defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;

let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8,  i32,  ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16,  i32,  ST1i16>;
def : St1Lane128Pat<store,         VectorIndexS, v4i32,  i32,  ST1i32>;
def : St1Lane128Pat<store,         VectorIndexS, v4f32,  f32,  ST1i32>;
def : St1Lane128Pat<store,         VectorIndexD, v2i64,  i64,  ST1i64>;
def : St1Lane128Pat<store,         VectorIndexD, v2f64,  f64,  ST1i64>;
def : St1Lane128Pat<store,         VectorIndexH, v8f16,  f16,  ST1i16>;
def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;

let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8,   i32,  ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16,  i32,  ST1i16>;
def : St1Lane64Pat<store,         VectorIndexS, v2i32,  i32,  ST1i32>;
def : St1Lane64Pat<store,         VectorIndexS, v2f32,  f32,  ST1i32>;
def : St1Lane64Pat<store,         VectorIndexH, v4f16,  f16,  ST1i16>;
def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;

multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                            ValueType VTy, ValueType STy, Instruction ST1,
                            int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}
defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;

multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;
//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied  : Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                   Sched<[WriteVq]>;
def AESIMCrrTied : Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                   Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;
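// For example, the tied pseudos keep the pair
//   aese  v0.16b, v1.16b
//   aesmc v0.16b, v0.16b
// operating on the same register through register allocation, so cores that
// fuse adjacent AESE/AESMC (or AESD/AESIMC) pairs can do so.
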
let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}
//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

def def32 : PatLeaf<(i32 GPR32:$src), [{
  return isDef32(*N);
}]>;

// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
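// For example, "add w8, w9, w10" already zeroes the upper 32 bits of x8, so
// a subsequent "(i64 (zext ...))" of that value costs no extra instruction,
// only a SUBREG_TO_REG re-labelling of the register.
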
// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
  6369. def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
  6370. (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
  6371. (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
  6372. def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
  6373. (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
  6374. (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
  6375. def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
  6376. (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
  6377. (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
  6378. def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
  6379. (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
  6380. (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
  6381. def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
  6382. (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
  6383. (i64 (i64shift_a imm0_63:$imm)),
  6384. (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
  6385. // sra patterns have an AddedComplexity of 10, so make sure we have a higher
  6386. // AddedComplexity for the following patterns since we want to match sext + sra
  6387. // patterns before we attempt to match a single sra node.
  6388. let AddedComplexity = 20 in {
  6389. // We support all sext + sra combinations which preserve at least one bit of the
  6390. // original value which is to be sign extended. E.g. we support shifts up to
  6391. // bitwidth-1 bits.
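// For example, (sra (sext_inreg GPR32:$Rn, i8), 3) selects to
// "sbfx w0, w0, #3, #5" (SBFMWri with immr=3, imms=7), extracting and
// sign-extending the five remaining bits of the byte.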
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
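// Since 'U' is 0x55, a ubsantrap of kind K is encoded as BRK #(0x5500 | K);
// e.g. kind 1 emits "brk #0x5501".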
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high parts of
// both results together.
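// For example, (v4i32 (mulhs V0, V1)) is expected to select to roughly:
//   smull  v2.2d, v0.2s, v1.2s
//   smull2 v3.2d, v0.4s, v1.4s
//   uzp2   v0.4s, v2.4s, v3.4s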
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
            (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
            (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
            (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
            (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
            (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
            (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
            (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
            (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32 v0              (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2              (implicit)
//        store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32 v0              (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4              (implicit)
//        store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
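// For example, the v16i8 <-> f128 bitconverts below synthesize the full
// 128-bit byte reverse as
//   (EXTv16i8 (REV64v16i8 $src), (REV64v16i8 $src), (i32 8))
// i.e. "rev64 v0.16b, v0.16b; ext v0.16b, v0.16b, v0.16b, #8".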
//
// Most bitconverts require some sort of conversion. The only exceptions are:
// a) Identity conversions - vNfX <-> vNiX
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
          (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
          (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
          (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
          (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
          (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
          (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                     (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;

// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
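// For example, the v2f64 lane sum above selects to "faddp d0, v0.2d", and
// the f32 case to "faddp s0, v0.2s" on the low 64-bit half.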
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
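// For example, a 128-bit nontemporal store of q0 to [x0] is split below into
// (roughly) "mov d1, v0.d[1]; stnp d0, d1, [x0]".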
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity = 15
} // Predicates = [IsLE]
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to the registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so is preferred when it's possible to use it.
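// For example, (i64 (extractelt (v2i64 V128:$V), 0)) below becomes a plain
// dsub subregister copy, typically emitted as "fmov x0, d0".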
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
               (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
               ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
                (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
                     (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                          (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;
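// In C terms, dot_v4i8 recognizes a reduction of the shape (a sketch; the
// DAG may arrive in other associations of the adds):
//   int32_t r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
// over four sign- or zero-extending byte loads from $Rn/$Rm, and replaces it
// with a single SDOT/UDOT against a zeroed accumulator.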
// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
                 (v4i32 (AArch64uaddv
                   (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                        (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
                 (i64 0)))>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0]; Vn==Vm
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
               (VADDV_32
                 (i64 (DOT (DUPv2i32gpr WZR),
                           (v8i8 node:$Vm),
                           (v8i8 node:$Vn)))),
               sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
                   (v8i16 (extend
                     (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
            (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                   (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
                 (v4i32 (AArch64uaddv
                   (add
                     (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                          (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
                     (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                          (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
                 (i64 0)))>;
class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
               (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}
  7349. // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
  7350. // so that it can be used as input to inline asm, and vice versa.
  7351. def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
  7352. def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
  7353. def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
  7354. GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
  7355. (REG_SEQUENCE GPR64x8Class,
  7356. $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
  7357. $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
  7358. foreach i = 0-7 in {
  7359. def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
  7360. (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
  7361. }
let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:  Store64BV<0b011, "st64bv">;
  def ST64BV0: Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2,
                     GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class,
                            $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2,
                            $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5,
                            $x6, x8sub_6, $x7, x8sub_7),
                       $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
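
// Illustrative IR (hand-written) matched by ST64BPattern, assuming the
// usual pointer-plus-eight-i64 signature of the LS64 store intrinsics:
//   call void @llvm.aarch64.st64b(ptr %addr, i64 %a, i64 %b, i64 %c,
//                                 i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
// The eight data operands are combined into one GPR64x8 octuple via
// REG_SEQUENCE and stored with a single ST64B to [%addr].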
let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
    defm CPYP  : MOPSMemoryMoveInsns<0b00, "cpyp">;
    defm SETP  : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
    defm CPYM  : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE  : MOPSMemoryMoveInsns<0b10, "cpye">;
    defm SETM  : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE  : MOPSMemorySetInsns<0b10, "sete">;
  }
}
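
// Illustrative use (hand-written) of the prologue/main/epilogue triple for
// a memcpy; each step updates the pointer and size registers in place, and
// the algorithm choice made by the prologue is carried in NZCV (hence the
// Defs/Uses above):
//   cpyfp [x0]!, [x1]!, x2!   // prologue
//   cpyfm [x0]!, [x1]!, x2!   // main
//   cpyfe [x0]!, [x1]!, x2!   // epilogue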
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}
// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
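
// These nodes are expected to be produced by the target's lowering of
// memset/memcpy/memmove (and the MTE tagged memset) when MOPS is
// available; they are selected by the pseudos defined below.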
// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo :
      Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
             (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
             [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo :
      Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
             (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
             [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo :
      Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
             (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
             [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12,
    mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo :
    Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
           (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
           [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
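
// Illustrative post-expansion code (hand-written) for MOPSMemorySetPseudo,
// matching Size = 12 above (three 4-byte instructions):
//   setp [x0]!, x1!, x2
//   setm [x0]!, x1!, x2
//   sete [x0]!, x1!, x2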
// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;
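
// Note: the pattern above intentionally selects to nothing more than the
// input register; the assert node only conveys zero-extension information
// to isel and has no machine-level effect of its own.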
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"