ARMInstrNEON.td 436 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148
  1. //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file describes the ARM NEON instruction set.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //===----------------------------------------------------------------------===//
  13. // NEON-specific Operands.
  14. //===----------------------------------------------------------------------===//
  // i32 immediate operand whose assembly printing is delegated to
  // printVMOVModImmOperand. No ParserMatchClass is attached to it here.
  def nModImm : Operand<i32> {
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi8splat".
  def nImmSplatI8AsmOperand : AsmOperandClass {
    let Name = "NEONi8splat";
  }
  // i32 immediate operand: matched through nImmSplatI8AsmOperand when parsing,
  // printed with printVMOVModImmOperand.
  def nImmSplatI8 : Operand<i32> {
    let ParserMatchClass = nImmSplatI8AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi16splat".
  def nImmSplatI16AsmOperand : AsmOperandClass {
    let Name = "NEONi16splat";
  }
  // i32 immediate operand: matched through nImmSplatI16AsmOperand when parsing,
  // printed with printVMOVModImmOperand.
  def nImmSplatI16 : Operand<i32> {
    let ParserMatchClass = nImmSplatI16AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi32splat".
  def nImmSplatI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32splat";
  }
  // i32 immediate operand: matched through nImmSplatI32AsmOperand when parsing,
  // printed with printVMOVModImmOperand.
  def nImmSplatI32 : Operand<i32> {
    let ParserMatchClass = nImmSplatI32AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi16splatNot".
  def nImmSplatNotI16AsmOperand : AsmOperandClass {
    let Name = "NEONi16splatNot";
  }
  // i32 immediate operand matched through nImmSplatNotI16AsmOperand.
  // Unlike the plain splat operands, no PrintMethod is overridden here.
  def nImmSplatNotI16 : Operand<i32> {
    let ParserMatchClass = nImmSplatNotI16AsmOperand;
  }
  // Assembly-parser operand class registered under the name "NEONi32splatNot".
  def nImmSplatNotI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32splatNot";
  }
  // i32 immediate operand matched through nImmSplatNotI32AsmOperand.
  // Unlike the plain splat operands, no PrintMethod is overridden here.
  def nImmSplatNotI32 : Operand<i32> {
    let ParserMatchClass = nImmSplatNotI32AsmOperand;
  }
  // Assembly-parser operand class registered under the name "NEONi32vmov".
  def nImmVMOVI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32vmov";
  }
  // i32 immediate operand: matched through nImmVMOVI32AsmOperand when parsing,
  // printed with printVMOVModImmOperand.
  def nImmVMOVI32 : Operand<i32> {
    let ParserMatchClass = nImmVMOVI32AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Parser operand class parameterised by two value types. All three strings
  // are assembled from the bit sizes of From and To:
  //   Name            = "NEONi<To.Size>vmovi<From.Size>Replicate"
  //   PredicateMethod = "isNEONmovReplicate<<From.Size>, <To.Size>>"
  //   RenderMethod    = "addNEONvmovi<From.Size>ReplicateOperands"
  // NOTE(review): presumably describes a From-sized immediate replicated to
  // fill a To-sized element -- confirm against the predicate in the asm parser.
  class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
    : AsmOperandClass {
    let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
    let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
    let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  }
  // "inv" counterpart of nImmVMOVIAsmOperandReplicate. All three strings are
  // assembled from the bit sizes of From and To:
  //   Name            = "NEONi<To.Size>invi<From.Size>Replicate"
  //   PredicateMethod = "isNEONinvReplicate<<From.Size>, <To.Size>>"
  //   RenderMethod    = "addNEONinvi<From.Size>ReplicateOperands"
  class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
    : AsmOperandClass {
    let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
    let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
    let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  }
  // i32 immediate operand whose parser match class is the
  // nImmVMOVIAsmOperandReplicate instantiation for the same <From, To> pair;
  // printed with printVMOVModImmOperand.
  class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
    let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // i32 immediate operand whose parser match class is the
  // nImmVINVIAsmOperandReplicate instantiation for the same <From, To> pair;
  // printed with printVMOVModImmOperand.
  class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
    let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi32vmovNeg".
  def nImmVMOVI32NegAsmOperand : AsmOperandClass {
    let Name = "NEONi32vmovNeg";
  }
  // i32 immediate operand: matched through nImmVMOVI32NegAsmOperand when
  // parsing, printed with printVMOVModImmOperand.
  def nImmVMOVI32Neg : Operand<i32> {
    let ParserMatchClass = nImmVMOVI32NegAsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // i32 operand that reuses FPImmOperand (defined outside this part of the
  // file) as its parser match class and is printed with printFPImmOperand.
  def nImmVMOVF32 : Operand<i32> {
    let ParserMatchClass = FPImmOperand;
    let PrintMethod = "printFPImmOperand";
  }
  // Assembly-parser operand class registered under the name "NEONi64splat".
  def nImmSplatI64AsmOperand : AsmOperandClass {
    let Name = "NEONi64splat";
  }
  // i32 immediate operand: matched through nImmSplatI64AsmOperand when parsing,
  // printed with printVMOVModImmOperand.
  def nImmSplatI64 : Operand<i32> {
    let ParserMatchClass = nImmSplatI64AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembly-parser operand classes for the VectorIndex<N> operands, one per
  // element size suffix (8, 16, 32, 64).
  def VectorIndex8Operand : AsmOperandClass {
    let Name = "VectorIndex8";
  }
  def VectorIndex16Operand : AsmOperandClass {
    let Name = "VectorIndex16";
  }
  def VectorIndex32Operand : AsmOperandClass {
    let Name = "VectorIndex32";
  }
  def VectorIndex64Operand : AsmOperandClass {
    let Name = "VectorIndex64";
  }
  // Immediate index operand that is also usable as an ImmLeaf in patterns.
  // The predicate accepts values in [0, 8); the unsigned cast makes any
  // negative immediate fail the comparison.
  def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
    return static_cast<uint64_t>(Imm) < 8;
  }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex8Operand;
  }
  // Immediate index operand that is also usable as an ImmLeaf in patterns.
  // The predicate accepts values in [0, 4); the unsigned cast makes any
  // negative immediate fail the comparison.
  def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
    return static_cast<uint64_t>(Imm) < 4;
  }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex16Operand;
  }
  // Immediate index operand that is also usable as an ImmLeaf in patterns.
  // The predicate accepts values in [0, 2); the unsigned cast makes any
  // negative immediate fail the comparison.
  def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
    return static_cast<uint64_t>(Imm) < 2;
  }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex32Operand;
  }
  // Immediate index operand that is also usable as an ImmLeaf in patterns.
  // The predicate only accepts the value 0 (anything else, including negative
  // immediates after the unsigned cast, is >= 1).
  def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
    return static_cast<uint64_t>(Imm) < 1;
  }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex64Operand;
  }
  // Register list of one D register.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListOneDAsmOperand : AsmOperandClass {
    let Name = "VecListOneD";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // DPR register operand printed by printVectorListOne.
  def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
    let ParserMatchClass = VecListOneDAsmOperand;
  }
  // Register list of two sequential D registers.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListDPairAsmOperand : AsmOperandClass {
    let Name = "VecListDPair";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // DPair register operand printed by printVectorListTwo.
  def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
    let ParserMatchClass = VecListDPairAsmOperand;
  }
  // Register list of three sequential D registers.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListThreeDAsmOperand : AsmOperandClass {
    let Name = "VecListThreeD";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by printVectorListThree.
  def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
    let ParserMatchClass = VecListThreeDAsmOperand;
  }
  // Register list of four sequential D registers.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListFourDAsmOperand : AsmOperandClass {
    let Name = "VecListFourD";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by printVectorListFour.
  def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
    let ParserMatchClass = VecListFourDAsmOperand;
  }
  // Register list of two D registers spaced by 2 (two sequential Q registers).
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListDPairSpacedAsmOperand : AsmOperandClass {
    let Name = "VecListDPairSpaced";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPair register class; printed by
  // printVectorListTwoSpaced.
  def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
    let ParserMatchClass = VecListDPairSpacedAsmOperand;
  }
  // Register list of three D registers spaced by 2 (three Q registers).
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListThreeQAsmOperand : AsmOperandClass {
    let Name = "VecListThreeQ";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListThreeSpaced.
  def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
    let ParserMatchClass = VecListThreeQAsmOperand;
  }
  166. // Register list of four D registers spaced by 2 (four Q registers).
  def VecListFourQAsmOperand : AsmOperandClass {
    let Name = "VecListFourQ";
    // Parsed with parseVectorList, rendered with addVecListOperands.
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListFourSpaced.
  def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
    let ParserMatchClass = VecListFourQAsmOperand;
  }
  // Register list of one D register, with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListOneDAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListOneDAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // DPR register operand printed by printVectorListOneAllLanes.
  def VecListOneDAllLanes
    : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
    let ParserMatchClass = VecListOneDAllLanesAsmOperand;
  }
  // Register list of two D registers, with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListDPairAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListDPairAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // DPair register operand printed by printVectorListTwoAllLanes.
  def VecListDPairAllLanes
    : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
    let ParserMatchClass = VecListDPairAllLanesAsmOperand;
  }
  // Register list of two D registers spaced by 2 (two sequential Q registers),
  // with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListDPairSpacedAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // DPairSpc register operand printed by printVectorListTwoSpacedAllLanes.
  def VecListDPairSpacedAllLanes
    : RegisterOperand<DPairSpc, "printVectorListTwoSpacedAllLanes"> {
    let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
  }
  // Register list of three D registers, with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListThreeDAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListThreeAllLanes.
  def VecListThreeDAllLanes
    : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
    let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
  }
  // Register list of three D registers spaced by 2 (three sequential Q regs),
  // with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListThreeQAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListThreeSpacedAllLanes.
  def VecListThreeQAllLanes
    : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
    let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
  }
  // Register list of four D registers, with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListFourDAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListFourDAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListFourAllLanes.
  def VecListFourDAllLanes
    : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
    let ParserMatchClass = VecListFourDAllLanesAsmOperand;
  }
  // Register list of four D registers spaced by 2 (four sequential Q regs),
  // with "all lanes" subscripting.
  // The parser class uses parseVectorList to parse and addVecListOperands to
  // render the operand.
  def VecListFourQAllLanesAsmOperand : AsmOperandClass {
    let Name = "VecListFourQAllLanes";
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
  }
  // Declared over the DPR register class; printed by
  // printVectorListFourSpacedAllLanes.
  def VecListFourQAllLanes
    : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
    let ParserMatchClass = VecListFourQAllLanesAsmOperand;
  }
  // Lane-indexed register lists of one D register.
  // Every parser class below uses parseVectorList to parse and
  // addVecListIndexedOperands to render; every operand expands to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of one D register, with byte lane subscripting.
  def VecListOneDByteIndexAsmOperand : AsmOperandClass {
    let Name = "VecListOneDByteIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListOneDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  }

  // ...with half-word lane subscripting.
  def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListOneDHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListOneDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListOneDWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListOneDWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListOneDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  }
  // Lane-indexed register lists of two D registers.
  // Every parser class below uses parseVectorList to parse and
  // addVecListIndexedOperands to render; every operand expands to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of two D registers with byte lane subscripting.
  def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
    let Name = "VecListTwoDByteIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListTwoDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  }

  // ...with half-word lane subscripting.
  def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListTwoDHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListTwoDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListTwoDWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListTwoDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  }
  // Lane-indexed register lists of two Q registers.
  // Both parser classes use parseVectorList to parse and
  // addVecListIndexedOperands to render; both operands expand to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of two Q registers with half-word lane subscripting.
  def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListTwoQHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListTwoQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListTwoQWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListTwoQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  }
  // Lane-indexed register lists of three D registers.
  // Every parser class below uses parseVectorList to parse and
  // addVecListIndexedOperands to render; every operand expands to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of three D registers with byte lane subscripting.
  def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
    let Name = "VecListThreeDByteIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListThreeDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  }

  // ...with half-word lane subscripting.
  def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListThreeDHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListThreeDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListThreeDWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListThreeDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  }
  // Lane-indexed register lists of three Q registers.
  // Both parser classes use parseVectorList to parse and
  // addVecListIndexedOperands to render; both operands expand to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of three Q registers with half-word lane subscripting.
  def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListThreeQHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListThreeQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListThreeQWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListThreeQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  }
  // Lane-indexed register lists of four D registers.
  // Every parser class below uses parseVectorList to parse and
  // addVecListIndexedOperands to render; every operand expands to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of four D registers with byte lane subscripting.
  def VecListFourDByteIndexAsmOperand : AsmOperandClass {
    let Name = "VecListFourDByteIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListFourDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  }

  // ...with half-word lane subscripting.
  def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListFourDHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListFourDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListFourDWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListFourDWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListFourDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  }
  // Lane-indexed register lists of four Q registers.
  // Both parser classes use parseVectorList to parse and
  // addVecListIndexedOperands to render; both operands expand to two machine
  // operands, the DPR register $Vd and the i32 immediate $idx.

  // Register list of four Q registers with half-word lane subscripting.
  def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListFourQHWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListFourQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  }

  // ...with word lane subscripting.
  def VecListFourQWordIndexAsmOperand : AsmOperandClass {
    let Name = "VecListFourQWordIndexed";
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
  }
  def VecListFourQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  }
  // Pattern fragments that match a load / store only when the memory node
  // reports an alignment of at least 8.
  def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    const auto *Ld = cast<LoadSDNode>(N);
    return Ld->getAlignment() >= 8;
  }]>;
  def dword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
    const auto *St = cast<StoreSDNode>(N);
    return St->getAlignment() >= 8;
  }]>;
  // Pattern fragments that match a load / store only when the memory node
  // reports an alignment of exactly 4 (larger alignments do not match).
  def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    const auto *Ld = cast<LoadSDNode>(N);
    return Ld->getAlignment() == 4;
  }]>;
  def word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
    const auto *St = cast<StoreSDNode>(N);
    return St->getAlignment() == 4;
  }]>;
  // Pattern fragments that match a load / store only when the memory node
  // reports an alignment of exactly 2.
  def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    const auto *Ld = cast<LoadSDNode>(N);
    return Ld->getAlignment() == 2;
  }]>;
  def hword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
    const auto *St = cast<StoreSDNode>(N);
    return St->getAlignment() == 2;
  }]>;
  // Pattern fragments that match a load / store only when the memory node
  // reports an alignment of exactly 1.
  def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    const auto *Ld = cast<LoadSDNode>(N);
    return Ld->getAlignment() == 1;
  }]>;
  def byte_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
    const auto *St = cast<StoreSDNode>(N);
    return St->getAlignment() == 1;
  }]>;
  // Pattern fragments that match a load / store only when the memory node
  // reports an alignment strictly below 4.
  def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    const auto *Ld = cast<LoadSDNode>(N);
    return Ld->getAlignment() < 4;
  }]>;
  def non_word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
    const auto *St = cast<StoreSDNode>(N);
    return St->getAlignment() < 4;
  }]>;
  458. //===----------------------------------------------------------------------===//
  459. // NEON-specific DAG Nodes.
  460. //===----------------------------------------------------------------------===//
  // Type profile with one result and two operands: the result is an integer
  // type and the two operands share one type.
  def SDTARMVTST
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
  // DAG node bound to ARMISD::VTST.
  def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;
  // Types for vector shift by immediates. The "SHX" version is for long and
  // narrow operations where the source and destination vectors have different
  // types. The "SHINS" version is for shift and insert operations.

  // One result, two operands: integer result, integer source (not required to
  // be the same type as the result), i32 shift amount.
  def SDTARMVSHXIMM
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
  // One result, three operands: integer result, two operands of the result
  // type, i32 shift amount.
  def SDTARMVSHINSIMM
    : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
  // DAG nodes for NEON shift-by-immediate operations, each bound to the
  // ARMISD opcode named in its string. SDTARMVSHIMM is not defined in this
  // part of the file; SDTARMVSHXIMM and SDTARMVSHINSIMM are the profiles
  // declared just above.
  def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

  def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
  def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
  def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

  def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
  def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
  def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
  def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
  def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
  def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

  def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
  def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
  def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

  def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
  def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
  // DAG node bound to ARMISD::VBSP. The inline profile has one vector result
  // and three operands, all constrained to the result type.
  def NEONvbsp
    : SDNode<"ARMISD::VBSP",
             SDTypeProfile<1, 3, [SDTCisVec<0>,
                                  SDTCisSameAs<0, 1>,
                                  SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>]>>;
  // One vector result, three operands: two vectors of the result type
  // followed by an i32.
  def SDTARMVEXT
    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
  // DAG node bound to ARMISD::VEXT.
  def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
  // Two results and two operands, all of one vector type (every index 1..3 is
  // tied to the type of result 0).
  def SDTARMVSHUF2
    : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>,
                           SDTCisSameAs<0, 3>]>;
  // Two-result shuffle nodes sharing that profile.
  def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
  def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
  def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
  // Table-lookup profiles: the result and every operand are fixed to v8i8.
  // VTBL1 takes two operands, VTBL2 takes three.
  def SDTARMVTBL1
    : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                           SDTCisVT<2, v8i8>]>;
  def SDTARMVTBL2
    : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                           SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
  // DAG nodes bound to ARMISD::VTBL1 / ARMISD::VTBL2.
  def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
  def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
  505. //===----------------------------------------------------------------------===//
  506. // NEON load / store instructions
  507. //===----------------------------------------------------------------------===//
  // Use VLDM to load a Q register as a D register pair.
  // This is a pseudo instruction that is expanded to VLDMD after reg alloc.
  // Its selection pattern covers v2f64 loads from $Rn that satisfy the
  // word_alignedload fragment; the result is written to the DPair $dst.
  def VLDMQIA
    : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                     IIC_fpLoad_m, "",
                     [(set DPair:$dst,
                           (v2f64 (word_alignedload GPR:$Rn)))]>;
  // Use VSTM to store a Q register as a D register pair.
  // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
  // Its selection pattern covers v2f64 stores of the DPair $src to $Rn that
  // satisfy the word_alignedstore fragment; there are no outputs.
  def VSTMQIA
    : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                     IIC_fpStore_m, "",
                     [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
  // Classes for VLD* pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.
  //
  // Q-register variants. Each takes only the itinerary; the last PseudoNLdSt
  // argument is the constraint string ("" or tying $addr.addr to $wb).

  // Plain load into a QPR.
  class VLDQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;

  // Load with writeback; the offset is an am6offset operand.
  class VLDQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset), itin,
                  "$addr.addr = $wb">;

  // Load with writeback and no explicit offset operand.
  class VLDQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr), itin,
                  "$addr.addr = $wb">;

  // Load with writeback; the offset is an rGPR register operand.
  class VLDQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset), itin,
                  "$addr.addr = $wb">;
  // QQ-register (QQPR destination) VLD pseudo classes. Each takes only the
  // itinerary; the last PseudoNLdSt argument is the constraint string ("" or
  // tying $addr.addr to $wb).

  // Plain load into a QQPR.
  class VLDQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;

  // Load with writeback; the offset is an am6offset operand.
  class VLDQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset), itin,
                  "$addr.addr = $wb">;

  // Load with writeback and no explicit offset operand.
  class VLDQQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr), itin,
                  "$addr.addr = $wb">;

  // Load with writeback; the offset is an rGPR register operand.
  class VLDQQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset), itin,
                  "$addr.addr = $wb">;
  // QQQQ-register VLD pseudo classes. Both take the destination register
  // group as an extra input ($src) and tie it to $dst in the constraint
  // string.

  // Plain load.
  class VLDQQQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst),
                  (ins addrmode6:$addr, QQQQPR:$src), itin,
                  "$src = $dst">;

  // Load with writeback; additionally ties $addr.addr to $wb.
  class VLDQQQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                  "$addr.addr = $wb, $src = $dst">;
  557. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // VLD1 : Vector Load (multiple single elements)
  //
  // One-D-register form without writeback. op7_4 and the data-type suffix Dt
  // come from the instantiation; Rm is fixed to 0b1111 and encoding bit 4 is
  // taken from bit 4 of the $Rn address operand.
  class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0111, op7_4, (outs VecListOneD:$Vd),
            (ins AddrMode:$Rn), IIC_VLD1,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // VLD1 D-register-pair form without writeback. Rm is fixed to 0b1111 and
  // encoding bits 5-4 are taken from bits 5-4 of the $Rn address operand.
  class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b1010, op7_4, (outs VecListDPair:$Vd),
            (ins AddrMode:$Rn), IIC_VLD1x2,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{5-4} = Rn{5-4};
  }
  // Non-writeback VLD1 instructions for each element size. The '?' entries
  // leave those op7_4 bits unset at instantiation; VLD1D / VLD1Q assign the
  // corresponding Inst bits from the address operand.
  def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
  def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
  def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
  def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

  def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
  def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with address register writeback:
  //
  // One-D-register VLD1 with writeback. Produces two records per defm:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand
  // Both add a GPR:$wb output tied to $Rn.addr.
  multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0111, op7_4, (outs VecListOneD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn), IIC_VLD1u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }
    def _register
      : NLdSt<0, 0b10, 0b0111, op7_4, (outs VecListOneD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback variants of the register-pair VLD1 form.  As with VLD1DWB, the
  // _fixed record hard-wires Rm to 0b1101 and the _register record takes Rm
  // as an rGPR operand; both tie $Rn.addr to the GPR:$wb result.
  multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
        : NLdSt<0, 0b10, 0b1010, op7_4,
                (outs VecListDPair:$Vd, GPR:$wb),
                (ins AddrMode:$Rn),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn!",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
        : NLdSt<0, 0b10, 0b1010, op7_4,
                (outs VecListDPair:$Vd, GPR:$wb),
                (ins AddrMode:$Rn, rGPR:$Rm),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn, $Rm",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VLD1 instantiations.  Each defm expands to a <name>_fixed and a
  // <name>_register record (see the VLD1DWB / VLD1QWB multiclasses).
  // One-register forms:
  defm VLD1d8wb   : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16wb  : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32wb  : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64wb  : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
  // Register-pair forms:
  defm VLD1q8wb   : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
  defm VLD1q16wb  : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  defm VLD1q32wb  : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  defm VLD1q64wb  : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with 3 registers
  //
  // Non-writeback VLD1 whose result is a VecListThreeD list.
  class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
      : NLdSt<0, 0b10, 0b0110, op7_4,
              (outs VecListThreeD:$Vd),
              (ins AddrMode:$Rn),
              IIC_VLD1x3,
              "vld1", Dt, "$Vd, $Rn", "", []>,
        Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;      // Rm is a constant in this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 is taken from bit 4 of $Rn.
  }
  // Writeback variants of the three-register VLD1 form (_fixed: Rm = 0b1101,
  // "$Rn!" syntax; _register: Rm is an rGPR operand).
  //
  // NOTE(review): both records use the itinerary IIC_VLD1x2u, whereas the
  // non-writeback VLD1D3 class uses IIC_VLD1x3.  Confirm that the two-register
  // writeback itinerary is intended here.
  multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
        : NLdSt<0, 0b10, 0b0110, op7_4,
                (outs VecListThreeD:$Vd, GPR:$wb),
                (ins AddrMode:$Rn),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn!",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
        : NLdSt<0, 0b10, 0b0110, op7_4,
                (outs VecListThreeD:$Vd, GPR:$wb),
                (ins AddrMode:$Rn, rGPR:$Rm),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn, $Rm",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Three-register VLD1 records, one per element size.
  def VLD1d8T   : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
  def VLD1d16T  : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
  def VLD1d32T  : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
  def VLD1d64T  : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

  // Writeback counterparts (each defm yields _fixed and _register records).
  defm VLD1d8Twb   : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16Twb  : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32Twb  : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64Twb  : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

  // Pseudo-instructions built on the VLDQQ* pseudo classes.
  def VLD1d8TPseudo   : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d16TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d32TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d64TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_fixed
      : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_register
      : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

  // High / Low pseudo-instructions built on the VLDQQQQ* pseudo classes; only
  // the Low halves use the writeback (_UPD) class.
  def VLD1q8HighTPseudo      : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q8LowTPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q16HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q16LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q32HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q32LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q64HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q64LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  // ...with 4 registers
  //
  // Non-writeback VLD1 whose result is a VecListFourD list.
  class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
      : NLdSt<0, 0b10, 0b0010, op7_4,
              (outs VecListFourD:$Vd),
              (ins AddrMode:$Rn),
              IIC_VLD1x4,
              "vld1", Dt, "$Vd, $Rn", "", []>,
        Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;          // Rm is a constant in this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 mirror bits 5-4 of $Rn.
  }
  // Writeback variants of the four-register VLD1 form (_fixed: Rm = 0b1101,
  // "$Rn!" syntax; _register: Rm is an rGPR operand).
  //
  // NOTE(review): both records use the itinerary IIC_VLD1x2u, whereas the
  // non-writeback VLD1D4 class uses IIC_VLD1x4.  Confirm that the two-register
  // writeback itinerary is intended here.
  multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
        : NLdSt<0, 0b10, 0b0010, op7_4,
                (outs VecListFourD:$Vd, GPR:$wb),
                (ins AddrMode:$Rn),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn!",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
        : NLdSt<0, 0b10, 0b0010, op7_4,
                (outs VecListFourD:$Vd, GPR:$wb),
                (ins AddrMode:$Rn, rGPR:$Rm),
                IIC_VLD1x2u,
                "vld1", Dt, "$Vd, $Rn, $Rm",
                "$Rn.addr = $wb", []>,
          Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Four-register VLD1 records, one per element size.
  def VLD1d8Q   : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  def VLD1d16Q  : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  def VLD1d32Q  : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  def VLD1d64Q  : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // Writeback counterparts (each defm yields _fixed and _register records).
  defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // Pseudo-instructions built on the VLDQQ* pseudo classes.
  def VLD1d8QPseudo   : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d16QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d32QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d64QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_fixed
      : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_register
      : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

  // Low / High pseudo-instructions built on the VLDQQQQ* pseudo classes; only
  // the Low halves use the writeback (_UPD) class.
  def VLD1q8LowQPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q8HighQPseudo      : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q16LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q16HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q32LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q32HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q64LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q64HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  // VLD2 : Vector Load (multiple 2-element structures)
  //
  // Generic non-writeback VLD2 class.  The result register-list type (VdTy),
  // itinerary (itin) and address operand (AddrMode) are parameters.  No Sched<>
  // mixin is applied here; the records derived from this class add their own.
  class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
             InstrItinClass itin, Operand AddrMode>
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs VdTy:$Vd),
              (ins AddrMode:$Rn),
              itin,
              "vld2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Rm = 0b1111;          // Rm is a constant in this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 mirror bits 5-4 of $Rn.
  }
  // VLD2 into a VecListDPair (op11_8 = 0b1000).
  def VLD2d8
      : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  def VLD2d16
      : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  def VLD2d32
      : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;

  // VLD2 into a VecListFourD (op11_8 = 0b0011).
  def VLD2q8
      : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
             addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;
  def VLD2q16
      : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
             addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;
  def VLD2q32
      : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
             addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;

  // Matching pseudo-instructions.
  def VLD2q8Pseudo   : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q16Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q32Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  // ...with address register writeback:
  //
  // Produces a _fixed record ("$Vd, $Rn!" syntax, Rm = 0b1101) and a _register
  // record ("$Vd, $Rn, $Rm" syntax, Rm as an rGPR operand).  Both tie $Rn.addr
  // to the GPR:$wb result.  No Sched<> mixin is applied inside the multiclass.
  multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                    RegisterOperand VdTy, InstrItinClass itin,
                    Operand AddrMode> {
    def _fixed
        : NLdSt<0, 0b10, op11_8, op7_4,
                (outs VdTy:$Vd, GPR:$wb),
                (ins AddrMode:$Rn),
                itin,
                "vld2", Dt, "$Vd, $Rn!",
                "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
        : NLdSt<0, 0b10, op11_8, op7_4,
                (outs VdTy:$Vd, GPR:$wb),
                (ins AddrMode:$Rn, rGPR:$Rm),
                itin,
                "vld2", Dt, "$Vd, $Rn, $Rm",
                "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VLD2 into a VecListDPair.
  defm VLD2d8wb
      : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  defm VLD2d16wb
      : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  defm VLD2d32wb
      : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;

  // Writeback VLD2 into a VecListFourD.
  defm VLD2q8wb
      : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
               addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;
  defm VLD2q16wb
      : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
               addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;
  defm VLD2q32wb
      : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
               addrmode6align64or128or256>,
        Sched<[WriteVLD4]>;

  // Writeback pseudo-instructions (fixed and register forms).
  def VLD2q8PseudoWB_fixed
      : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_fixed
      : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_fixed
      : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q8PseudoWB_register
      : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_register
      : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_register
      : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;

  // ...with double-spaced registers
  def VLD2b8
      : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  def VLD2b16
      : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  def VLD2b32
      : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
             addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  defm VLD2b8wb
      : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  defm VLD2b16wb
      : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  defm VLD2b32wb
      : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
               addrmode6align64or128>,
        Sched<[WriteVLD2]>;
  // VLD3 : Vector Load (multiple 3-element structures)
  //
  // Non-writeback VLD3.  The three results are separate DPR operands ($Vd,
  // $dst2, $dst3) printed as a braced list; the address is addrmode6:$Rn.
  class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
              (ins addrmode6:$Rn),
              IIC_VLD3,
              "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>,
        Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Rm = 0b1111;      // Rm is a constant in this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 is taken from bit 4 of $Rn.
  }
  // VLD3 records for op11_8 = 0b0100, one per element size, followed by the
  // matching pseudo-instructions.
  def VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
  def VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
  def VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;

  def VLD3d8Pseudo   : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  // ...with address register writeback:
  //
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn ("$Rn$Rm").  Rm is not fixed in this class.
  class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm),
              IIC_VLD3u,
              "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Inst{4} = Rn{4};
  }
  // Writeback VLD3 records for op11_8 = 0b0100 and their pseudo-instructions.
  def VLD3d8_UPD   : VLD3DWB<0b0100, {0,0,0,?}, "8">;
  def VLD3d16_UPD  : VLD3DWB<0b0100, {0,1,0,?}, "16">;
  def VLD3d32_UPD  : VLD3DWB<0b0100, {1,0,0,?}, "32">;

  def VLD3d8Pseudo_UPD   : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

  // ...with double-spaced registers:
  def VLD3q8       : VLD3D<0b0101, {0,0,0,?}, "8">;
  def VLD3q16      : VLD3D<0b0101, {0,1,0,?}, "16">;
  def VLD3q32      : VLD3D<0b0101, {1,0,0,?}, "32">;
  def VLD3q8_UPD   : VLD3DWB<0b0101, {0,0,0,?}, "8">;
  def VLD3q16_UPD  : VLD3DWB<0b0101, {0,1,0,?}, "16">;
  def VLD3q32_UPD  : VLD3DWB<0b0101, {1,0,0,?}, "32">;

  def VLD3q8Pseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

  // ...alternate versions to be allocated odd register numbers:
  def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

  def VLD3q8oddPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  // VLD4 : Vector Load (multiple 4-element structures)
  //
  // Non-writeback VLD4.  The four results are separate DPR operands ($Vd,
  // $dst2, $dst3, $dst4) printed as a braced list; the address is
  // addrmode6:$Rn.
  class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
              (ins addrmode6:$Rn),
              IIC_VLD4,
              "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
        Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Rm = 0b1111;          // Rm is a constant in this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 mirror bits 5-4 of $Rn.
  }
  // VLD4 records for op11_8 = 0b0000, one per element size, followed by the
  // matching pseudo-instructions.
  def VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
  def VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
  def VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;

  def VLD4d8Pseudo   : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  // ...with address register writeback:
  //
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn ("$Rn$Rm").  Rm is not fixed in this class.
  class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm),
              IIC_VLD4u,
              "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Inst{5-4} = Rn{5-4};
  }
  // Writeback VLD4 records for op11_8 = 0b0000 and their pseudo-instructions.
  def VLD4d8_UPD   : VLD4DWB<0b0000, {0,0,?,?}, "8">;
  def VLD4d16_UPD  : VLD4DWB<0b0000, {0,1,?,?}, "16">;
  def VLD4d32_UPD  : VLD4DWB<0b0000, {1,0,?,?}, "32">;

  def VLD4d8Pseudo_UPD   : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

  // ...with double-spaced registers:
  def VLD4q8       : VLD4D<0b0001, {0,0,?,?}, "8">;
  def VLD4q16      : VLD4D<0b0001, {0,1,?,?}, "16">;
  def VLD4q32      : VLD4D<0b0001, {1,0,?,?}, "32">;
  def VLD4q8_UPD   : VLD4DWB<0b0001, {0,0,?,?}, "8">;
  def VLD4q16_UPD  : VLD4DWB<0b0001, {0,1,?,?}, "16">;
  def VLD4q32_UPD  : VLD4DWB<0b0001, {1,0,?,?}, "32">;

  def VLD4q8Pseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

  // ...alternate versions to be allocated odd register numbers:
  def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

  def VLD4q8oddPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  895. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  // Classes for VLD*LN pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.

  // Lane-load pseudo on a QPR: the $src input is tied to the $dst result, and
  // the lane index is passed as nohash_imm:$lane.
  class VLDQLNPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst),
                    (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                    itin,
                    "$src = $dst">;
  // Writeback lane-load pseudo on a QPR: adds a GPR:$wb result tied to
  // $addr.addr and an am6offset:$offset input; $src stays tied to $dst.
  class VLDQLNWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                         nohash_imm:$lane),
                    itin,
                    "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQPR: the $src input is tied to the $dst result.
  class VLDQQLNPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst),
                    (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                    itin,
                    "$src = $dst">;
  // Writeback lane-load pseudo on a QQPR: adds a GPR:$wb result tied to
  // $addr.addr and an am6offset:$offset input; $src stays tied to $dst.
  class VLDQQLNWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                         nohash_imm:$lane),
                    itin,
                    "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQQQPR: the $src input is tied to the $dst result.
  class VLDQQQQLNPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQQQPR:$dst),
                    (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                    itin,
                    "$src = $dst">;
  // Writeback lane-load pseudo on a QQQQPR: adds a GPR:$wb result tied to
  // $addr.addr and an am6offset:$offset input; $src stays tied to $dst.
  class VLDQQQQLNWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                         nohash_imm:$lane),
                    itin,
                    "$addr.addr = $wb, $src = $dst">;
  // VLD1LN : Vector Load (single element to one lane)
  //
  // Selects a vector_insert of an i32 LoadOp result into (Ty DPR:$src) at
  // $lane.  $src is tied to the $Vd result.
  //
  // NOTE(review): unlike the sibling class VLD1LN32, this class carries no
  // Sched<> mixin - confirm whether that omission is intentional.
  class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd),
                (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
                IIC_VLD1ln,
                "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
                "$src = $Vd",
                [(set DPR:$Vd,
                      (vector_insert (Ty DPR:$src),
                                     (i32 (LoadOp addrmode6:$Rn)),
                                     imm:$lane))]> {
    let DecoderMethod = "DecodeVLD1LN";
    let Rm = 0b1111;  // Rm is a constant in this form.
  }
  // Variant of VLD1LN whose address operand is addrmode6oneL32 (both in the
  // operand list and in the selection pattern) and which carries
  // Sched<[WriteVLD1]>.
  class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag LoadOp>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd),
                (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
                IIC_VLD1ln,
                "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
                "$src = $Vd",
                [(set DPR:$Vd,
                      (vector_insert (Ty DPR:$src),
                                     (i32 (LoadOp addrmode6oneL32:$Rn)),
                                     imm:$lane))]>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
    let Rm = 0b1111;  // Rm is a constant in this form.
  }
  // QPR lane-load pseudo with a selection pattern: matches a vector_insert of
  // an i32 LoadOp result into (Ty QPR:$src) at $lane.
  class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp>
      : VLDQLNPseudo<IIC_VLD1ln>, Sched<[WriteVLD1]> {
    let Pattern = [(set QPR:$dst,
                        (vector_insert (Ty QPR:$src),
                                       (i32 (LoadOp addrmode6:$addr)),
                                       imm:$lane))];
  }
  // Single-lane VLD1 records.  The lane index is placed in the top bits of
  // Inst{7-4}: three bits for "8", two for "16", one for "32".
  def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
    let Inst{7-5} = lane{2-0};
  }
  // NOTE(review): op7_4 passes a literal 0 at one position while the body
  // assigns Inst{5-4} from $Rn - confirm which assignment is meant to win.
  def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7-6} = lane{1-0};
  }
  def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7} = lane{0};
  }

  // QPR pseudo-instructions with the corresponding 128-bit vector types.
  def VLD1LNq8Pseudo   : VLD1QLNPseudo<v16i8, extloadi8>;
  def VLD1LNq16Pseudo  : VLD1QLNPseudo<v8i16, extloadi16>;
  def VLD1LNq32Pseudo  : VLD1QLNPseudo<v4i32, load>;
  let Predicates = [HasNEON] in {
    // Floating-point lane loads reuse the integer VLD1LN records: f16 and bf16
    // map to the 16-bit forms, f32 to the 32-bit forms.
    def : Pat<(vector_insert (v4f16 DPR:$src),
                             (f16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8f16 QPR:$src),
                             (f16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v4bf16 DPR:$src),
                             (bf16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8bf16 QPR:$src),
                             (bf16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v2f32 DPR:$src),
                             (f32 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v4f32 QPR:$src),
                             (f32 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // A 64-bit subvector insert to the first 128-bit vector position
    // is a subregister copy that needs no instruction.
    def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
  }
  1001. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // ...with address register writeback:
  //
  // Single-lane VLD1 with a GPR:$wb result tied to $Rn.addr and an
  // am6offset:$Rm input printed directly after $Rn.  $src is tied to $Vd.
  // No selection pattern is attached.
  class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, GPR:$wb),
                (ins addrmode6:$Rn, am6offset:$Rm,
                     DPR:$src, nohash_imm:$lane),
                IIC_VLD1lnu,
                "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
                "$src = $Vd, $Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
  }
  // Writeback single-lane VLD1 records.
  def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
    let Inst{4} = Rn{4};
    let Inst{7-6} = lane{1-0};
  }
  def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
    // Inst{5} and Inst{4} are both driven from the same source bit, Rn{4}.
    let Inst{5} = Rn{4};
    let Inst{4} = Rn{4};
    let Inst{7} = lane{0};
  }

  // Matching QPR writeback pseudo-instructions.
  def VLD1LNq8Pseudo_UPD   : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq16Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq32Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  // VLD2LN : Vector Load (single 2-element structure to one lane)
  //
  // Two DPR results ($Vd, $dst2), each tied to its own input ($src1, $src2).
  // No selection pattern is attached.
  class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2),
                (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
                IIC_VLD2ln,
                "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
                "$src1 = $Vd, $src2 = $dst2", []>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD2LN";
    let Rm = 0b1111;      // Rm is a constant in this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 is taken from bit 4 of $Rn.
  }
  // Single-lane VLD2 records; the lane index goes in the top bits of
  // Inst{7-4}.
  def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; }

  def VLD2LNd8Pseudo   : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

  // ...with double-spaced registers:
  // These differ from the d16/d32 records above only in one op7_4 bit that is
  // 1 here and 0 there.
  def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; }

  def VLD2LNq16Pseudo  : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo  : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  // ...with address register writeback:
  //
  // Single-lane VLD2 with a GPR:$wb result tied to $Rn.addr and an
  // am6offset:$Rm input printed directly after $Rn.
  //
  // NOTE(review): this class has no Sched<> mixin, while the non-writeback
  // VLD2LN class carries Sched<[WriteVLD1]> - confirm this is intentional.
  class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
                (ins addrmode6:$Rn, am6offset:$Rm,
                     DPR:$src1, DPR:$src2, nohash_imm:$lane),
                IIC_VLD2lnu,
                "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
                "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD2LN";
    let Inst{4} = Rn{4};
  }
  // Writeback single-lane VLD2 records and their pseudo-instructions.
  def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNd8Pseudo_UPD   : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

  // The q16/q32 records have one op7_4 bit set to 1 where d16/d32 have 0.
  def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNq16Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  // VLD3LN : Vector Load (single 3-element structure to one lane)
  //
  // Three DPR results, each tied to its own input ($src1..$src3).  No
  // selection pattern is attached, and no Inst bit is taken from $Rn in the
  // class body.
  class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
                (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                     nohash_imm:$lane),
                IIC_VLD3ln,
                "vld3", Dt,
                "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
                "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
    let Rm = 0b1111;  // Rm is a constant in this form.
  }
  // Single-lane VLD3 records; the lane index goes in the top bits of
  // Inst{7-4}.
  def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; }

  def VLD3LNd8Pseudo   : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers:
  def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; }

  def VLD3LNq16Pseudo  : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo  : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  //
  // Single-lane VLD3 with a GPR:$wb result tied to $Rn.addr and an
  // am6offset:$Rm input printed directly after $Rn.
  class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
                (ins addrmode6:$Rn, am6offset:$Rm,
                     DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
                IIC_VLD3lnu,
                "vld3", Dt,
                "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
                "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
                []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
  }
  // Writeback single-lane VLD3 records and their pseudo-instructions.
  def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNd8Pseudo_UPD   : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

  // The q16/q32 records have one op7_4 bit set to 1 where d16/d32 have 0.
  def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNq16Pseudo_UPD  : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo_UPD  : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  // VLD4LN : Vector Load (single 4-element structure to one lane)
  //
  // Four DPR results, each tied to its own input ($src1..$src4).  No
  // selection pattern is attached.
  class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
                (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                     DPR:$src4, nohash_imm:$lane),
                IIC_VLD4ln,
                "vld4", Dt,
                "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
                "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD4LN";
    let Rm = 0b1111;      // Rm is a constant in this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 is taken from bit 4 of $Rn.
  }
  // Single-lane VLD4 records; the lane index goes in the top bits of
  // Inst{7-4}.  The 32-bit forms additionally take Inst{5} from Rn{5}.
  def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNd8Pseudo   : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNd16Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNd32Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers:
  def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNq16Pseudo  : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNq32Pseudo  : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  //
  // Single-lane VLD4 with a GPR:$wb result tied to $Rn.addr and an
  // am6offset:$Rm input printed directly after $Rn.
  //
  // NOTE(review): this class has no Sched<> mixin, while the non-writeback
  // VLD4LN class carries Sched<[WriteVLD2]> - confirm this is intentional.
  class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
      : NLdStLn<1, 0b10, op11_8, op7_4,
                (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
                (ins addrmode6:$Rn, am6offset:$Rm,
                     DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                     nohash_imm:$lane),
                IIC_VLD4lnu,
                "vld4", Dt,
                "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
                "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
                []> {
    let DecoderMethod = "DecodeVLD4LN";
    let Inst{4} = Rn{4};
  }
  // Writeback single-lane VLD4 records and their pseudo-instructions.  The
  // 32-bit forms additionally take Inst{5} from Rn{5}.
  def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNd8Pseudo_UPD   : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNd16Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNd32Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

  // The q16/q32 records have one op7_4 bit set to 1 where d16/d32 have 0.
  def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNq16Pseudo_UPD  : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNq32Pseudo_UPD  : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1221. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  // VLD1DUP : Vector Load (single element to all lanes)
  //
  // Template for the D-register form. Parameters:
  //   op7_4    - value for the op7_4 encoding field (bit 4 is overridden below)
  //   Dt       - data-type suffix of the mnemonic ("8", "16", "32")
  //   Ty       - vector type produced by the selection pattern
  //   LoadOp   - load fragment whose i32 result is fed to ARMvdup
  //   AddrMode - operand class used for $Rn
  class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
                Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
            [(set VecListOneDAllLanes:$Vd,
                  (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    // Rm is fixed for this non-writeback form; bit 4 comes from $Rn.
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // D-register VLD1DUP instances. The 8- and 16-bit forms use extending
  // loads; the 32-bit form uses a plain load.
  def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8",  v8i8,  extloadi8,
                           addrmode6dupalignNone>;
  def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                           addrmode6dupalign16>;
  def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                           addrmode6dupalign32>;

  // Select the 32-bit instruction for a duplicated f32 load as well.
  let Predicates = [HasNEON] in {
    def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
              (VLD1DUPd32 addrmode6:$addr)>;
  }
  // Template for the VLD1DUP form that writes a D-register pair
  // (VecListDPairAllLanes). Parameters mirror the single-D-register template:
  // op7_4 encoding field, mnemonic suffix Dt, result type Ty, load fragment
  // LoadOp and the $Rn operand class AddrMode.
  class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
                 Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd),
            (ins AddrMode:$Rn), IIC_VLD1dup,
            "vld1", Dt, "$Vd, $Rn", "",
            [(set VecListDPairAllLanes:$Vd,
                  (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // D-register-pair VLD1DUP instances.
  def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8",  v16i8, extloadi8,
                            addrmode6dupalignNone>;
  def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                            addrmode6dupalign16>;
  def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                            addrmode6dupalign32>;

  // Select the 32-bit instruction for a duplicated f32 load as well.
  let Predicates = [HasNEON] in {
    def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
              (VLD1DUPq32 addrmode6:$addr)>;
  }
  1266. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // ...with address register writeback:
  // Produces two records per instantiation:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Both return the updated address in $wb, tied to $Rn.addr.
  multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                       (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                       (ins AddrMode:$Rn), IIC_VLD1dupu,
                       "vld1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }
    def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                          (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                          "vld1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback VLD1DUP forms that load into a D-register pair.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Only the _fixed record carries a Sched<> annotation here.
  multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                       (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                       (ins AddrMode:$Rn), IIC_VLD1dupu,
                       "vld1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }
    def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                          (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                          "vld1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback VLD1DUP instances: one-D-register list...
  defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8",  addrmode6dupalignNone>;
  defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
  defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

  // ...and D-register-pair list.
  defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8",  addrmode6dupalignNone>;
  defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
  defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
  // VLD2DUP : Vector Load (single 2-element structure to all lanes)
  //
  // Template without writeback and without a selection pattern. VdTy selects
  // the register-list operand class and AddrMode the class of $Rn.
  class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                Operand AddrMode>
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd),
            (ins AddrMode:$Rn), IIC_VLD2dup,
            "vld2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // VLD2DUP with an adjacent D-register pair.
  def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
                           addrmode6dupalign16>;
  def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                           addrmode6dupalign32>;
  def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                           addrmode6dupalign64>;

  // HACK: VLD2DUPd8x2 has to be changed together with VLD2b8; otherwise
  // "vld2.8 {d0[], d2[]}, [r4:32]" turns into "vld2.8 {d0, d2}, [r4:32]".
  // ...with double-spaced registers
  def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                             addrmode6dupalign16>;
  def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                             addrmode6dupalign32>;
  def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                             addrmode6dupalign64>;

  // Even/odd pseudo-instructions on a QQ register tuple, per element size.
  def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Both return the updated address in $wb, tied to $Rn.addr.
  multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                       Operand AddrMode> {
    def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                       (outs VdTy:$Vd, GPR:$wb),
                       (ins AddrMode:$Rn), IIC_VLD2dupu,
                       "vld2", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLD2DupInstruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }
    def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                          (outs VdTy:$Vd, GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                          "vld2", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLD2DupInstruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback VLD2DUP instances: adjacent D-register pair...
  defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                                addrmode6dupalign16>;
  defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
  defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

  // ...and double-spaced D-register pair.
  defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                  addrmode6dupalign16>;
  defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                  addrmode6dupalign32>;
  defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                  addrmode6dupalign64>;
  // VLD3DUP : Vector Load (single 3-element structure to all lanes)
  //
  // Non-writeback template with three explicit DPR results. Unlike the other
  // DUP templates in this group, encoding bit 4 is forced to 0 rather than
  // taken from $Rn.
  class VLD3DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6dup:$Rn), IIC_VLD3dup,
            "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3DupInstruction";
    let Rm = 0b1111;
    let Inst{4} = 0;
  }
  // Single-spaced VLD3DUP instructions and their QQ-tuple pseudos.
  def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
  def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
  def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

  def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers (not used for codegen):
  def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
  def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
  def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

  // Even/odd pseudo-instructions on a QQQQ register tuple.
  def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // Adds the $wb result (tied to $Rn.addr) and an am6offset $Rm operand; the
  // class of $Rn is supplied by the AddrMode parameter. Bit 4 stays 0.
  class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
            "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3DupInstruction";
    let Inst{4} = 0;
  }
  // Writeback VLD3DUP instructions: single-spaced (d) and double-spaced (q).
  // Every instance passes addrmode6dupalign64 as the address operand class.
  def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
  def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
  def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

  def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
  def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
  def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

  // Writeback pseudo-instructions on a QQ register tuple.
  def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  // VLD4DUP : Vector Load (single 4-element structure to all lanes)
  //
  // Non-writeback template with four explicit DPR results; encoding bit 4 is
  // taken from bit 4 of the $Rn operand value.
  class VLD4DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6dup:$Rn), IIC_VLD4dup,
            "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "",
            []> {
    let DecoderMethod = "DecodeVLD4DupInstruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // Single-spaced VLD4DUP instructions. The 32-bit form additionally routes
  // bit 5 of the $Rn value into encoding bit 6.
  def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
  def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
  def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

  def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers (not used for codegen):
  def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
  def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
  def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

  // Even/odd pseudo-instructions on a QQQQ register tuple.
  def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // Adds the $wb result (tied to $Rn.addr) and an am6offset $Rm operand to the
  // four-register all-lanes load.
  class VLD4DUPWB<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
            "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD4DupInstruction";
    let Inst{4} = Rn{4};
  }
  // Writeback VLD4DUP instructions: single-spaced (d) and double-spaced (q).
  // As in the non-writeback forms, the 32-bit variants set Inst{6} from Rn{5}.
  def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
  def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
  def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

  def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
  def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
  def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

  // Writeback pseudo-instructions on a QQ register tuple.
  def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1464. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  1465. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  // Classes for VST* pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.

  // Q-register source, no writeback.
  class VSTQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QPR:$src),
                  itin, "">;

  // Q-register source, writeback with an am6offset operand.
  class VSTQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                  itin, "$addr.addr = $wb">;

  // Q-register source, writeback without an offset operand.
  class VSTQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, QPR:$src),
                  itin, "$addr.addr = $wb">;

  // Q-register source, writeback with an rGPR offset operand.
  class VSTQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset, QPR:$src),
                  itin, "$addr.addr = $wb">;
  // QQ-tuple source, no writeback.
  class VSTQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQPR:$src),
                  itin, "">;

  // QQ-tuple source, writeback with an am6offset operand.
  class VSTQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                  itin, "$addr.addr = $wb">;

  // QQ-tuple source, writeback without an offset operand.
  class VSTQQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, QQPR:$src),
                  itin, "$addr.addr = $wb">;

  // QQ-tuple source, writeback with an rGPR offset operand.
  class VSTQQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset, QQPR:$src),
                  itin, "$addr.addr = $wb">;
  // QQQQ-tuple source, no writeback.
  class VSTQQQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQQQPR:$src),
                  itin, "">;

  // QQQQ-tuple source, writeback with an am6offset operand.
  class VSTQQQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                  itin, "$addr.addr = $wb">;
  // VST1 : Vector Store (multiple single elements)
  //
  // One-D-register store without writeback. AddrMode is the operand class
  // used for $Rn; encoding bit 4 comes from bit 4 of the $Rn value.
  class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListOneD:$Vd),
            IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // D-register-pair VST1 store without writeback. Two encoding bits
  // (Inst{5-4}) are taken from the $Rn operand value.
  class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListDPair:$Vd),
            IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{5-4} = Rn{5-4};
  }
  // VST1 with one D register, for each element size.
  def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
  def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
  def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
  def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

  // VST1 with a D-register pair, for each element size.
  def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
  def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with address register writeback:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // NOTE(review): both records use the itinerary class IIC_VLD1u, whose name
  // suggests a load, although the mnemonic is "vst1" - confirm whether a
  // store itinerary was intended.
  multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b0111, op7_4,
                       (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                       "vst1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }
    def _register : NLdSt<0, 0b00, 0b0111, op7_4,
                          (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                          IIC_VLD1u,
                          "vst1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback VST1 forms storing a D-register pair.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // NOTE(review): both records use the itinerary class IIC_VLD1x2u, whose
  // name suggests a load, although the mnemonic is "vst1" - confirm whether a
  // store itinerary was intended.
  multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b1010, op7_4,
                       (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                       "vst1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
    }
    def _register : NLdSt<0, 0b00, 0b1010, op7_4,
                          (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                          IIC_VLD1x2u,
                          "vst1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VST1 instances: one D register...
  defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
  defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
  defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

  // ...and a D-register pair.
  defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
  defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with 3 registers
  // VST1 storing a three-D-register list, no writeback.
  class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListThreeD:$Vd),
            IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // Writeback VST1 forms storing a three-D-register list.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Both return the updated address in $wb, tied to $Rn.addr.
  //
  // The itinerary is IIC_VST1x3u, the same class the corresponding
  // VST1d64TPseudoWB_* pseudo-instructions use. These records previously
  // carried the load itinerary IIC_VLD1x3u, so the store was described with
  // load scheduling data on itinerary-based models.
  multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
                       "vst1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
    def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                          IIC_VST1x3u,
                          "vst1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
  }
  // Three-register VST1 instructions, without and with writeback.
  def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
  def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
  def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
  def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

  defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

  // Pseudo-instructions taking a QQ register tuple.
  def VST1d8TPseudo  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d64TPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d64TPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

  // Pseudo-instructions taking a QQQQ register tuple: a non-writeback "High"
  // form and a writeback "Low" form per element size.
  def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  // ...with 4 registers
  // VST1 storing a four-D-register list, no writeback.
  class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
            []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;
    let Inst{5-4} = Rn{5-4};
  }
  // Writeback VST1 forms storing a four-D-register list.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Both return the updated address in $wb, tied to $Rn.addr.
  //
  // The itinerary is IIC_VST1x4u, the same class the corresponding
  // VST1d64QPseudoWB_* pseudo-instructions use. These records previously
  // carried the load itinerary IIC_VLD1x4u, so the store was described with
  // load scheduling data on itinerary-based models.
  multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
                       "vst1", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
    def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                          IIC_VST1x4u,
                          "vst1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
  }
  // Four-register VST1 instructions, without and with writeback.
  def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

  defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // Pseudo-instructions taking a QQ register tuple.
  def VST1d8QPseudo  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d64QPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d64QPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

  // Pseudo-instructions taking a QQQQ register tuple: a non-writeback "High"
  // form and a writeback "Low" form per element size.
  def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  // VST2 : Vector Store (multiple 2-element structures)
  //
  // Non-writeback template. The caller supplies the op11_8/op7_4 encoding
  // fields, the register-list operand class (VdTy), the itinerary and the
  // $Rn operand class; no Sched<> is attached here, so instances add their own.
  class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
             InstrItinClass itin, Operand AddrMode>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins AddrMode:$Rn, VdTy:$Vd),
            itin, "vst2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Rm = 0b1111;
    let Inst{5-4} = Rn{5-4};
  }
  // VST2 storing a D-register pair.
  def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
  def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
  def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;

  // VST2 storing four D registers.
  def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
  def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
  def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;

  // Pseudo-instructions taking a QQ register tuple.
  def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  // ...with address register writeback:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // NOTE(review): both records use the itinerary class IIC_VLD1u, whose name
  // suggests a VLD1 load, although the mnemonic is "vst2" - confirm whether a
  // VST2 store itinerary was intended.
  multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                     RegisterOperand VdTy, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, op11_8, op7_4,
                       (outs GPR:$wb),
                       (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                       "vst2", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
    }
    def _register : NLdSt<0, 0b00, op11_8, op7_4,
                          (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                          "vst2", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VST2 forms storing four D registers.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101
  //   _register - "$Rn, $Rm" syntax with an rGPR offset operand
  // Both return the updated address in $wb, tied to $Rn.addr.
  //
  // The itinerary is IIC_VST2x2u, the same class the corresponding
  // VST2q*PseudoWB_* pseudo-instructions use. These records previously
  // carried the VLD1 load itinerary IIC_VLD1u, so the four-register store was
  // described with single-register load scheduling data on itinerary-based
  // models.
  multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                       "vst2", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
    def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                          IIC_VST2x2u,
                          "vst2", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
  }
  // Writeback VST2 instances: D-register pair...
  defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                           addrmode6align64or128>;
  defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
  defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

  // ...and four D registers.
  defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

  // Writeback pseudo-instructions taking a QQ register tuple.
  def VST2q8PseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  def VST2q16PseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  def VST2q32PseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  def VST2q8PseudoWB_register
    : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  def VST2q16PseudoWB_register
    : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  def VST2q32PseudoWB_register
    : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
  // ...with double-spaced registers
  // Same shape as the VST2d* records, but with op11_8 = 0b1001 and the
  // VecListDPairSpaced register list. The non-writeback defs carry no Sched<>.
  def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>;
  def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>;
  def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>;

  defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                           addrmode6align64or128>;
  // VST3 : Vector Store (multiple 3-element structures)
  //
  // Non-writeback template with three explicit DPR source operands.
  class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
            "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
  }
  // Single-spaced VST3 instructions and their QQ-tuple pseudos.
  def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
  def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
  def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

  def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  // ...with address register writeback:
  // Adds the $wb result (tied to $Rn.addr) and an am6offset $Rm operand to the
  // three-register store.
  class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
            "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Inst{4} = Rn{4};
  }
  // Single-spaced writeback VST3 instructions and their QQ-tuple pseudos.
  def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
  def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
  def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

  def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // ...with double-spaced registers:
  // Same templates as the single-spaced forms, with op11_8 = 0b0101.
  def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
  def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
  def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;

  def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
  def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
  def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

  // Writeback pseudo-instructions taking a QQQQ register tuple.
  def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  1799. // ...alternate versions to be allocated odd register numbers:
  1800. def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  1801. def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  1802. def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  1803. def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  1804. def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  1805. def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // VST4 : Vector Store (multiple 4-element structures)
  // Non-writeback form: no outputs, four DPR sources, and Rm fixed to 0b1111.
  class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4,
            "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
            "", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Rm = 0b1111;
    // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand value.
    let Inst{5-4} = Rn{5-4};
  }

  def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
  def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
  def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

  // Pseudo-instructions for each element size.
  foreach sz = ["8", "16", "32"] in
    def "VST4d" # sz # "Pseudo" : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  // ...with address register writeback:
  // VST4DWB adds a GPR result $wb and an am6offset operand $Rm; the constraint
  // string ties $wb to $Rn.addr.
  class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4u,
            "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand value.
    let Inst{5-4} = Rn{5-4};
  }

  def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
  def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
  def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

  // Writeback pseudo-instructions for each element size.
  foreach sz = ["8", "16", "32"] in
    def "VST4d" # sz # "Pseudo_UPD"
      : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  // ...with double-spaced registers:
  // These reuse VST4D / VST4DWB with op11_8 = 0b0001.
  def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
  def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
  def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;

  def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
  def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
  def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

  foreach sz = ["8", "16", "32"] in
    def "VST4q" # sz # "Pseudo_UPD"
      : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

  // ...alternate versions to be allocated odd register numbers:
  foreach sz = ["8", "16", "32"] in
    def "VST4q" # sz # "oddPseudo"
      : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

  foreach sz = ["8", "16", "32"] in
    def "VST4q" # sz # "oddPseudo_UPD"
      : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  1855. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Classes for VST*LN pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.
  //
  // Three register-class flavours (QPR, QQPR, QQQQPR), each in a plain and a
  // writeback variant. The writeback variants add a GPR result $wb tied to
  // $addr.addr and an extra am6offset:$offset input.

  // Source held in a single Q register.
  class VSTQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset,
                       QPR:$src, nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // Source held in a QQ register.
  class VSTQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset,
                       QQPR:$src, nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // Source held in a QQQQ register.
  class VSTQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset,
                       QQQQPR:$src, nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;
  // VST1LN : Vector Store (single element from one lane)
  // The selection pattern stores the result of ExtractOp applied to lane
  // $lane of $Vd (typed as Ty) to AddrMode:$Rn using StoreOp.
  class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
              IIC_VST1ln,
              "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
              [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
    let Rm = 0b1111;
  }

  // Q-register pseudo counterpart: same pattern shape, with a QPR source and
  // an addrmode6 address.
  class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
    let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                            addrmode6:$addr)];
  }
  // Single-lane VST1 from a D register. Each def places the lane index in
  // the upper bits of Inst{7-4}; the 16- and 32-bit forms also copy bits of
  // the $Rn operand value into the low bits.
  def VST1LNd8
    : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, ARMvgetlaneu,
             addrmode6> {
    let Inst{7-5} = lane{2-0};
  }
  def VST1LNd16
    : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, ARMvgetlaneu,
             addrmode6> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }
  def VST1LNd32
    : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
             addrmode6oneL32> {
    let Inst{7}   = lane{0};
    let Inst{5-4} = Rn{5-4};
  }

  // Q-register pseudos for the same three element sizes.
  def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
  def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
  def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
  // Floating-point lane stores select the same VST1LN instructions as the
  // integer element types of equal width (f32 -> 32-bit, f16 -> 16-bit).
  let Predicates = [HasNEON] in {
    // 32-bit float lanes.
    def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // 16-bit float lanes.
    def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
  }
  // ...with address register writeback:
  // The pattern sets $wb from StoreOp applied to the extracted lane, the
  // address AdrMode:$Rn and the offset am6offset:$Rm. $wb is tied to $Rn.addr.
  class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs GPR:$wb),
              (ins AdrMode:$Rn, am6offset:$Rm,
                   DPR:$Vd, nohash_imm:$lane),
              IIC_VST1lnu,
              "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb",
              [(set GPR:$wb,
                    (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                             AdrMode:$Rn, am6offset:$Rm))]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
  }

  // Q-register writeback pseudo with the same pattern shape.
  class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
    let Pattern = [(set GPR:$wb,
                        (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                 addrmode6:$addr, am6offset:$offset))];
  }
  // Writeback single-lane VST1 from a D register, using the post-indexed
  // store fragments (post_truncsti8 / post_truncsti16 / post_store).
  def VST1LNd8_UPD
    : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, ARMvgetlaneu,
               addrmode6> {
    let Inst{7-5} = lane{2-0};
  }
  def VST1LNd16_UPD
    : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, ARMvgetlaneu,
               addrmode6> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }
  def VST1LNd32_UPD
    : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt,
               addrmode6oneL32> {
    let Inst{7}   = lane{0};
    let Inst{5-4} = Rn{5-4};
  }

  // Q-register writeback pseudos.
  def VST1LNq8Pseudo_UPD
    : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
  def VST1LNq16Pseudo_UPD
    : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
  def VST1LNq32Pseudo_UPD
    : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
  1957. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  // VST2LN : Vector Store (single 2-element structure from one lane)
  // Non-writeback form: no outputs, two DPR sources plus a lane index, and
  // Rm fixed to 0b1111. No selection pattern is attached.
  class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
              IIC_VST2ln,
              "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST2LN";
    let Rm = 0b1111;
    // Instruction bit 4 is taken from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Single-lane VST2 instructions. The lane index fills the upper bits of
  // Inst{7-4}; how many bits it takes depends on the element size.
  def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7}   = lane{0};   }

  foreach sz = ["8", "16", "32"] in
    def "VST2LNd" # sz # "Pseudo"
      : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

  // ...with double-spaced registers:
  // These differ from the 16/32-bit defs above by a 1 in op7_4{1} (16-bit)
  // or op7_4{2} (32-bit).
  def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }
  def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
    let Inst{4} = Rn{4};
  }

  foreach sz = ["16", "32"] in
    def "VST2LNq" # sz # "Pseudo"
      : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  // ...with address register writeback:
  // VST2LNWB adds a GPR result $wb (tied to $Rn.addr) and an am6offset
  // operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST1]> is attached so the real writeback instructions carry
  // the same scheduling class as VST2LN and the VST2LN*Pseudo_UPD
  // definitions; previously this class had no Sched<> at all.
  class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$Vd, DPR:$src2, nohash_imm:$lane),
              IIC_VST2lnu, "vst2", Dt,
              "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>,
      Sched<[WriteVST1]> {
    // Instruction bit 4 is taken from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST2LN";
  }
  // Writeback single-lane VST2 instructions (consecutive D registers).
  def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  foreach sz = ["8", "16", "32"] in
    def "VST2LNd" # sz # "Pseudo_UPD"
      : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

  // Writeback variants whose op7_4 carries a 1 in bit 1 (16-bit) or
  // bit 2 (32-bit).
  def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  foreach sz = ["16", "32"] in
    def "VST2LNq" # sz # "Pseudo_UPD"
      : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  // VST3LN : Vector Store (single 3-element structure from one lane)
  // Non-writeback form: no outputs, three DPR sources plus a lane index, and
  // Rm fixed to 0b1111. No selection pattern is attached.
  class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VST3ln,
              "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
    let Rm = 0b1111;
  }
  // Single-lane VST3 instructions. op7_4{0} is fixed to 0 in every variant;
  // the lane index fills the upper bits of Inst{7-4}.
  def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7}   = lane{0};   }

  // Pseudo-instructions for each element size.
  foreach sz = ["8", "16", "32"] in
    def "VST3LNd" # sz # "Pseudo"
      : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with double-spaced registers:
  // These differ from the 16/32-bit D-register defs by a 1 in op7_4{1}
  // (16-bit) or op7_4{2} (32-bit).
  def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Sched<[WriteVST2]> is attached so these pseudos match every other VST3LN
  // pseudo (VST3LNd*Pseudo, VST3LNq*Pseudo_UPD); previously these two had no
  // Sched<> at all.
  def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // VST3LNWB adds a GPR result $wb (tied to $Rn.addr) and an am6offset
  // operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> is attached so the real writeback instructions carry
  // the same scheduling class as VST3LN and the VST3LN*Pseudo_UPD
  // definitions; previously this class had no Sched<> at all.
  class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VST3lnu, "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
  }
  // Writeback single-lane VST3 instructions (consecutive D registers).
  def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  foreach sz = ["8", "16", "32"] in
    def "VST3LNd" # sz # "Pseudo_UPD"
      : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

  // Writeback variants whose op7_4 carries a 1 in bit 1 (16-bit) or
  // bit 2 (32-bit).
  def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  foreach sz = ["16", "32"] in
    def "VST3LNq" # sz # "Pseudo_UPD"
      : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
  // VST4LN : Vector Store (single 4-element structure from one lane)
  // Non-writeback form: no outputs, four DPR sources plus a lane index, and
  // Rm fixed to 0b1111. No selection pattern is attached.
  class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VST4ln,
              "vst4", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST4LN";
    let Rm = 0b1111;
    // Instruction bit 4 is taken from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Single-lane VST4 instructions. The lane index fills the upper bits of
  // Inst{7-4}; the 32-bit forms additionally copy bit 5 of the $Rn operand
  // value into Inst{5}.
  def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
    let Inst{7} = lane{0};
    let Inst{5} = Rn{5};
  }

  foreach sz = ["8", "16", "32"] in
    def "VST4LNd" # sz # "Pseudo"
      : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

  // ...with double-spaced registers:
  // These differ from the 16/32-bit defs above by a 1 in op7_4{1} (16-bit)
  // or op7_4{2} (32-bit).
  def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
    let Inst{7} = lane{0};
    let Inst{5} = Rn{5};
  }

  foreach sz = ["16", "32"] in
    def "VST4LNq" # sz # "Pseudo"
      : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // VST4LNWB adds a GPR result $wb (tied to $Rn.addr) and an am6offset
  // operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> is attached so the real writeback instructions carry
  // the same scheduling class as VST4LN and the VST4LN*Pseudo_UPD
  // definitions; previously this class had no Sched<> at all.
  class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VST4lnu, "vst4", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    // Instruction bit 4 is taken from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST4LN";
  }
  // Writeback single-lane VST4 instructions (consecutive D registers).
  def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
    let Inst{7} = lane{0};
    let Inst{5} = Rn{5};
  }

  foreach sz = ["8", "16", "32"] in
    def "VST4LNd" # sz # "Pseudo_UPD"
      : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

  // Writeback variants whose op7_4 carries a 1 in bit 1 (16-bit) or
  // bit 2 (32-bit).
  def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
    let Inst{7} = lane{0};
    let Inst{5} = Rn{5};
  }

  foreach sz = ["16", "32"] in
    def "VST4LNq" # sz # "Pseudo_UPD"
      : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
  2149. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Use vld1/vst1 for unaligned f64 load / store
  //
  // Little-endian: pick the VLD1/VST1 element size that matches the known
  // alignment (halfword -> d16, byte -> d8).
  let Predicates = [IsLE, HasNEON] in {
    // Halfword-aligned accesses.
    def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
              (VLD1d16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d16 addrmode6:$addr, DPR:$value)>;

    // Byte-aligned accesses.
    def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
              (VLD1d8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d8 addrmode6:$addr, DPR:$value)>;
  }

  // Big-endian: any non-word-aligned f64 access uses the 64-bit element form.
  let Predicates = [IsBE, HasNEON] in {
    def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
              (VLD1d64 addrmode6:$addr)>;
    def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d64 addrmode6:$addr, DPR:$value)>;
  }
  // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
  // load / store if it's legal.
  //
  // Doubleword-aligned v2f64 accesses use the 64-bit element form under
  // HasNEON alone.
  let Predicates = [HasNEON] in {
    def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
              (VLD1q64 addrmode6:$addr)>;
    def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q64 addrmode6:$addr, QPR:$value)>;
  }

  // Little-endian only: less-aligned v2f64 accesses use the element size that
  // matches the alignment (word -> q32, halfword -> q16, byte -> q8).
  let Predicates = [IsLE, HasNEON] in {
    // Word-aligned.
    def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
              (VLD1q32 addrmode6:$addr)>;
    def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q32 addrmode6:$addr, QPR:$value)>;

    // Halfword-aligned.
    def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
              (VLD1q16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q16 addrmode6:$addr, QPR:$value)>;

    // Byte-aligned.
    def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
              (VLD1q8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q8 addrmode6:$addr, QPR:$value)>;
  }
  2189. //===----------------------------------------------------------------------===//
  2190. // Instruction Classes
  2191. //===----------------------------------------------------------------------===//
  // Basic 2-register operations: double- and quad-register.
  //
  // Both classes attach the pattern (ResTy (OpNode (OpTy $Vm))) -> $Vd.
  // N2VD works on DPR with itinerary IIC_VUNAD and passes 0 as the sixth N2V
  // argument; N2VQ works on QPR with IIC_VUNAQ and passes 1 there.
  class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

  class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
  // Basic 2-register intrinsics, both double- and quad-register.
  //
  // Compared with N2VD/N2VQ, the itinerary is a template argument and the
  // operator is an SDPatternOperator (IntOp) rather than an SDNode.
  class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Same as above, but not predicated.
  //
  // Built on N2Vnp; the fifth N2Vnp argument is 0 for the D-register class
  // and 1 for the Q-register class.
  class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
            (outs DPR:$Vd), (ins DPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
            (outs QPR:$Vd), (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Similar to N2VQIntnp with some more encoding bits exposed (crypto).
  // op6 is a template argument here and is forwarded as the fifth N2Vnp
  // argument.
  class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                   bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd), (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

  // Same as N2VQIntXnp but with Vd as a src register.
  // The extra input $src is the first IntOp operand and is tied to $Vd.
  class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                    bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
    let Constraints = "$src = $Vd";
  }
  // Narrow 2-register operations.
  // QPR source typed as TyQ, DPR result typed as TyD; the operator is an
  // SDNode.
  class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyD, ValueType TyQ, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

  // Narrow 2-register intrinsics.
  // Same operand shape as N2VN, with an SDPatternOperator as the operator.
  class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
  // Long 2-register operations (currently only used for VMOVL).
  // DPR source typed as TyD, QPR result typed as TyQ; the operator is an
  // SDNode.
  class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

  // Long 2-register intrinsics.
  // Same operand shape as N2VL, with an SDPatternOperator as the operator.
  class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
  // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
  //
  // Both registers are outputs ($Vd, $Vm), each tied to one of the inputs
  // ($src1, $src2). No selection pattern is attached. The D-register class
  // hard-codes IIC_VPERMD; the Q-register class takes its itinerary as an
  // argument.
  class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                    string OpcodeStr, string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
          (outs DPR:$Vd, DPR:$Vm), (ins DPR:$src1, DPR:$src2),
          IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;

  class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                    InstrItinClass itin, string OpcodeStr, string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
          (outs QPR:$Vd, QPR:$Vm), (ins QPR:$src1, QPR:$src2),
          itin, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;
  // Basic 3-register operations: double- and quad-register.
  //
  // N3VD: D-register form with pattern (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))).
  // The fifth N3V argument is fixed to 0 and the format is N3RegFrm.
  class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }

  // Same as N3VD but no data type.
  // Built on N3VX, which is given no Dt string.
  class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              SDNode OpNode, bit Commutable>
    : N3VX<op24, op23, op21_20, op11_8, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
           N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // D-register operation whose second operand is one lane of $Vm, expressed
  // in the pattern as ARMvduplane($Vm, $lane). 32-bit lane variant: $Vm is
  // DPR_VFP2 and the index operand is VectorIndex32.
  class N3VDSL<bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType Ty, SDNode ShOp>
    : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$Vn),
                                (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
    let isCommutable = 0;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }

  // 16-bit lane variant: $Vm is DPR_8, the index operand is VectorIndex16,
  // and the itinerary is fixed to IIC_VMULi16D.
  class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
                 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
    : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$Vn),
                                (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                 imm:$lane)))))]> {
    let isCommutable = 0;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Q-register 3-register operation with pattern
  // (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))). The fifth N3V argument is fixed
  // to 1 and the format is N3RegFrm.
  class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }

  // Q-register variant built on N3VX, which is given no Dt string.
  class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
    : N3VX<op24, op23, op21_20, op11_8, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
           N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Q-register operation whose second operand is one lane of a D register:
  // the pattern applies ARMvduplane to $Vm (typed OpTy) and yields ResTy.
  // 32-bit lane variant: $Vm is DPR_VFP2, the index operand is VectorIndex32.
  class N3VQSL<bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$Vn),
                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
    let isCommutable = 0;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }

  // 16-bit lane variant: $Vm is DPR_8, the index operand is VectorIndex16,
  // and the itinerary is fixed to IIC_VMULi16Q.
  class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDNode ShOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$Vn),
                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
    let isCommutable = 0;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Basic 3-register intrinsics, both double- and quad-register.
  //
  // N3VDInt: D-register form. The format is a template argument (f) and the
  // operator is an SDPatternOperator.
  class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
          f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }

  // Variant built on N3Vnp.
  // NOTE(review): the template arguments 'f' and 'Commutable' are accepted
  // but never referenced below; N3RegFrm is passed as the format and
  // isCommutable is not assigned. Confirm whether that is intentional.
  class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                  bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                  string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp, bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
            N3RegFrm, itin, OpcodeStr, Dt,
            [(set DPR:$Vd,
                  (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
  // DPR intrinsic on N3VLane32 whose second operand is the ARMvduplane of
  // DPR_VFP2:$Vm at $lane.  Every value in the pattern has the single type Ty.
  class N3VDIntSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator IntOp>
    : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (IntOp (Ty DPR:$Vn),
                                 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                  imm:$lane)))))]> {
    // Never commutable.
    let isCommutable = 0;
  }
  // DPR intrinsic on N3VLane16 whose second operand is the ARMvduplane of
  // DPR_8:$Vm at $lane (a VectorIndex16 operand).  All values use type Ty.
  class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator IntOp>
    : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (IntOp (Ty DPR:$Vn),
                                 (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                  imm:$lane)))))]> {
    // Never commutable.
    let isCommutable = 0;
  }
  // DPR 3-register intrinsic with the source operands in $Vm, $Vn order:
  // the ins list, the assembly string and the IntOp pattern all put $Vm
  // first, and the two-operand alias constraint is on $Vm rather than $Vn.
  class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  Format f, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm, DPR:$Vn),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
    let isCommutable = 0;
    let TwoOperandAliasConstraint = "$Vm = $Vd";
  }
  // QPR 3-register intrinsic: IntOp consumes $Vn and $Vm (each typed OpTy) and
  // produces a ResTy value in $Vd.  The caller decides commutability.
  class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
    let isCommutable = Commutable;
    // Every instruction of this class has a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // QPR 3-register intrinsic built on N3Vnp.  The format 'f' and itinerary
  // 'itin' are forwarded unchanged; 'Commutable' is accepted but this class
  // does not set isCommutable from it.
  class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                  bit op6, bit op4,
                  Format f, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp, bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins QPR:$Vn, QPR:$Vm),
            f, itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
  // Same as N3VQIntnp but with Vd as a src register: an extra QPR:$src input
  // is the first IntOp operand and is tied to the output $Vd.
  class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                   bit op6, bit op4,
                   Format f, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy,
                   SDPatternOperator IntOp, bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins QPR:$src, QPR:$Vn, QPR:$Vm),
            f, itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$src),
                                (OpTy QPR:$Vn),
                                (OpTy QPR:$Vm))))]> {
    // Tie the accumulator input to the destination register.
    let Constraints = "$src = $Vd";
  }
  // QPR intrinsic on N3VLane32.  The second IntOp operand is the ARMvduplane
  // of DPR_VFP2:$Vm (typed OpTy) at $lane, yielding a ResTy value.
  class N3VQIntSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$Vn),
                                    (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))]> {
    // Never commutable.
    let isCommutable = 0;
  }
  // QPR intrinsic on N3VLane16.  The second IntOp operand is the ARMvduplane
  // of DPR_8:$Vm (typed OpTy) at $lane, yielding a ResTy value.
  class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator IntOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$Vn),
                                    (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))]> {
    // Never commutable.
    let isCommutable = 0;
  }
  // QPR 3-register intrinsic with the source operands in $Vm, $Vn order:
  // the ins list, the assembly string and the IntOp pattern all put $Vm
  // first, and the two-operand alias constraint is on $Vm rather than $Vn.
  class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  Format f, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm, QPR:$Vn),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
    let isCommutable = 0;
    let TwoOperandAliasConstraint = "$Vm = $Vd";
  }
  // Multiply-Add/Sub operations: double- and quad-register.
  //
  // N3VDMulOp is the DPR form: OpNode combines the accumulator $src1 (tied to
  // $Vd) with MulOp of $Vn and $Vm.  The format is fixed to N3RegFrm.
  class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (Ty (OpNode DPR:$src1,
                            (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
  // DPR multiply-accumulate on N3VLane32: ShOp combines $src1 (tied to $Vd)
  // with MulOp of $Vn and the ARMvduplane of DPR_VFP2:$Vm at $lane.
  class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$src1),
                                (Ty (MulOp DPR:$Vn,
                                           (Ty (ARMvduplane
                                                  (Ty DPR_VFP2:$Vm),
                                                  imm:$lane)))))))]>;
  // DPR multiply-accumulate on N3VLane16: ShOp combines $src1 (tied to $Vd)
  // with MulOp of $Vn and the ARMvduplane of DPR_8:$Vm at $lane.
  class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                      InstrItinClass itin,
                      string OpcodeStr, string Dt,
                      ValueType Ty,
                      SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$src1),
                                (Ty (MulOp DPR:$Vn,
                                           (Ty (ARMvduplane
                                                  (Ty DPR_8:$Vm),
                                                  imm:$lane)))))))]>;
  // QPR multiply-accumulate: OpNode combines the accumulator $src1 (tied to
  // $Vd) with MulOp of $Vn and $Vm.  The format is fixed to N3RegFrm.
  class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator OpNode>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (Ty (OpNode QPR:$src1,
                            (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
  // QPR multiply-accumulate on N3VLane32: ShOp combines $src1 (tied to $Vd)
  // with MulOp of $Vn and the ARMvduplane of DPR_VFP2:$Vm (typed OpTy) at
  // $lane.  Everything else in the pattern is typed ResTy.
  class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$src1),
                                   (ResTy (MulOp QPR:$Vn,
                                                 (ResTy (ARMvduplane
                                                          (OpTy DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;
  // QPR multiply-accumulate on N3VLane16: ShOp combines $src1 (tied to $Vd)
  // with MulOp of $Vn and the ARMvduplane of DPR_8:$Vm (typed OpTy) at $lane.
  // Everything else in the pattern is typed ResTy.
  class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                      InstrItinClass itin,
                      string OpcodeStr, string Dt,
                      ValueType ResTy, ValueType OpTy,
                      SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$src1),
                                   (ResTy (MulOp QPR:$Vn,
                                                 (ResTy (ARMvduplane
                                                          (OpTy DPR_8:$Vm),
                                                          imm:$lane)))))))]>;
  // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
  //
  // N3VDIntOp is the DPR form: OpNode combines the accumulator $src1 (tied to
  // $Vd) with the result of IntOp applied to $Vn and $Vm.
  class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator IntOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (Ty (OpNode DPR:$src1,
                            (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
  // QPR intrinsic-op form: OpNode combines the accumulator $src1 (tied to
  // $Vd) with the result of IntOp applied to $Vn and $Vm.
  class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator IntOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (Ty (OpNode QPR:$src1,
                            (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
  // Neon 3-argument intrinsics, both double- and quad-register.
  // The destination register is also used as the first source operand register.
  //
  // N3VDInt3 is the DPR form: IntOp takes $src1 (tied to $Vd), $Vn and $Vm,
  // all typed OpTy, and yields a ResTy value.
  class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$src1),
                              (OpTy DPR:$Vn),
                              (OpTy DPR:$Vm))))]>;
  // QPR 3-argument intrinsic: IntOp takes $src1 (tied to $Vd), $Vn and $Vm,
  // all typed OpTy, and yields a ResTy value.
  class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src1),
                              (OpTy QPR:$Vn),
                              (OpTy QPR:$Vm))))]>;
  // Long Multiply-Add/Sub operations.
  //
  // N3VLMulOp: QPR accumulator $src1 (tied to $Vd, typed TyQ) combined by
  // OpNode with MulOp of the two DPR sources $Vn and $Vm (typed TyD).
  class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD,
                  SDNode MulOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (MulOp (TyD DPR:$Vn),
                                    (TyD DPR:$Vm)))))]>;
  // Long multiply-accumulate on N3VLane32: OpNode combines the QPR
  // accumulator $src1 (tied to $Vd) with MulOp of DPR:$Vn and the ARMvduplane
  // of DPR_VFP2:$Vm at $lane.
  class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set QPR:$Vd,
                      (OpNode (TyQ QPR:$src1),
                              (TyQ (MulOp (TyD DPR:$Vn),
                                          (TyD (ARMvduplane
                                                 (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
  // Long multiply-accumulate on N3VLane16: OpNode combines the QPR
  // accumulator $src1 (tied to $Vd) with MulOp of DPR:$Vn and the ARMvduplane
  // of DPR_8:$Vm at $lane.
  class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                      InstrItinClass itin,
                      string OpcodeStr, string Dt,
                      ValueType TyQ, ValueType TyD,
                      SDNode MulOp, SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set QPR:$Vd,
                      (OpNode (TyQ QPR:$src1),
                              (TyQ (MulOp (TyD DPR:$Vn),
                                          (TyD (ARMvduplane
                                                 (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;
  // Long Intrinsic-Op vector operations with explicit extend (VABAL).
  // IntOp runs on the two DPR sources (TyD), ExtOp turns its result into
  // TyQ, and OpNode combines that with the QPR accumulator $src1 (tied to $Vd).
  class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                     bit op4,
                     InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType TyQ, ValueType TyD,
                     SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                (TyD DPR:$Vm)))))))]>;
  // Neon Long 3-argument intrinsic.  The destination register is
  // a quad-register and is also used as the first source operand register.
  // The other two sources are DPR registers typed TyD.
  class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (TyQ (IntOp (TyQ QPR:$src1),
                            (TyD DPR:$Vn),
                            (TyD DPR:$Vm))))]>;
  // Long 3-argument intrinsic on N3VLane32: IntOp takes the QPR accumulator
  // $src1 (tied to $Vd, typed ResTy), DPR:$Vn, and the ARMvduplane of
  // DPR_VFP2:$Vm at $lane (both typed OpTy).
  class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy,
                   SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]>;
  // Long 3-argument intrinsic on N3VLane16: IntOp takes the QPR accumulator
  // $src1 (tied to $Vd, typed ResTy), DPR:$Vn, and the ARMvduplane of
  // DPR_8:$Vm at $lane (both typed OpTy).
  class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                     InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType ResTy, ValueType OpTy,
                     SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]>;
  // Narrowing 3-register intrinsics.
  // Two QPR sources (typed TyQ) go through IntOp to give a DPR result (typed
  // TyD).  Format and itinerary are fixed to N3RegFrm and IIC_VBINi4D.
  class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, string Dt,
                ValueType TyD, ValueType TyQ,
                SDPatternOperator IntOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          N3RegFrm, IIC_VBINi4D,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
    // Commutability is decided by the instantiating def.
    let isCommutable = Commutable;
  }
  // Long 3-register operations.
  // Two DPR sources (typed TyD) go through OpNode to give a QPR result
  // (typed TyQ).
  class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD,
             SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
    // Commutability is decided by the instantiating def.
    let isCommutable = Commutable;
  }
  // Long operation on N3VLane32: OpNode of DPR:$Vn and the ARMvduplane of
  // DPR_VFP2:$Vm at $lane (both typed TyD) gives a QPR result typed TyQ.
  class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD,
               SDNode OpNode>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
  // Long operation on N3VLane16: OpNode of DPR:$Vn and the ARMvduplane of
  // DPR_8:$Vm at $lane (both typed TyD) gives a QPR result typed TyQ.
  class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane)))))]>;
  // Long 3-register operations with explicitly extended operands.
  // Each DPR source (TyD) is passed through ExtOp to TyQ before OpNode
  // combines the two into the QPR result.
  class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode OpNode, SDNode ExtOp,
                bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
    // Commutability is decided by the instantiating def.
    let isCommutable = Commutable;
  }
  // Long 3-register intrinsics with explicit extend (VABDL).
  // IntOp runs on the two DPR sources (TyD); ExtOp then turns its result into
  // the TyQ value written to the QPR destination.
  class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   SDPatternOperator IntOp, SDNode ExtOp,
                   bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                        (TyD DPR:$Vm))))))]> {
    // Commutability is decided by the instantiating def.
    let isCommutable = Commutable;
  }
  // Long 3-register intrinsics.
  // IntOp of two DPR sources (typed TyD) directly yields the QPR result
  // (typed TyQ).
  class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDPatternOperator IntOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
    // Commutability is decided by the instantiating def.
    let isCommutable = Commutable;
  }
  // Same as above, but not predicated: the long 3-register intrinsic built on
  // N3Vnp instead of N3V.  'Commutable' is accepted but this class does not
  // set isCommutable from it.
  class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                  bit op6, bit op4,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp, bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins DPR:$Vn, DPR:$Vm),
            N3RegFrm, itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
  // Long intrinsic on N3VLane32: IntOp of DPR:$Vn and the ARMvduplane of
  // DPR_VFP2:$Vm at $lane (both typed OpTy) gives a QPR result typed ResTy.
  class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]>;
  // Long intrinsic on N3VLane16: IntOp of DPR:$Vn and the ARMvduplane of
  // DPR_8:$Vm at $lane (both typed OpTy) gives a QPR result typed ResTy.
  class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]>;
  // Wide 3-register operations.
  // OpNode combines QPR:$Vn (typed TyQ) with ExtOp of DPR:$Vm (TyD extended to
  // TyQ).  Format and itinerary are fixed to N3RegFrm and IIC_VSUBiD.
  class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD,
             SDNode OpNode, SDNode ExtOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, DPR:$Vm),
          N3RegFrm, IIC_VSUBiD,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$Vn),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
    let isCommutable = Commutable;
    // Every instruction of this class has a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Pairwise long 2-register intrinsics, both double- and quad-register.
  //
  // N2VDPLInt is the DPR form: IntOp of $Vm (typed OpTy) yields a ResTy value
  // in $Vd.  The itinerary is fixed to IIC_VSHLiD.
  class N2VDPLInt<bits<2> op24_23, bits<2> op21_20,
                  bits<2> op19_18, bits<2> op17_16,
                  bits<5> op11_7, bit op4,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          IIC_VSHLiD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
  // QPR pairwise long 2-register intrinsic: IntOp of $Vm (typed OpTy) yields a
  // ResTy value in $Vd.  The itinerary is fixed to IIC_VSHLiD.
  class N2VQPLInt<bits<2> op24_23, bits<2> op21_20,
                  bits<2> op19_18, bits<2> op17_16,
                  bits<5> op11_7, bit op4,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          IIC_VSHLiD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Pairwise long 2-register accumulate intrinsics,
  // both double- and quad-register.
  // The destination register is also used as the first source operand register.
  //
  // N2VDPLInt2 is the DPR form: IntOp takes $src1 (tied to $Vd, typed ResTy)
  // and $Vm (typed OpTy).  The itinerary is fixed to IIC_VPALiD.
  class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20,
                   bits<2> op19_18, bits<2> op17_16,
                   bits<5> op11_7, bit op4,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy,
                   SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vm),
          IIC_VPALiD,
          OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
  // QPR pairwise long accumulate intrinsic: IntOp takes $src1 (tied to $Vd,
  // typed ResTy) and $Vm (typed OpTy).  The itinerary is fixed to IIC_VPALiQ.
  class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20,
                   bits<2> op19_18, bits<2> op17_16,
                   bits<5> op11_7, bit op4,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy,
                   SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vm),
          IIC_VPALiQ,
          OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
  // Shift by immediate,
  // both double- and quad-register.
  // Both classes share the two-operand alias constraint "$Vm = $Vd".
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {

  // DPR form: OpNode applied to $Vm and the i32 immediate $SIMM.
  class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
               Format f, InstrItinClass itin, Operand ImmTy,
               string OpcodeStr, string Dt,
               ValueType Ty, SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             f, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;

  // QPR form: OpNode applied to $Vm and the i32 immediate $SIMM.
  class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
               Format f, InstrItinClass itin, Operand ImmTy,
               string OpcodeStr, string Dt,
               ValueType Ty, SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             f, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

  }
  // Long shift by immediate.
  // DPR source (typed OpTy) and ImmTy:$SIMM go through OpNode to give a QPR
  // result (typed ResTy).  Format and itinerary are fixed to N2RegVShLFrm and
  // IIC_VSHLiD.
  class N2VLSh<bit op24, bit op23, bits<4> op11_8,
               bit op7, bit op6, bit op4,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               Operand ImmTy, SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs QPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             N2RegVShLFrm, IIC_VSHLiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
  // Narrow shift by immediate.
  // QPR source (typed OpTy) and the i32 ImmTy:$SIMM go through OpNode to give
  // a DPR result (typed ResTy).  The format is fixed to N2RegVShRFrm.
  class N2VNSh<bit op24, bit op23, bits<4> op11_8,
               bit op7, bit op6, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               Operand ImmTy, SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs DPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (ResTy (OpNode (OpTy QPR:$Vm),
                                  (i32 ImmTy:$SIMM))))]>;
  // Shift right by immediate and accumulate,
  // both double- and quad-register.
  // Both classes share the two-operand alias constraint "$Vm = $Vd" and tie
  // the accumulator input $src1 to $Vd.
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {

  // DPR form: add of $src1 and ShOp($Vm, i32 $SIMM).
  class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                  Operand ImmTy,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, IIC_VPALiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set DPR:$Vd,
                   (Ty (add DPR:$src1,
                            (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;

  // QPR form: add of $src1 and ShOp($Vm, i32 $SIMM).
  class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                  Operand ImmTy,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, IIC_VPALiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set QPR:$Vd,
                   (Ty (add QPR:$src1,
                            (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

  }
  // Shift by immediate and insert,
  // both double- and quad-register.
  // Both classes share the two-operand alias constraint "$Vm = $Vd" and tie
  // the input $src1 to $Vd; ShOp receives $src1, $Vm and the i32 $SIMM.
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {

  // DPR form; itinerary fixed to IIC_VSHLiD.
  class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                  Operand ImmTy, Format f,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
             f, IIC_VSHLiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set DPR:$Vd,
                   (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;

  // QPR form; itinerary fixed to IIC_VSHLiQ.
  class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                  Operand ImmTy, Format f,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
             f, IIC_VSHLiQ,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set QPR:$Vd,
                   (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

  }
  // Convert, with fractional bits immediate,
  // both double- and quad-register.
  //
  // N2VCvtD is the DPR form: IntOp of $Vm (typed OpTy) and the i32 immediate
  // $SIMM (a neon_vcvt_imm32 operand) yields a ResTy value.  Format and
  // itinerary are fixed to NVCVTFrm and IIC_VUNAD.
  class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
             NVCVTFrm, IIC_VUNAD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
  // QPR convert with immediate: IntOp of $Vm (typed OpTy) and the i32
  // immediate $SIMM (a neon_vcvt_imm32 operand) yields a ResTy value.  Format
  // and itinerary are fixed to NVCVTFrm and IIC_VUNAQ.
  class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
             NVCVTFrm, IIC_VUNAQ,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
  2930. //===----------------------------------------------------------------------===//
  2931. // Multiclasses
  2932. //===----------------------------------------------------------------------===//
  2933. // Abbreviations used in multiclass suffixes:
  2934. // Q = quarter int (8 bit) elements
  2935. // H = half int (16 bit) elements
  2936. // S = single int (32 bit) elements
  2937. // D = double int (64 bit) elements
  2938. // Neon 2-register vector operations and intrinsics.
  // Neon 2-register comparisons.
  //   source operand element sizes of 8, 16 and 32 bits:
  // Each def matches ARMvcmpz of $Vm with the condition fragment 'fc'.  The
  // integer defs append the element width to Dt; the floating-point defs use
  // a literal "f32"/"f16" data type, produce an integer-vector result and set
  // Inst{10}.  The f16 defs additionally require HasNEON and HasFullFP16.
  multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string opc, string Dt, string asm,
                         PatFrag fc> {
    // ---- 64-bit vector types (DPR) ----
    def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                    (outs DPR:$Vd), (ins DPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
                    [(set DPR:$Vd,
                          (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
    def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                    (outs DPR:$Vd), (ins DPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
                    [(set DPR:$Vd,
                          (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
    def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                    (outs DPR:$Vd), (ins DPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
                    [(set DPR:$Vd,
                          (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
    def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                    (outs DPR:$Vd), (ins DPR:$Vm),
                    NoItinerary, opc, "f32", asm, "",
                    [(set DPR:$Vd,
                          (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
      let Inst{10} = 1; // overwrite F = 1
    }
    def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                    (outs DPR:$Vd), (ins DPR:$Vm),
                    NoItinerary, opc, "f16", asm, "",
                    [(set DPR:$Vd,
                          (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
                Requires<[HasNEON,HasFullFP16]> {
      let Inst{10} = 1; // overwrite F = 1
    }

    // ---- 128-bit vector types (QPR) ----
    def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                    (outs QPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
                    [(set QPR:$Vd,
                          (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
    def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                    (outs QPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
                    [(set QPR:$Vd,
                          (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
    def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                    (outs QPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
                    [(set QPR:$Vd,
                          (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
    def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                    (outs QPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, opc, "f32", asm, "",
                    [(set QPR:$Vd,
                          (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
      let Inst{10} = 1; // overwrite F = 1
    }
    def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                    (outs QPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, opc, "f16", asm, "",
                    [(set QPR:$Vd,
                          (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
                Requires<[HasNEON,HasFullFP16]> {
      let Inst{10} = 1; // overwrite F = 1
    }
  }
  // Neon 3-register comparisons.
  //
  // N3VQ_cmp is the QPR form: ARMvcmp of $Vn and $Vm (typed OpTy) under the
  // condition fragment 'fc' yields a ResTy value in $Vd.
  class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 PatFrag fc, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
    let isCommutable = Commutable;
    // Every instruction of this class has a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // DPR 3-register comparison: ARMvcmp of $Vn and $Vm (typed OpTy) under the
  // condition fragment 'fc' yields a ResTy value in $Vd.
  class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 PatFrag fc, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
    let isCommutable = Commutable;
    // Every instruction of this class has a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // 3-register compares for 8-, 16- and 32-bit elements.
  // Emits three N3VD_cmp records (v8i8, v4i16, v2i32) and three N3VQ_cmp
  // records (v16i8, v8i16, v4i32).  Result and operand types are identical,
  // and the element width is appended to Dt.  The 8-bit variants reuse the
  // 16-bit itineraries (itinD16 / itinQ16).
  multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         PatFrag fc, bit Commutable = 0> {
    // 64-bit vector types.
    def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                         OpcodeStr, Dt # "8",
                         v8i8, v8i8, fc, Commutable>;
    def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                         OpcodeStr, Dt # "16",
                         v4i16, v4i16, fc, Commutable>;
    def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                         OpcodeStr, Dt # "32",
                         v2i32, v2i32, fc, Commutable>;

    // 128-bit vector types.
    def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                         OpcodeStr, Dt # "8",
                         v16i8, v16i8, fc, Commutable>;
    def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                         OpcodeStr, Dt # "16",
                         v8i16, v8i16, fc, Commutable>;
    def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                         OpcodeStr, Dt # "32",
                         v4i32, v4i32, fc, Commutable>;
  }
  // Neon 2-register vector intrinsics,
  //   element sizes of 8, 16 and 32 bits:
  // Emits N2VDInt records (v8i8, v4i16, v2i32) with itinD and N2VQInt records
  // (v16i8, v8i16, v4i32) with itinQ.  Both ValueType arguments are the same
  // type, and the third field argument (0b00 / 0b01 / 0b10) tracks the element
  // width.
  multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
    // 64-bit vector types.
    def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        itinD, OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
    def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        itinD, OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
    def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        itinD, OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

    // 128-bit vector types.
    def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        itinQ, OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
    def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        itinQ, OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
    def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        itinQ, OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
  }
  // Neon Narrowing 2-register vector operations,
  //   source operand element sizes of 16, 32 and 64 bits:
  // Each record is named after its narrow type (v8i8, v4i16, v2i32) while the
  // Dt suffix carries the wide source width ("16", "32", "64").  The type
  // pairs passed to N2VN are (v8i8, v8i16), (v4i16, v4i32), (v2i32, v2i64).
  multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op6, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
    def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                     itin, OpcodeStr, Dt # "16",
                     v8i8, v8i16, OpNode>;
    def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                     itin, OpcodeStr, Dt # "32",
                     v4i16, v4i32, OpNode>;
    def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                     itin, OpcodeStr, Dt # "64",
                     v2i32, v2i64, OpNode>;
  }
  // Neon Narrowing 2-register vector intrinsics,
  //   source operand element sizes of 16, 32 and 64 bits:
  // Intrinsic counterpart of the narrowing-operation multiclass: same record
  // names, Dt suffixes and type pairs, but built on N2VNInt and taking an
  // SDPatternOperator.
  multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op6, bit op4,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
    def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                        itin, OpcodeStr, Dt # "16",
                        v8i8, v8i16, IntOp>;
    def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                        itin, OpcodeStr, Dt # "32",
                        v4i16, v4i32, IntOp>;
    def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                        itin, OpcodeStr, Dt # "64",
                        v2i32, v2i64, IntOp>;
  }
  // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
  //   source operand element sizes of 8, 16 and 32 bits:
  // Records are named after the wide type (v8i16, v4i32, v2i64); the Dt suffix
  // and the second ValueType give the narrow source (v8i8, v4i16, v2i32).
  // The three literal 2-bit arguments after op24_23 differ per element size,
  // and the itinerary is fixed to IIC_VQUNAiD.
  multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                      string OpcodeStr, string Dt, SDNode OpNode> {
    def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                     IIC_VQUNAiD, OpcodeStr, Dt # "8",
                     v8i16, v8i8, OpNode>;
    def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                     IIC_VQUNAiD, OpcodeStr, Dt # "16",
                     v4i32, v4i16, OpNode>;
    def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                     IIC_VQUNAiD, OpcodeStr, Dt # "32",
                     v2i64, v2i32, OpNode>;
  }
  // Neon 3-register vector operations.

  // First with only element sizes of 8, 16 and 32 bits:
  // Emits N3VD records (v8i8, v4i16, v2i32) and N3VQ records (v16i8, v8i16,
  // v4i32).  Both ValueType arguments are the same type.  The 8-bit variants
  // share the 16-bit itineraries (itinD16 / itinQ16).
  multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDNode OpNode, bit Commutable = 0> {
    // 64-bit vector types.
    def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                     OpcodeStr, Dt # "8",
                     v8i8, v8i8, OpNode, Commutable>;
    def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                     OpcodeStr, Dt # "16",
                     v4i16, v4i16, OpNode, Commutable>;
    def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                     OpcodeStr, Dt # "32",
                     v2i32, v2i32, OpNode, Commutable>;

    // 128-bit vector types.
    def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                     OpcodeStr, Dt # "8",
                     v16i8, v16i8, OpNode, Commutable>;
    def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                     OpcodeStr, Dt # "16",
                     v8i16, v8i16, OpNode, Commutable>;
    def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                     OpcodeStr, Dt # "32",
                     v4i32, v4i32, OpNode, Commutable>;
  }
  // 16- and 32-bit variants built on the N3VDSL16 / N3VDSL / N3VQSL16 / N3VQSL
  // classes.  The data-type strings are the literals "i16" and "i32" (there is
  // no Dt parameter).  The 32-bit records pass IIC_VMULi32D / IIC_VMULi32Q
  // explicitly; the *SL16 classes are given no itinerary argument here.
  // The Q-register records also pass the matching D-register type
  // (v4i16 / v2i32).
  multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
    def v4i16 : N3VDSL16<0b01, op11_8,
                         OpcodeStr, "i16", v4i16, ShOp>;
    def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                       OpcodeStr, "i32", v2i32, ShOp>;
    def v8i16 : N3VQSL16<0b01, op11_8,
                         OpcodeStr, "i16", v8i16, v4i16, ShOp>;
    def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                       OpcodeStr, "i32", v4i32, v2i32, ShOp>;
  }
  // ....then also with element size 64 bits:
  // Inherits every record of N3V_QHS, passing itinD for both D-register
  // itinerary slots and itinQ for both Q-register slots, then adds the
  // 64-bit-element records v1i64 (N3VD) and v2i64 (N3VQ) with size field 0b11.
  multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt,
                      SDNode OpNode, bit Commutable = 0>
    : N3V_QHS<op24, op23, op11_8, op4,
              itinD, itinD, itinQ, itinQ,
              OpcodeStr, Dt, OpNode, Commutable> {
    def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                     OpcodeStr, Dt # "64",
                     v1i64, v1i64, OpNode, Commutable>;
    def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                     OpcodeStr, Dt # "64",
                     v2i64, v2i64, OpNode, Commutable>;
  }
  // Neon 3-register vector intrinsics.

  // First with only element sizes of 16 and 32 bits:
  // Emits N3VDInt records (v4i16, v2i32) and N3VQInt records (v8i16, v4i32),
  // each with its own itinerary and with the instruction Format f forwarded
  // unchanged.
  multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
    // 64-bit vector types.
    def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "16",
                        v4i16, v4i16, IntOp, Commutable>;
    def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "32",
                        v2i32, v2i32, IntOp, Commutable>;

    // 128-bit vector types.
    def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "16",
                        v8i16, v8i16, IntOp, Commutable>;
    def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "32",
                        v4i32, v4i32, IntOp, Commutable>;
  }
  // 16- and 32-bit variants built on the N3VDIntSh / N3VQIntSh classes.
  // Same parameter list as the N3VInt_HS multiclass except that there is no
  // Commutable argument.
  multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
    // 64-bit vector types.
    def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                          OpcodeStr, Dt # "16",
                          v4i16, v4i16, IntOp>;
    def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                          OpcodeStr, Dt # "32",
                          v2i32, v2i32, IntOp>;

    // 128-bit vector types.
    def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                          OpcodeStr, Dt # "16",
                          v8i16, v8i16, IntOp>;
    def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                          OpcodeStr, Dt # "32",
                          v4i32, v4i32, IntOp>;
  }
  // 16- and 32-bit variants built on the N3VDIntSL16 / N3VDIntSL /
  // N3VQIntSL16 / N3VQIntSL classes.  The Q-register records additionally
  // pass the matching D-register type (v4i16 / v2i32).
  multiclass N3VIntSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
    def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                            OpcodeStr, Dt # "16", v4i16, IntOp>;
    def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                          OpcodeStr, Dt # "32", v2i32, IntOp>;
    def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, Dt # "16", v8i16, v4i16, IntOp>;
    def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                          OpcodeStr, Dt # "32", v4i32, v2i32, IntOp>;
  }
  // ....then also with element size of 8 bits:
  // Inherits the 16/32-bit records from N3VInt_HS and adds v8i8 (N3VDInt) and
  // v16i8 (N3VQInt).  The 8-bit records use the 16-bit itineraries.
  multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp, bit Commutable = 0>
    : N3VInt_HS<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp, Commutable> {
    def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "8",
                        v8i8, v8i8, IntOp, Commutable>;
    def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "8",
                        v16i8, v16i8, IntOp, Commutable>;
  }
  // Inherits the 16/32-bit records from N3VInt_HSSh and adds the 8-bit
  // records v8i8 (N3VDIntSh) and v16i8 (N3VQIntSh), which use the 16-bit
  // itineraries.
  multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                          InstrItinClass itinD16, InstrItinClass itinD32,
                          InstrItinClass itinQ16, InstrItinClass itinQ32,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp>
    : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                  itinD16, itinD32, itinQ16, itinQ32,
                  OpcodeStr, Dt, IntOp> {
    def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                          OpcodeStr, Dt # "8",
                          v8i8, v8i8, IntOp>;
    def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                          OpcodeStr, Dt # "8",
                          v16i8, v16i8, IntOp>;
  }
  // ....then also with element size of 64 bits:
  // Inherits the 8/16/32-bit records from N3VInt_QHS and adds v1i64 (N3VDInt)
  // and v2i64 (N3VQInt).  The 64-bit records use the 32-bit itineraries
  // (itinD32 / itinQ32).
  multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp, bit Commutable = 0>
    : N3VInt_QHS<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp, Commutable> {
    def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "64",
                        v1i64, v1i64, IntOp, Commutable>;
    def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "64",
                        v2i64, v2i64, IntOp, Commutable>;
  }
  // Inherits the 8/16/32-bit records from N3VInt_QHSSh and adds the 64-bit
  // records v1i64 (N3VDIntSh) and v2i64 (N3VQIntSh), which use the 32-bit
  // itineraries (itinD32 / itinQ32).
  multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                           InstrItinClass itinD16, InstrItinClass itinD32,
                           InstrItinClass itinQ16, InstrItinClass itinQ32,
                           string OpcodeStr, string Dt,
                           SDPatternOperator IntOp>
    : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                   itinD16, itinD32, itinQ16, itinQ32,
                   OpcodeStr, Dt, IntOp> {
    def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                          OpcodeStr, Dt # "64",
                          v1i64, v1i64, IntOp>;
    def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                          OpcodeStr, Dt # "64",
                          v2i64, v2i64, IntOp>;
  }
  // Neon Narrowing 3-register vector intrinsics,
  //   source operand element sizes of 16, 32 and 64 bits:
  // Records are named after the narrow type (v8i8, v4i16, v2i32); the Dt
  // suffix and the second ValueType describe the wide source (v8i16, v4i32,
  // v2i64).  No itinerary is passed to N3VNInt.
  multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp, bit Commutable = 0> {
    def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                        OpcodeStr, Dt # "16",
                        v8i8, v8i16, IntOp, Commutable>;
    def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                        OpcodeStr, Dt # "32",
                        v4i16, v4i32, IntOp, Commutable>;
    def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                        OpcodeStr, Dt # "64",
                        v2i32, v2i64, IntOp, Commutable>;
  }
  // Neon Long 3-register vector operations.
  // Records are named after the wide type (v8i16, v4i32, v2i64); the Dt
  // suffix and the second ValueType describe the narrow source (v8i8, v4i16,
  // v2i32).  The 8- and 16-bit sources share itin16; the 32-bit source uses
  // itin32.
  multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDNode OpNode, bit Commutable = 0> {
    def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                     OpcodeStr, Dt # "8",
                     v8i16, v8i8, OpNode, Commutable>;
    def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                     OpcodeStr, Dt # "16",
                     v4i32, v4i16, OpNode, Commutable>;
    def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                     OpcodeStr, Dt # "32",
                     v2i64, v2i32, OpNode, Commutable>;
  }
  // Long variants built on N3VLSL16 / N3VLSL, for 16- and 32-bit sources.
  // Unlike the plain long multiclass, these records are named after the narrow
  // source type (v4i16, v2i32), and one itinerary serves both.
  multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
    def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                         OpcodeStr, Dt # "16",
                         v4i32, v4i16, OpNode>;
    def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                       OpcodeStr, Dt # "32",
                       v2i64, v2i32, OpNode>;
  }
  // Long 3-register operations built on N3VLExt, which receives both OpNode
  // and a separate ExtOp node.  Record names, Dt suffixes, type pairs and
  // itinerary split (itin16 for 8/16-bit sources, itin32 for 32-bit) mirror
  // the N3VL_QHS multiclass.
  multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt,
                         SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
    def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                        OpcodeStr, Dt # "8",
                        v8i16, v8i8, OpNode, ExtOp, Commutable>;
    def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                        OpcodeStr, Dt # "16",
                        v4i32, v4i16, OpNode, ExtOp, Commutable>;
    def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                        OpcodeStr, Dt # "32",
                        v2i64, v2i32, OpNode, ExtOp, Commutable>;
  }
  // Neon Long 3-register vector intrinsics.

  // First with only element sizes of 16 and 32 bits:
  // Emits v4i32 (from v4i16, itin16) and v2i64 (from v2i32, itin32) on top of
  // N3VLInt.
  multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp, bit Commutable = 0> {
    def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                        OpcodeStr, Dt # "16",
                        v4i32, v4i16, IntOp, Commutable>;
    def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                        OpcodeStr, Dt # "32",
                        v2i64, v2i32, IntOp, Commutable>;
  }
  // Long intrinsic variants built on N3VLIntSL16 / N3VLIntSL for 16- and
  // 32-bit sources.  Records are named after the narrow source type (v4i16,
  // v2i32) and share a single itinerary.
  multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp> {
    def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                            OpcodeStr, Dt # "16",
                            v4i32, v4i16, IntOp>;
    def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                          OpcodeStr, Dt # "32",
                          v2i64, v2i32, IntOp>;
  }
  // ....then also with element size of 8 bits:
  // Inherits v4i32 / v2i64 from N3VLInt_HS and adds v8i16 (from v8i8), which
  // uses itin16.
  multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp, bit Commutable = 0>
    : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
                 OpcodeStr, Dt, IntOp, Commutable> {
    def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                        OpcodeStr, Dt # "8",
                        v8i16, v8i8, IntOp, Commutable>;
  }
  // ....with explicit extend (VABDL).
  // Built on N3VLIntExt, which takes both IntOp and an ExtOp node.  A single
  // itinerary is used for all three element sizes.
  multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            bit Commutable = 0> {
    def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, Dt # "8",
                           v8i16, v8i8, IntOp, ExtOp, Commutable>;
    def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, Dt # "16",
                           v4i32, v4i16, IntOp, ExtOp, Commutable>;
    def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, Dt # "32",
                           v2i64, v2i32, IntOp, ExtOp, Commutable>;
  }
  // Neon Wide 3-register vector intrinsics,
  //   source operand element sizes of 8, 16 and 32 bits:
  // Built on N3VW, which receives OpNode plus an ExtOp node and no itinerary.
  // Records are named after the wide type (v8i16, v4i32, v2i64); the Dt suffix
  // and second ValueType give the narrow type (v8i8, v4i16, v2i32).
  multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, string Dt,
                      SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
    def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                     OpcodeStr, Dt # "8",
                     v8i16, v8i8, OpNode, ExtOp, Commutable>;
    def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                     OpcodeStr, Dt # "16",
                     v4i32, v4i16, OpNode, ExtOp, Commutable>;
    def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                     OpcodeStr, Dt # "32",
                     v2i64, v2i32, OpNode, ExtOp, Commutable>;
  }
  // Neon Multiply-Op vector operations,
  //   element sizes of 8, 16 and 32 bits:
  // Every record passes the generic `mul` node followed by the caller's
  // OpNode to N3VDMulOp / N3VQMulOp.  The 8-bit variants reuse the 16-bit
  // itineraries.
  multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itinD16, InstrItinClass itinD32,
                          InstrItinClass itinQ16, InstrItinClass itinQ32,
                          string OpcodeStr, string Dt, SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                          OpcodeStr, Dt # "8", v8i8, mul, OpNode>;
    def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                          OpcodeStr, Dt # "16", v4i16, mul, OpNode>;
    def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                          OpcodeStr, Dt # "32", v2i32, mul, OpNode>;

    // 128-bit vector types.
    def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                          OpcodeStr, Dt # "8", v16i8, mul, OpNode>;
    def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                          OpcodeStr, Dt # "16", v8i16, mul, OpNode>;
    def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                          OpcodeStr, Dt # "32", v4i32, mul, OpNode>;
  }
  // Multiply-Op variants built on the N3VDMulOpSL16 / N3VDMulOpSL /
  // N3VQMulOpSL16 / N3VQMulOpSL classes, for 16- and 32-bit elements.  `mul`
  // is always passed as the multiply node, followed by ShOp.  The Q-register
  // records also pass the matching D-register type (v4i16 / v2i32).
  multiclass N3VMulOpSL_HS<bits<4> op11_8,
                           InstrItinClass itinD16, InstrItinClass itinD32,
                           InstrItinClass itinQ16, InstrItinClass itinQ32,
                           string OpcodeStr, string Dt,
                           SDPatternOperator ShOp> {
    def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                              OpcodeStr, Dt # "16",
                              v4i16, mul, ShOp>;
    def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                            OpcodeStr, Dt # "32",
                            v2i32, mul, ShOp>;
    def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                              OpcodeStr, Dt # "16",
                              v8i16, v4i16, mul, ShOp>;
    def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                            OpcodeStr, Dt # "32",
                            v4i32, v2i32, mul, ShOp>;
  }
  // Neon Intrinsic-Op vector operations,
  //   element sizes of 8, 16 and 32 bits:
  // Each record hands both IntOp and OpNode to N3VDIntOp / N3VQIntOp.  One
  // itinerary per register width (itinD / itinQ) covers every element size.
  multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itinD, InstrItinClass itinQ,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                          OpcodeStr, Dt # "8", v8i8, IntOp, OpNode>;
    def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                          OpcodeStr, Dt # "16", v4i16, IntOp, OpNode>;
    def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                          OpcodeStr, Dt # "32", v2i32, IntOp, OpNode>;

    // 128-bit vector types.
    def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                          OpcodeStr, Dt # "8", v16i8, IntOp, OpNode>;
    def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                          OpcodeStr, Dt # "16", v8i16, IntOp, OpNode>;
    def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                          OpcodeStr, Dt # "32", v4i32, IntOp, OpNode>;
  }
  // Neon 3-argument intrinsics,
  //   element sizes of 16 and 32 bits:
  // Emits N3VDInt3 records (v4i16, v2i32) and N3VQInt3 records (v8i16,
  // v4i32); both ValueType arguments are the same type.
  multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
    // 64-bit vector types.
    def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                         OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
    def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                         OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

    // 128-bit vector types.
    def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                         OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
    def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                         OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
  }
  //   element sizes of 8, 16 and 32 bits:
  // Inherits the 16/32-bit records from N3VInt3_HS and adds v8i8 (N3VDInt3)
  // and v16i8 (N3VQInt3), which use the 16-bit itineraries.
  multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
    : N3VInt3_HS<op24, op23, op11_8, op4,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
    // 64-bit vector types.
    def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                         OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;

    // 128-bit vector types.
    def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                         OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
  }
  // Neon Long Multiply-Op vector operations,
  //   element sizes of 8, 16 and 32 bits:
  // Records are named after the wide type (v8i16, v4i32, v2i64) and pass
  // MulOp followed by OpNode to N3VLMulOp.  itin16 covers the 8- and 16-bit
  // sources; itin32 covers the 32-bit source.
  multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                           InstrItinClass itin16, InstrItinClass itin32,
                           string OpcodeStr, string Dt,
                           SDNode MulOp, SDNode OpNode> {
    def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16,
                          OpcodeStr, Dt # "8",
                          v8i16, v8i8, MulOp, OpNode>;
    def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16,
                          OpcodeStr, Dt # "16",
                          v4i32, v4i16, MulOp, OpNode>;
    def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32,
                          OpcodeStr, Dt # "32",
                          v2i64, v2i32, MulOp, OpNode>;
  }
  // Long Multiply-Op variants built on N3VLMulOpSL16 / N3VLMulOpSL for 16-
  // and 32-bit sources.  The itineraries are fixed to IIC_VMACi16D and
  // IIC_VMACi32D rather than being parameters.  Records are named after the
  // narrow source type (v4i16, v2i32).
  multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                            string Dt, SDNode MulOp, SDNode OpNode> {
    def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D,
                              OpcodeStr, Dt # "16",
                              v4i32, v4i16, MulOp, OpNode>;
    def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D,
                            OpcodeStr, Dt # "32",
                            v2i64, v2i32, MulOp, OpNode>;
  }
  // Neon Long 3-argument intrinsics.

  // First with only element sizes of 16 and 32 bits:
  // Emits v4i32 (from v4i16, itin16) and v2i64 (from v2i32, itin32) on top of
  // N3VLInt3.
  multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
    def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                         OpcodeStr, Dt # "16",
                         v4i32, v4i16, IntOp>;
    def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                         OpcodeStr, Dt # "32",
                         v2i64, v2i32, IntOp>;
  }
  // Long 3-argument intrinsic variants built on N3VLInt3SL16 / N3VLInt3SL for
  // 16- and 32-bit sources.  The itineraries are fixed to IIC_VMACi16D and
  // IIC_VMACi32D.  Records are named after the narrow source type (v4i16,
  // v2i32).
  multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                           string OpcodeStr, string Dt,
                           SDPatternOperator IntOp> {
    def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                             OpcodeStr, Dt # "16",
                             v4i32, v4i16, IntOp>;
    def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                           OpcodeStr, Dt # "32",
                           v2i64, v2i32, IntOp>;
  }
  // ....then also with element size of 8 bits:
  // Inherits v4i32 / v2i64 from N3VLInt3_HS and adds v8i16 (from v8i8), which
  // uses itin16.
  multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin16, InstrItinClass itin32,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp>
    : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                  OpcodeStr, Dt, IntOp> {
    def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                         OpcodeStr, Dt # "8",
                         v8i16, v8i8, IntOp>;
  }
  // ....with explicit extend (VABAL).
  // Built on N3VLIntExtOp, which receives IntOp, ExtOp and OpNode in that
  // order.  A single itinerary is used for all three element sizes.
  multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                              InstrItinClass itin, string OpcodeStr, string Dt,
                              SDPatternOperator IntOp, SDNode ExtOp,
                              SDNode OpNode> {
    def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                             OpcodeStr, Dt # "8",
                             v8i16, v8i8, IntOp, ExtOp, OpNode>;
    def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                             OpcodeStr, Dt # "16",
                             v4i32, v4i16, IntOp, ExtOp, OpNode>;
    def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                             OpcodeStr, Dt # "32",
                             v2i64, v2i32, IntOp, ExtOp, OpNode>;
  }
  // Neon Pairwise long 2-register intrinsics,
  //   element sizes of 8, 16 and 32 bits:
  // Records are named after the source type.  The first ValueType passed has
  // half as many elements, each twice as wide (e.g. v8i8 -> v4i16,
  // v4i32 -> v2i64).  No itinerary is passed to N2VDPLInt / N2VQPLInt.
  multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                          bits<5> op11_7, bit op4,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp> {
    // 64-bit vector types.
    def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
    def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
    def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

    // 128-bit vector types.
    def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
    def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
    def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
  }
  // Neon Pairwise long 2-register accumulate intrinsics,
  //   element sizes of 8, 16 and 32 bits:
  // Same record names and type pairs as the non-accumulating pairwise-long
  // multiclass, but built on N2VDPLInt2 / N2VQPLInt2.
  multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                           bits<5> op11_7, bit op4,
                           string OpcodeStr, string Dt,
                           SDPatternOperator IntOp> {
    // 64-bit vector types.
    def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
    def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
    def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

    // 128-bit vector types.
    def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
    def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
    def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
  }
  // Neon 2-register vector shift by immediate,
  //   element sizes of 8, 16, 32 and 64 bits:
  // This multiclass hard-codes the N2RegVShLFrm format and an i32imm
  // immediate operand.  For 8/16/32-bit elements the upper bits of
  // Inst{21-16} are pinned to mark the element size, per the imm6 comments
  // on each `let` below.  The 64-bit records instead pass 1 (rather than 0)
  // as the argument after op11_8 and leave imm6 unconstrained.
  multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "8", v8i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "16", v4i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "32", v2i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "64", v1i64, OpNode>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "8", v16i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "16", v8i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "32", v4i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "64", v2i64, OpNode>;
                               // imm6 = xxxxxx
  }
  // Right-shift-by-immediate counterpart of the N2VShL_QHSD multiclass.
  // Uses the N2RegVShRFrm format and a per-element-size immediate operand
  // (shr_imm8 / shr_imm16 / shr_imm32 / shr_imm64).  The Inst{21-16} pinning
  // and the 0/1 argument after op11_8 follow the same scheme as the left-shift
  // multiclass.
  // NOTE(review): `baseOpc` is accepted but not referenced anywhere in this
  // body; it is kept so existing instantiations keep compiling.
  multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         string baseOpc, SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm8,
                       OpcodeStr, Dt # "8", v8i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm16,
                       OpcodeStr, Dt # "16", v4i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm32,
                       OpcodeStr, Dt # "32", v2i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin,
                       shr_imm64,
                       OpcodeStr, Dt # "64", v1i64, OpNode>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm8,
                       OpcodeStr, Dt # "8", v16i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm16,
                       OpcodeStr, Dt # "16", v8i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                       shr_imm32,
                       OpcodeStr, Dt # "32", v4i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin,
                       shr_imm64,
                       OpcodeStr, Dt # "64", v2i64, OpNode>;
                               // imm6 = xxxxxx
  }
  3647. // Neon Shift-Accumulate vector operations,
  3648. // element sizes of 8, 16, 32 and 64 bits:
  // Each def instantiates N2VDShAdd (64-bit vector types) or N2VQShAdd
  // (128-bit vector types) with the shr_immN operand for its element size and
  // forwards ShOp unchanged. The data-type suffix is Dt followed by the
  // element size. As in the plain shift multiclasses, the leading bits of the
  // immediate field are pinned per element size; the 64-bit element defs pass
  // 1 as the fourth template argument and leave the field unconstrained.
  3649. multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3650. string OpcodeStr, string Dt, SDNode ShOp> {
  3651. // 64-bit vector types.
  3652. def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
  3653. OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
  3654. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3655. }
  3656. def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
  3657. OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
  3658. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3659. }
  3660. def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
  3661. OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
  3662. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3663. }
  3664. def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
  3665. OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
  3666. // imm6 = xxxxxx
  3667. // 128-bit vector types.
  3668. def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
  3669. OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
  3670. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3671. }
  3672. def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
  3673. OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
  3674. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3675. }
  3676. def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
  3677. OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
  3678. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3679. }
  3680. def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
  3681. OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
  3682. // imm6 = xxxxxx
  3683. }
  3684. // Neon Shift-Insert vector operations,
  3685. // with f of either N2RegVShLFrm or N2RegVShRFrm
  3686. // element sizes of 8, 16, 32 and 64 bits:
  // Left-shift flavour: every def uses format N2RegVShLFrm, a plain i32imm
  // shift operand and the NEONvsliImm node. The data-type suffix is the bare
  // element size ("8", "16", "32", "64"); there is no Dt prefix parameter.
  3687. multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3688. string OpcodeStr> {
  3689. // 64-bit vector types.
  3690. def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3691. N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
  3692. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3693. }
  3694. def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3695. N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
  3696. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3697. }
  3698. def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3699. N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
  3700. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3701. }
  3702. def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
  3703. N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
  3704. // imm6 = xxxxxx
  3705. // 128-bit vector types.
  3706. def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3707. N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
  3708. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3709. }
  3710. def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3711. N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
  3712. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3713. }
  3714. def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3715. N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
  3716. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3717. }
  3718. def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
  3719. N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
  3720. // imm6 = xxxxxx
  3721. }
  // Shift-Insert vector operations, right-shift flavour, for element sizes of
  // 8, 16, 32 and 64 bits. Every def uses format N2RegVShRFrm, the shr_immN
  // operand matching its element size, and the NEONvsriImm node. The
  // data-type suffix is the bare element size; there is no Dt prefix
  // parameter.
  3722. multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3723. string OpcodeStr> {
  3724. // 64-bit vector types.
  3725. def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
  3726. N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
  3727. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3728. }
  3729. def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
  3730. N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
  3731. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3732. }
  3733. def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
  3734. N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
  3735. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3736. }
  3737. def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
  3738. N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
  3739. // imm6 = xxxxxx
  3740. // 128-bit vector types.
  3741. def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
  3742. N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
  3743. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3744. }
  3745. def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
  3746. N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
  3747. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3748. }
  3749. def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
  3750. N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
  3751. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3752. }
  3753. def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
  3754. N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
  3755. // imm6 = xxxxxx
  3756. }
  3757. // Neon Shift Long operations,
  3758. // element sizes of 8, 16, 32 bits:
  // Each def is named after its (wider) result type and takes the
  // half-width-element vector as source: v8i16 <- v8i8, v4i32 <- v4i16,
  // v2i64 <- v2i32. The data-type suffix is Dt followed by the SOURCE element
  // size. The shift operand is imm1_7 / imm1_15 / imm1_31 respectively
  // (presumably limiting the shift to 1..size-1 -- confirm against the
  // operand definitions).
  3759. multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
  3760. bit op4, string OpcodeStr, string Dt,
  3761. SDPatternOperator OpNode> {
  3762. def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3763. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
  3764. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3765. }
  3766. def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3767. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
  3768. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3769. }
  3770. def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3771. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
  3772. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3773. }
  3774. }
  3775. // Neon Shift Narrow operations,
  3776. // element sizes of 16, 32, 64 bits:
  // Each def is named after its (narrower) result type and takes the
  // double-width-element vector as source: v8i8 <- v8i16, v4i16 <- v4i32,
  // v2i32 <- v2i64. The data-type suffix is Dt followed by the SOURCE element
  // size, while the shift operand (shr_imm8/16/32) and the pinned immediate
  // bits follow the RESULT element size.
  3777. multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
  3778. bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
  3779. SDPatternOperator OpNode> {
  3780. def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3781. OpcodeStr, !strconcat(Dt, "16"),
  3782. v8i8, v8i16, shr_imm8, OpNode> {
  3783. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3784. }
  3785. def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3786. OpcodeStr, !strconcat(Dt, "32"),
  3787. v4i16, v4i32, shr_imm16, OpNode> {
  3788. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3789. }
  3790. def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3791. OpcodeStr, !strconcat(Dt, "64"),
  3792. v2i32, v2i64, shr_imm32, OpNode> {
  3793. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3794. }
  3795. }
  3796. //===----------------------------------------------------------------------===//
  3797. // Instruction Definitions.
  3798. //===----------------------------------------------------------------------===//
  3799. // Vector Add Operations.
  3800. // VADD : Vector Add (integer and floating-point)
  // Integer forms come from the N3V_QHSD multiclass and map to the generic
  // `add` node; the f32/f16 forms are individual defs mapping to `fadd`.
  3801. defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
  3802. add, 1>;
  3803. def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
  3804. v2f32, v2f32, fadd, 1>;
  3805. def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
  3806. v4f32, v4f32, fadd, 1>;
  // The half-precision forms are additionally gated on HasFullFP16.
  3807. def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
  3808. v4f16, v4f16, fadd, 1>,
  3809. Requires<[HasNEON,HasFullFP16]>;
  3810. def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
  3811. v8f16, v8f16, fadd, 1>,
  3812. Requires<[HasNEON,HasFullFP16]>;
  3813. // VADDL : Vector Add Long (Q = D + D)
  // Signed form extends with sext, unsigned form with zanyext, before `add`.
  3814. defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
  3815. "vaddl", "s", add, sext, 1>;
  3816. defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
  3817. "vaddl", "u", add, zanyext, 1>;
  3818. // VADDW : Vector Add Wide (Q = Q + D)
  // Last template argument is 0 here but 1 for VADDL above (presumably the
  // commutable flag, since only one VADDW operand is extended -- confirm
  // against N3VW_QHS).
  3819. defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
  3820. defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
  3821. // VHADD : Vector Halving Add
  // Signed/unsigned variants differ in the first template bit and in the
  // intrinsic they select (int_arm_neon_vhadds / int_arm_neon_vhaddu).
  3822. defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
  3823. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3824. "vhadd", "s", int_arm_neon_vhadds, 1>;
  3825. defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
  3826. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3827. "vhadd", "u", int_arm_neon_vhaddu, 1>;
  3828. // VRHADD : Vector Rounding Halving Add
  3829. defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
  3830. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3831. "vrhadd", "s", int_arm_neon_vrhadds, 1>;
  3832. defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
  3833. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3834. "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
  3835. // VQADD : Vector Saturating Add
  // Uses the _QHSD multiclass (includes 64-bit elements) and selects from the
  // generic saddsat / uaddsat nodes rather than a NEON intrinsic.
  3836. defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
  3837. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3838. "vqadd", "s", saddsat, 1>;
  3839. defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
  3840. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3841. "vqadd", "u", uaddsat, 1>;
  3842. // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
  // Defined with null_frag, so the multiclass attaches no selection pattern;
  // the instruction is selected only through the explicit patterns below.
  3843. defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
  3844. // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
  3845. defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
  3846. int_arm_neon_vraddhn, 1>;
  // Select VADDHN for trunc(lshr(add(Vn, Vm), N)), where N is the width of the
  // narrowed result element (8, 16 or 32), i.e. the high half of each sum.
  3847. let Predicates = [HasNEON] in {
  3848. def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
  3849. (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
  3850. def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
  3851. (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
  3852. def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
  3853. (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
  3854. }
  3855. // Vector Multiply Operations.
  3856. // VMUL : Vector Multiply (integer, polynomial and floating-point)
  // Integer forms (8/16/32-bit elements) map to `mul`; the polynomial "p8"
  // forms select int_arm_neon_vmulp; the f32/f16 forms map to `fmul`.
  3857. defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
  3858. IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
  3859. def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
  3860. "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
  3861. def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
  3862. "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
  3863. def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
  3864. v2f32, v2f32, fmul, 1>;
  3865. def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
  3866. v4f32, v4f32, fmul, 1>;
  3867. def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
  3868. v4f16, v4f16, fmul, 1>,
  3869. Requires<[HasNEON,HasFullFP16]>;
  3870. def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
  3871. v8f16, v8f16, fmul, 1>,
  3872. Requires<[HasNEON,HasFullFP16]>;
  // "sl" defs: multiply by one lane of a D register (the *SL classes).
  // NOTE(review): the f32 lane forms use the IIC_VBIND/IIC_VBINQ itineraries,
  // whereas the vector f32 forms above use IIC_VFMULD/IIC_VFMULQ -- confirm
  // this difference is intended.
  3873. defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
  3874. def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
  3875. def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
  3876. v2f32, fmul>;
  3877. def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
  3878. Requires<[HasNEON,HasFullFP16]>;
  3879. def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
  3880. v4f16, fmul>,
  3881. Requires<[HasNEON,HasFullFP16]>;
  // Extra selection patterns that fold a lane duplicate or scalar duplicate
  // into the by-lane VMUL forms.
  3882. let Predicates = [HasNEON] in {
  // Q-register multiply by a duplicated lane of a Q register. The by-lane
  // instruction takes its scalar from a D register, so the pattern extracts
  // the D sub-register of $src2 via DSubReg_*_reg and passes the lane number
  // through SubReg_*_lane (presumably: the D half holding the lane, and the
  // lane index within that half -- confirm against those transforms).
  3883. def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
  3884. (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
  3885. (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
  3886. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  3887. (DSubReg_i16_reg imm:$lane))),
  3888. (SubReg_i16_lane imm:$lane)))>;
  3889. def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
  3890. (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
  3891. (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
  3892. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  3893. (DSubReg_i32_reg imm:$lane))),
  3894. (SubReg_i32_lane imm:$lane)))>;
  // Floating-point variants reuse the integer helpers of the same lane width
  // (i32 helpers for f32 lanes, i16 helpers for f16 lanes).
  3895. def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
  3896. (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
  3897. (v4f32 (VMULslfq (v4f32 QPR:$src1),
  3898. (v2f32 (EXTRACT_SUBREG QPR:$src2,
  3899. (DSubReg_i32_reg imm:$lane))),
  3900. (SubReg_i32_lane imm:$lane)))>;
  // NOTE(review): the f16 patterns in this block are gated only by HasNEON,
  // while the VMULslhd/VMULslhq instructions they emit are declared with
  // Requires<[HasNEON,HasFullFP16]> -- confirm whether type legality alone is
  // relied on here.
  3901. def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
  3902. (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
  3903. (v8f16 (VMULslhq(v8f16 QPR:$src1),
  3904. (v4f16 (EXTRACT_SUBREG QPR:$src2,
  3905. (DSubReg_i16_reg imm:$lane))),
  3906. (SubReg_i16_lane imm:$lane)))>;
  // Multiply by a scalar broadcast (ARMvdup of an S/H register): insert the
  // scalar into ssub_0 of an otherwise undefined D register and use lane 0 of
  // the by-lane instruction, avoiding a separate duplicate.
  3907. def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
  3908. (VMULslfd DPR:$Rn,
  3909. (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
  3910. (i32 0))>;
  3911. def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
  3912. (VMULslhd DPR:$Rn,
  3913. (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
  3914. (i32 0))>;
  3915. def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
  3916. (VMULslfq QPR:$Rn,
  3917. (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
  3918. (i32 0))>;
  3919. def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
  3920. (VMULslhq QPR:$Rn,
  3921. (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
  3922. (i32 0))>;
  3923. }
  3924. // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
  // Vector and by-lane ("sl") forms, 16- and 32-bit elements only (_HS); both
  // select the int_arm_neon_vqdmulh intrinsic.
  3925. defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
  3926. IIC_VMULi16Q, IIC_VMULi32Q,
  3927. "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
  3928. defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
  3929. IIC_VMULi16Q, IIC_VMULi32Q,
  3930. "vqdmulh", "s", int_arm_neon_vqdmulh>;
  // Fold a lane duplicate taken from a Q register into the Q by-lane form:
  // the lane source is narrowed to a D sub-register with DSubReg_*_reg and the
  // lane number is passed through SubReg_*_lane.
  3931. let Predicates = [HasNEON] in {
  3932. def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
  3933. (v8i16 (ARMvduplane (v8i16 QPR:$src2),
  3934. imm:$lane)))),
  3935. (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
  3936. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  3937. (DSubReg_i16_reg imm:$lane))),
  3938. (SubReg_i16_lane imm:$lane)))>;
  3939. def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
  3940. (v4i32 (ARMvduplane (v4i32 QPR:$src2),
  3941. imm:$lane)))),
  3942. (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
  3943. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  3944. (DSubReg_i32_reg imm:$lane))),
  3945. (SubReg_i32_lane imm:$lane)))>;
  3946. }
  3947. // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
  // Same structure as VQDMULH: the vector form differs in the first template
  // bit, the by-lane form in its 4-bit opcode field, and both select
  // int_arm_neon_vqrdmulh.
  3948. defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
  3949. IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
  3950. "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
  3951. defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
  3952. IIC_VMULi16Q, IIC_VMULi32Q,
  3953. "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
  // Fold a lane duplicate taken from a Q register into the Q by-lane form
  // (D sub-register extraction plus lane renumbering, as for VQDMULH).
  3954. let Predicates = [HasNEON] in {
  3955. def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
  3956. (v8i16 (ARMvduplane (v8i16 QPR:$src2),
  3957. imm:$lane)))),
  3958. (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
  3959. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  3960. (DSubReg_i16_reg imm:$lane))),
  3961. (SubReg_i16_lane imm:$lane)))>;
  3962. def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
  3963. (v4i32 (ARMvduplane (v4i32 QPR:$src2),
  3964. imm:$lane)))),
  3965. (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
  3966. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  3967. (DSubReg_i32_reg imm:$lane))),
  3968. (SubReg_i32_lane imm:$lane)))>;
  3969. }
  3970. // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
  // The four defs inside this `let` share the NEONThumb2DataIPostEncoder
  // post-encoder and the "NEONData" decoder namespace. Signed/unsigned integer
  // forms select ARMvmulls / ARMvmullu; both polynomial forms select
  // int_arm_neon_vmullp.
  3971. let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
  3972. DecoderNamespace = "NEONData" in {
  3973. defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
  3974. "vmull", "s", ARMvmulls, 1>;
  3975. defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
  3976. "vmull", "u", ARMvmullu, 1>;
  3977. def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
  3978. v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // The 64-bit polynomial form (v2i64 <- v1i64) has no itinerary and is gated
  // on HasV8 and HasCrypto.
  3979. def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
  3980. "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
  3981. Requires<[HasV8, HasCrypto]>;
  3982. }
  // By-lane long multiplies are defined outside the `let` above, so they do
  // not receive its PostEncoderMethod / DecoderNamespace overrides.
  3983. defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
  3984. defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;
  3985. // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
  3986. defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
  3987. "vqdmull", "s", int_arm_neon_vqdmull, 1>;
  3988. defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
  3989. "vqdmull", "s", int_arm_neon_vqdmull>;
  3990. // Vector Multiply-Accumulate and Multiply-Subtract Operations.
  3991. // VMLA : Vector Multiply Accumulate (integer and floating-point)
  // Integer forms combine the product with `add`. All floating-point forms are
  // gated on UseFPVMLx; the f16 forms additionally on HasFullFP16.
  3992. defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
  3993. IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  3994. def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
  3995. v2f32, fmul_su, fadd_mlx>,
  3996. Requires<[HasNEON, UseFPVMLx]>;
  3997. def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
  3998. v4f32, fmul_su, fadd_mlx>,
  3999. Requires<[HasNEON, UseFPVMLx]>;
  4000. def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
  4001. v4f16, fmul_su, fadd_mlx>,
  4002. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4003. def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
  4004. v8f16, fmul_su, fadd_mlx>,
  4005. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  // By-lane ("sl") forms.
  4006. defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
  4007. IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  4008. def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
  4009. v2f32, fmul_su, fadd_mlx>,
  4010. Requires<[HasNEON, UseFPVMLx]>;
  4011. def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
  4012. v4f32, v2f32, fmul_su, fadd_mlx>,
  4013. Requires<[HasNEON, UseFPVMLx]>;
  // NOTE(review): the f16 by-lane forms use plain fmul/fadd, whereas every
  // other floating-point VMLA form above uses fmul_su/fadd_mlx -- confirm
  // whether this difference is intentional.
  4014. def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
  4015. v4f16, fmul, fadd>,
  4016. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4017. def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
  4018. v8f16, v4f16, fmul, fadd>,
  4019. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  // Fold add(src1, mul(src2, duplane(src3, lane))) on Q registers into the
  // by-lane VMLA form: the lane source is narrowed to a D sub-register via
  // DSubReg_*_reg and the lane number is passed through SubReg_*_lane.
  4020. let Predicates = [HasNEON] in {
  4021. def : Pat<(v8i16 (add (v8i16 QPR:$src1),
  4022. (mul (v8i16 QPR:$src2),
  4023. (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
  4024. (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
  4025. (v4i16 (EXTRACT_SUBREG QPR:$src3,
  4026. (DSubReg_i16_reg imm:$lane))),
  4027. (SubReg_i16_lane imm:$lane)))>;
  4028. def : Pat<(v4i32 (add (v4i32 QPR:$src1),
  4029. (mul (v4i32 QPR:$src2),
  4030. (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
  4031. (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
  4032. (v2i32 (EXTRACT_SUBREG QPR:$src3,
  4033. (DSubReg_i32_reg imm:$lane))),
  4034. (SubReg_i32_lane imm:$lane)))>;
  4035. }
  // The f32 variant sits outside the `let` above and carries its own
  // Requires list, because it also needs UseFPVMLx. It matches the
  // fadd_mlx/fmul_su fragments, like the VMLAslfq definition it selects.
  4036. def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
  4037. (fmul_su (v4f32 QPR:$src2),
  4038. (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
  4039. (v4f32 (VMLAslfq (v4f32 QPR:$src1),
  4040. (v4f32 QPR:$src2),
  4041. (v2f32 (EXTRACT_SUBREG QPR:$src3,
  4042. (DSubReg_i32_reg imm:$lane))),
  4043. (SubReg_i32_lane imm:$lane)))>,
  4044. Requires<[HasNEON, UseFPVMLx]>;
  4045. // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
  // Signed/unsigned forms use the long-multiply nodes ARMvmulls / ARMvmullu
  // and accumulate with `add`; the "sl" defs are the by-lane forms.
  4046. defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
  4047. "vmlal", "s", ARMvmulls, add>;
  4048. defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
  4049. "vmlal", "u", ARMvmullu, add>;
  4050. defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
  4051. defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
  // v8.1a rounding doubling multiply accumulate/subtract. Everything in this
  // block is gated on HasNEON and HasV8_1a. The instructions themselves are
  // defined with null_frag (no built-in pattern); selection happens through
  // the explicit saddsat/ssubsat-of-vqrdmulh patterns that follow each defm.
  4052. let Predicates = [HasNEON, HasV8_1a] in {
  4053. // v8.1a Neon Rounding Double Multiply-Op vector operations,
  4054. // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  4055. // Returning High Half (Vd = saddsat(Vd, vqrdmulh(Vn, Vm)); same-width D or Q operands, not a long op)
  4056. defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
  4057. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
  4058. null_frag>;
  // Vector forms: all three operands share one register class and type.
  4059. def : Pat<(v4i16 (saddsat
  4060. (v4i16 DPR:$src1),
  4061. (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
  4062. (v4i16 DPR:$Vm))))),
  4063. (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4064. def : Pat<(v2i32 (saddsat
  4065. (v2i32 DPR:$src1),
  4066. (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
  4067. (v2i32 DPR:$Vm))))),
  4068. (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4069. def : Pat<(v8i16 (saddsat
  4070. (v8i16 QPR:$src1),
  4071. (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
  4072. (v8i16 QPR:$Vm))))),
  4073. (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4074. def : Pat<(v4i32 (saddsat
  4075. (v4i32 QPR:$src1),
  4076. (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
  4077. (v4i32 QPR:$Vm))))),
  4078. (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  // By-lane forms.
  4079. defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
  4080. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
  4081. null_frag>;
  // D-register by-lane patterns: the lane source uses the restricted classes
  // DPR_8 (16-bit lanes) and DPR_VFP2 (32-bit lanes), and the lane immediate
  // is forwarded unchanged.
  4082. def : Pat<(v4i16 (saddsat
  4083. (v4i16 DPR:$src1),
  4084. (v4i16 (int_arm_neon_vqrdmulh
  4085. (v4i16 DPR:$Vn),
  4086. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4087. imm:$lane)))))),
  4088. (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
  4089. imm:$lane))>;
  4090. def : Pat<(v2i32 (saddsat
  4091. (v2i32 DPR:$src1),
  4092. (v2i32 (int_arm_neon_vqrdmulh
  4093. (v2i32 DPR:$Vn),
  4094. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4095. imm:$lane)))))),
  4096. (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
  4097. imm:$lane))>;
  // Q-register by-lane patterns: the lane source is a Q register, so it is
  // narrowed to a D sub-register via DSubReg_*_reg and the lane number is
  // passed through SubReg_*_lane.
  4098. def : Pat<(v8i16 (saddsat
  4099. (v8i16 QPR:$src1),
  4100. (v8i16 (int_arm_neon_vqrdmulh
  4101. (v8i16 QPR:$src2),
  4102. (v8i16 (ARMvduplane (v8i16 QPR:$src3),
  4103. imm:$lane)))))),
  4104. (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
  4105. (v8i16 QPR:$src2),
  4106. (v4i16 (EXTRACT_SUBREG
  4107. QPR:$src3,
  4108. (DSubReg_i16_reg imm:$lane))),
  4109. (SubReg_i16_lane imm:$lane)))>;
  4110. def : Pat<(v4i32 (saddsat
  4111. (v4i32 QPR:$src1),
  4112. (v4i32 (int_arm_neon_vqrdmulh
  4113. (v4i32 QPR:$src2),
  4114. (v4i32 (ARMvduplane (v4i32 QPR:$src3),
  4115. imm:$lane)))))),
  4116. (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
  4117. (v4i32 QPR:$src2),
  4118. (v2i32 (EXTRACT_SUBREG
  4119. QPR:$src3,
  4120. (DSubReg_i32_reg imm:$lane))),
  4121. (SubReg_i32_lane imm:$lane)))>;
  4122. // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  4123. // Returning High Half (Vd = ssubsat(Vd, vqrdmulh(Vn, Vm)); same-width D or Q operands, not a long op)
  4124. defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
  4125. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
  4126. null_frag>;
  // Vector forms, mirroring VQRDMLAH with ssubsat in place of saddsat.
  4127. def : Pat<(v4i16 (ssubsat
  4128. (v4i16 DPR:$src1),
  4129. (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
  4130. (v4i16 DPR:$Vm))))),
  4131. (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4132. def : Pat<(v2i32 (ssubsat
  4133. (v2i32 DPR:$src1),
  4134. (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
  4135. (v2i32 DPR:$Vm))))),
  4136. (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4137. def : Pat<(v8i16 (ssubsat
  4138. (v8i16 QPR:$src1),
  4139. (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
  4140. (v8i16 QPR:$Vm))))),
  4141. (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4142. def : Pat<(v4i32 (ssubsat
  4143. (v4i32 QPR:$src1),
  4144. (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
  4145. (v4i32 QPR:$Vm))))),
  4146. (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  // By-lane forms.
  4147. defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
  4148. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
  4149. null_frag>;
  // D-register by-lane patterns (DPR_8 / DPR_VFP2 lane sources).
  4150. def : Pat<(v4i16 (ssubsat
  4151. (v4i16 DPR:$src1),
  4152. (v4i16 (int_arm_neon_vqrdmulh
  4153. (v4i16 DPR:$Vn),
  4154. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4155. imm:$lane)))))),
  4156. (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  4157. def : Pat<(v2i32 (ssubsat
  4158. (v2i32 DPR:$src1),
  4159. (v2i32 (int_arm_neon_vqrdmulh
  4160. (v2i32 DPR:$Vn),
  4161. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4162. imm:$lane)))))),
  4163. (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
  4164. imm:$lane))>;
  // Q-register by-lane patterns (D sub-register extraction plus lane
  // renumbering).
  4165. def : Pat<(v8i16 (ssubsat
  4166. (v8i16 QPR:$src1),
  4167. (v8i16 (int_arm_neon_vqrdmulh
  4168. (v8i16 QPR:$src2),
  4169. (v8i16 (ARMvduplane (v8i16 QPR:$src3),
  4170. imm:$lane)))))),
  4171. (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
  4172. (v8i16 QPR:$src2),
  4173. (v4i16 (EXTRACT_SUBREG
  4174. QPR:$src3,
  4175. (DSubReg_i16_reg imm:$lane))),
  4176. (SubReg_i16_lane imm:$lane)))>;
  4177. def : Pat<(v4i32 (ssubsat
  4178. (v4i32 QPR:$src1),
  4179. (v4i32 (int_arm_neon_vqrdmulh
  4180. (v4i32 QPR:$src2),
  4181. (v4i32 (ARMvduplane (v4i32 QPR:$src3),
  4182. imm:$lane)))))),
  4183. (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
  4184. (v4i32 QPR:$src2),
  4185. (v2i32 (EXTRACT_SUBREG
  4186. QPR:$src3,
  4187. (DSubReg_i32_reg imm:$lane))),
  4188. (SubReg_i32_lane imm:$lane)))>;
  4189. }
  4190. // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
  // Both the vector and by-lane forms are defined with null_frag; selection is
  // done by the saddsat(acc, vqdmull(...)) patterns below.
  4191. defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
  4192. "vqdmlal", "s", null_frag>;
  4193. defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
  4194. let Predicates = [HasNEON] in {
  // Vector forms: Q accumulator, D multiplicands. These defs are named after
  // the result type (v4i32 / v2i64).
  4195. def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
  4196. (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
  4197. (v4i16 DPR:$Vm))))),
  4198. (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4199. def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
  4200. (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
  4201. (v2i32 DPR:$Vm))))),
  4202. (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
  // By-lane forms: note these defs are named after the SOURCE type
  // (v4i16 / v2i32), unlike the vector forms above; the lane source uses the
  // restricted DPR_8 / DPR_VFP2 classes.
  4203. def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
  4204. (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
  4205. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4206. imm:$lane)))))),
  4207. (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
  4208. def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
  4209. (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
  4210. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4211. imm:$lane)))))),
  4212. (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  4213. }
  4214. // VMLS : Vector Multiply Subtract (integer and floating-point)
  // Integer forms combine the product with `sub`. All floating-point forms are
  // gated on UseFPVMLx; the f16 forms additionally on HasFullFP16.
  4215. defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
  4216. IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
  4217. def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
  4218. v2f32, fmul_su, fsub_mlx>,
  4219. Requires<[HasNEON, UseFPVMLx]>;
  4220. def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
  4221. v4f32, fmul_su, fsub_mlx>,
  4222. Requires<[HasNEON, UseFPVMLx]>;
  // NOTE(review): the f16 forms (vector and by-lane) use plain fmul/fsub,
  // whereas the f32 forms use fmul_su/fsub_mlx -- confirm whether this
  // difference is intentional.
  4223. def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
  4224. v4f16, fmul, fsub>,
  4225. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4226. def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
  4227. v8f16, fmul, fsub>,
  4228. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  // By-lane ("sl") forms.
  4229. defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
  4230. IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
  4231. def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
  4232. v2f32, fmul_su, fsub_mlx>,
  4233. Requires<[HasNEON, UseFPVMLx]>;
  4234. def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
  4235. v4f32, v2f32, fmul_su, fsub_mlx>,
  4236. Requires<[HasNEON, UseFPVMLx]>;
  4237. def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
  4238. v4f16, fmul, fsub>,
  4239. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4240. def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
  4241. v8f16, v4f16, fmul, fsub>,
  4242. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  // Fold sub(src1, mul(src2, duplane(src3, lane))) on Q registers into the
  // by-lane VMLS form: the lane source is narrowed to a D sub-register via
  // DSubReg_*_reg and the lane number is passed through SubReg_*_lane.
  4243. let Predicates = [HasNEON] in {
  4244. def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
  4245. (mul (v8i16 QPR:$src2),
  4246. (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
  4247. (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
  4248. (v4i16 (EXTRACT_SUBREG QPR:$src3,
  4249. (DSubReg_i16_reg imm:$lane))),
  4250. (SubReg_i16_lane imm:$lane)))>;
  4251. def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
  4252. (mul (v4i32 QPR:$src2),
  4253. (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
  4254. (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
  4255. (v2i32 (EXTRACT_SUBREG QPR:$src3,
  4256. (DSubReg_i32_reg imm:$lane))),
  4257. (SubReg_i32_lane imm:$lane)))>;
  4258. }
  // The f32 variant sits outside the `let` above and carries its own
  // Requires list, because it also needs UseFPVMLx. It matches the
  // fsub_mlx/fmul_su fragments, like the VMLSslfq definition it selects.
  4259. def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
  4260. (fmul_su (v4f32 QPR:$src2),
  4261. (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
  4262. (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
  4263. (v2f32 (EXTRACT_SUBREG QPR:$src3,
  4264. (DSubReg_i32_reg imm:$lane))),
  4265. (SubReg_i32_lane imm:$lane)))>,
  4266. Requires<[HasNEON, UseFPVMLx]>;
  4267. // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
  // Signed/unsigned forms use the long-multiply nodes ARMvmulls / ARMvmullu
  // and combine with `sub`; the "sl" defs are the by-lane forms.
  4268. defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
  4269. "vmlsl", "s", ARMvmulls, sub>;
  4270. defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
  4271. "vmlsl", "u", ARMvmullu, sub>;
  4272. defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
  4273. defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
  4274. // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
  // Both the vector and by-lane forms are defined with null_frag; selection is
  // done by the ssubsat(acc, vqdmull(...)) patterns below.
  4275. defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
  4276. "vqdmlsl", "s", null_frag>;
  4277. defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
  4278. let Predicates = [HasNEON] in {
  // Vector forms: Q accumulator, D multiplicands; defs named after the result
  // type (v4i32 / v2i64).
  4279. def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
  4280. (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
  4281. (v4i16 DPR:$Vm))))),
  4282. (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4283. def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
  4284. (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
  4285. (v2i32 DPR:$Vm))))),
  4286. (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
  // By-lane forms: defs named after the SOURCE type (v4i16 / v2i32); the lane
  // source uses the restricted DPR_8 / DPR_VFP2 classes.
  4287. def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
  4288. (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
  4289. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4290. imm:$lane)))))),
  4291. (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
  4292. def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
  4293. (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
  4294. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4295. imm:$lane)))))),
  4296. (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  4297. }
  // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
  // VFMA, f32: gated on HasVFP4, built from the fmul_su / fadd_mlx fragments.
  def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1,
                         IIC_VFMACD, "vfma", "f32",
                         v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1,
                         IIC_VFMACQ, "vfma", "f32",
                         v4f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

  // VFMA, f16: gated on HasFullFP16, built from plain fmul / fadd.
  def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1,
                         IIC_VFMACD, "vfma", "f16",
                         v4f16, fmul, fadd>,
               Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1,
                         IIC_VFMACQ, "vfma", "f16",
                         v8f16, fmul, fadd>,
               Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  // Fused Vector Multiply Subtract (floating-point)
  // VFMS, f32: gated on HasVFP4, built from the fmul_su / fsub_mlx fragments.
  def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1,
                         IIC_VFMACD, "vfms", "f32",
                         v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1,
                         IIC_VFMACQ, "vfms", "f32",
                         v4f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

  // VFMS, f16: gated on HasFullFP16, built from plain fmul / fsub.
  def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1,
                         IIC_VFMACD, "vfms", "f16",
                         v4f16, fmul, fsub>,
               Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1,
                         IIC_VFMACQ, "vfms", "f16",
                         v8f16, fmul, fsub>,
               Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  // Match @llvm.fma.* intrinsics
  // fma(Vn, Vm, acc) maps onto VFMA with the accumulator as the tied first
  // operand. f16 forms require HasFullFP16, f32 forms require HasVFP4.
  def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;

  // fma(-Vn, Vm, acc) == acc - Vn * Vm, i.e. VFMS. The f16 forms mirror the
  // f32 ones so a negated multiplicand does not need a separate negate in
  // front of VFMA when FullFP16 is available.
  def : Pat<(v4f16 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
            (VFMShd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v8f16 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
            (VFMShq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
            (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
            (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  // ARMv8.2a dot product instructions.
  // We put them in the VFPV8 decoder namespace because the ARM and Thumb
  // encodings are the same and thus no further bit twiddling is necessary
  // in the disassembler.
  //
  // Three-register form: $Vd is the accumulator input and is tied to the
  // result $dst; OpNode is applied to (accumulator, Vn, Vm).
  class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
             string AsmTy, ValueType AccumTy, ValueType InputTy,
             SDPatternOperator OpNode>
    : N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4,
            (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
            N3RegFrm, IIC_VDOTPROD, Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }
  // Vector-by-vector dot products: D-register (v2i32 <- v8i8) and Q-register
  // (v4i32 <- v16i8) forms for the unsigned and signed intrinsics.
  def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8",
                    v2i32, v8i8,  int_arm_neon_udot>;
  def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8",
                    v2i32, v8i8,  int_arm_neon_sdot>;
  def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8",
                    v4i32, v16i8, int_arm_neon_udot>;
  def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8",
                    v4i32, v16i8, int_arm_neon_sdot>;
  // Indexed dot product instructions:
  // Emits one instruction record (named after the defm itself) plus one
  // selection pattern. The instruction carries no pattern of its own; the
  // pattern matches OpNode whose last operand is a bitconvert of an
  // ARMvduplane, and passes RHS as the instruction's $Vm operand.
  multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                  ValueType AccumType, ValueType InputType,
                  SDPatternOperator OpNode, dag RHS> {
    def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U,
                   (outs Ty:$dst),
                   (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                   N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
      // The single lane bit is placed in instruction bit 5.
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
      let Constraints = "$dst = $Vd";
      let Predicates = [HasDotProd];
      let DecoderNamespace = "VFPV8";
    }

    def : Pat<
      (AccumType (OpNode (AccumType Ty:$Vd),
                         (InputType Ty:$Vn),
                         (InputType (bitconvert
                                      (AccumType
                                        (ARMvduplane (AccumType Ty:$Vm),
                                                     VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
  }
  // Indexed dot products. The D forms hand $Vm through unchanged; the Q forms
  // pass the dsub_0 sub-register of the Q-sized $Vm as the indexed operand.
  defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                      int_arm_neon_udot,
                      (v2i32 DPR_VFP2:$Vm)>;
  defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                      int_arm_neon_sdot,
                      (v2i32 DPR_VFP2:$Vm)>;
  defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                      int_arm_neon_udot,
                      (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                      int_arm_neon_sdot,
                      (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  // v8.6A matrix multiplication extension
  // Everything in this scope is gated on HasMatMulInt8.
  let Predicates = [HasMatMulInt8] in {
    // Q-register matrix multiply-accumulate: v4i32 accumulator ($Vd, tied to
    // $dst) with two v16i8 inputs.
    class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                    SDPatternOperator OpNode>
      : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U,
              (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
              N3RegFrm, NoItinerary, Asm, AsmTy,
              [(set (v4i32 QPR:$dst),
                    (OpNode (v4i32 QPR:$Vd),
                            (v16i8 QPR:$Vn),
                            (v16i8 QPR:$Vm)))]> {
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    // Indexed mixed-sign dot product: one pattern-less instruction record
    // named after the defm, plus a pattern matching OpNode with the
    // lane-broadcast operand last.
    multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy,
                               RegisterClass RegTy, ValueType AccumTy,
                               ValueType InputTy, SDPatternOperator OpNode,
                               dag RHS> {
      def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U,
                     (outs RegTy:$dst),
                     (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm,
                          VectorIndex32:$lane),
                     N3RegFrm, NoItinerary, Asm, AsmTy, []> {
        bit lane;
        let Inst{5} = lane;
        let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
        let DecoderNamespace = "VFPV8";
        let Constraints = "$dst = $Vd";
      }

      def : Pat<
        (AccumTy (OpNode (AccumTy RegTy:$Vd),
                         (InputTy RegTy:$Vn),
                         (InputTy (bitconvert
                                    (AccumTy
                                      (ARMvduplane (AccumTy RegTy:$Vm),
                                                   VectorIndex32:$lane)))))),
        (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                  VectorIndex32:$lane)>;
    }

    // vsudot: reuses N3VMixedDotLane with null_frag for both OpNode and RHS,
    // then adds its own pattern on int_arm_neon_usdot in which the
    // lane-broadcast operand comes *before* $Vn.
    multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy,
                         ValueType InputTy, dag RHS>
      : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy,
                        null_frag, null_frag> {
      def : Pat<
        (AccumTy (int_arm_neon_usdot
                   (AccumTy RegTy:$Vd),
                   (InputTy (bitconvert
                              (AccumTy
                                (ARMvduplane (AccumTy RegTy:$Vm),
                                             VectorIndex32:$lane)))),
                   (InputTy RegTy:$Vn))),
        (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                  VectorIndex32:$lane)>;
    }

    // Matrix multiply-accumulate records.
    def VSMMLA  : N3VMatMul<0, 0, "vsmmla",  "s8", int_arm_neon_smmla>;
    def VUMMLA  : N3VMatMul<0, 1, "vummla",  "u8", int_arm_neon_ummla>;
    def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;

    // Mixed-sign dot product, vector-by-vector.
    def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8",
                       v2i32, v8i8,  int_arm_neon_usdot>;
    def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8",
                       v4i32, v16i8, int_arm_neon_usdot>;

    // Mixed-sign dot product, indexed.
    defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                    int_arm_neon_usdot,
                                    (v2i32 DPR_VFP2:$Vm)>;
    defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                    int_arm_neon_usdot,
                                    (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
    defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8,
                              (v2i32 DPR_VFP2:$Vm)>;
    defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8,
                              (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  }
  // ARMv8.3 complex operations
  // Base for forms whose $src1 input is tied to $Vd. A 2-bit rotation operand
  // is placed in instruction bits 24-23.
  class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                              InstrItinClass itin, dag oops, dag iops,
                              string opc, string dt, list<dag> pattern>
    : N3VCP8<{?,?}, {op21,s}, q, op4, oops, iops, itin, opc, dt,
             "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    let Inst{24-23} = rot;
  }
  // Base for complex forms with no tied operand (empty constraint string).
  // The rotation operand is a single bit, placed in instruction bit 24.
  class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                             InstrItinClass itin, dag oops, dag iops,
                             string opc, string dt, list<dag> pattern>
    : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, iops, itin, opc, dt,
             "$Vd, $Vn, $Vm, $rot", "", pattern> {
    bits<1> rot;
    let Inst{24} = rot;
  }
  // Indexed complex base with $src1 tied to $Vd. The 2-bit rotation goes in
  // instruction bits 21-20 and the one-bit lane index in bit 5.
  class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q,
                                    InstrItinClass itin,
                                    dag oops, dag iops,
                                    string opc, string dt,
                                    list<dag> pattern>
    : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;
    let Inst{21-20} = rot;
    let Inst{5} = lane;
  }
  // Indexed complex base with $src1 tied to $Vd, for the 64-bit-index forms.
  // The rotation goes in instruction bits 21-20; bit 5 is taken from Vm{4}
  // rather than from the lane operand.
  class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q,
                                    InstrItinClass itin,
                                    dag oops, dag iops,
                                    string opc, string dt,
                                    list<dag> pattern>
    : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;
    let Inst{21-20} = rot;
    let Inst{5} = Vm{4};
    // This is needed because the lane operand does not have any bits in the
    // encoding (it only has one possible value), so we need to manually set it
    // to its default value.
    let DecoderMethod = "DecodeNEONComplexLane64Instruction";
  }
  // Instantiates the tied complex form for four element layouts. The f16
  // records additionally require HasFullFP16. All records are created with an
  // empty pattern list; the Op parameter is not referenced in this body.
  multiclass N3VCP8ComplexTied<bit op21, bit op4,
                               string OpcodeStr, SDPatternOperator Op> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f16", []>;
    }
    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f32", []>;
    }
  }
  // Instantiates the untied complex form (two register inputs plus a
  // complexrotateopodd rotation) for four element layouts. The f16 records
  // additionally require HasFullFP16. All records are created with an empty
  // pattern list; the Op parameter is not referenced in this body.
  multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                              string OpcodeStr, SDPatternOperator Op> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f16", []>;
    }
    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f32", []>;
    }
  }
  // These instructions index by pairs of lanes, so the VectorIndexes are twice
  // as wide as the data types.
  // f16 records use VectorIndex32 with a DPR_VFP2 indexed register; f32
  // records use VectorIndex64 with a DPR indexed register. All records are
  // created with an empty pattern list; the Op parameter is not referenced in
  // this body.
  multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
                                   SDPatternOperator Op> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                            (outs DPR:$Vd),
                            (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                 VectorIndex32:$lane, complexrotateop:$rot),
                            OpcodeStr, "f16", []>;
      def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                            (outs QPR:$Vd),
                            (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                                 VectorIndex32:$lane, complexrotateop:$rot),
                            OpcodeStr, "f16", []>;
    }
    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                            (outs DPR:$Vd),
                            (ins DPR:$src1, DPR:$Vn, DPR:$Vm,
                                 VectorIndex64:$lane, complexrotateop:$rot),
                            OpcodeStr, "f32", []>;
      def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                            (outs QPR:$Vd),
                            (ins QPR:$src1, QPR:$Vn, DPR:$Vm,
                                 VectorIndex64:$lane, complexrotateop:$rot),
                            OpcodeStr, "f32", []>;
    }
  }
  // Complex multiply-accumulate (vector and indexed) and complex add.
  // No selection operator is supplied here (null_frag).
  defm VCMLA : N3VCP8ComplexTied    <1, 0,    "vcmla", null_frag>;
  defm VCADD : N3VCP8ComplexOdd     <1, 0, 0, "vcadd", null_frag>;
  defm VCMLA : N3VCP8ComplexTiedLane<0,       "vcmla", null_frag>;
  // Selection of the vcadd rotation intrinsics: rot90 is emitted with
  // rotation operand 0 and rot270 with rotation operand 1.
  // f16 element types need FullFP16 in addition to NEON + v8.3a.
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot90  (v4f16 DPR:$Rn),
                                                (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn),
                                                (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot90  (v8f16 QPR:$Rn),
                                                (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn),
                                                (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot90  (v2f32 DPR:$Rn),
                                                (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn),
                                                (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot90  (v4f32 QPR:$Rn),
                                                (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn),
                                                (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
  }
  // Vector Subtract Operations.

  // VSUB : Vector Subtract (integer and floating-point)
  // Integer forms select `sub`, floating-point forms select `fsub`; the f16
  // records are additionally gated on HasFullFP16.
  defm VSUB   : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
  def  VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
  def  VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
  def  VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                     v4f16, v4f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
  def  VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                     v8f16, v8f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
  // VSUBL : Vector Subtract Long (Q = D - D)
  // Signed records extend with sext, unsigned records with zanyext.
  defm VSUBLs : N3VLExt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
  defm VSUBLu : N3VLExt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zanyext, 0>;

  // VSUBW : Vector Subtract Wide (Q = Q - D)
  defm VSUBWs : N3VW_QHS<0, 1, 0b0011, 0, "vsubw", "s", sub, sext, 0>;
  defm VSUBWu : N3VW_QHS<1, 1, 0b0011, 0, "vsubw", "u", sub, zanyext, 0>;
  // VHSUB : Vector Halving Subtract
  // Selected from the int_arm_neon_vhsubs / int_arm_neon_vhsubu intrinsics.
  defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
  defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;

  // VQSUB : Vector Saturating Subtract
  // Selected from the generic ssubsat / usubsat nodes.
  defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", ssubsat, 0>;
  defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", usubsat, 0>;
  // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
  // Instantiated with null_frag; selection is done by the explicit patterns
  // at the end of this group.
  defm VSUBHN  : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i", null_frag, 0>;

  // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
  defm VRSUBHN : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
                             int_arm_neon_vrsubhn, 0>;

  // trunc((Vn - Vm) >>u N), with N equal to the result element width, is
  // selected to VSUBHN.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
              (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
              (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
              (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
  }
  // Vector Comparisons.

  // VCEQ : Vector Compare Equal
  // Floating-point compares produce an integer vector with the same lane
  // count (e.g. v2i32 from v2f32); f16 records also require HasFullFP16.
  defm VCEQ   : N3V_QHS_cmp<1, 0, 0b1000, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vceq", "i", ARMCCeq, 1>;
  def  VCEQfd : N3VD_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBIND, "vceq", "f32",
                         v2i32, v2f32, ARMCCeq, 1>;
  def  VCEQfq : N3VQ_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vceq", "f32",
                         v4i32, v4f32, ARMCCeq, 1>;
  def  VCEQhd : N3VD_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBIND, "vceq", "f16",
                         v4i16, v4f16, ARMCCeq, 1>,
                Requires<[HasNEON, HasFullFP16]>;
  def  VCEQhq : N3VQ_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vceq", "f16",
                         v8i16, v8f16, ARMCCeq, 1>,
                Requires<[HasNEON, HasFullFP16]>;

  // Compare against #0.
  let TwoOperandAliasConstraint = "$Vm = $Vd" in
  defm VCEQz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", ARMCCeq>;
  // VCGE : Vector Compare Greater Than or Equal
  // Signed uses ARMCCge, unsigned uses ARMCChs; f16 records also require
  // HasFullFP16.
  defm VCGEs  : N3V_QHS_cmp<0, 0, 0b0011, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcge", "s", ARMCCge, 0>;
  defm VCGEu  : N3V_QHS_cmp<1, 0, 0b0011, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcge", "u", ARMCChs, 0>;
  def  VCGEfd : N3VD_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBIND, "vcge", "f32",
                         v2i32, v2f32, ARMCCge, 0>;
  def  VCGEfq : N3VQ_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vcge", "f32",
                         v4i32, v4f32, ARMCCge, 0>;
  def  VCGEhd : N3VD_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBIND, "vcge", "f16",
                         v4i16, v4f16, ARMCCge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
  def  VCGEhq : N3VQ_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vcge", "f16",
                         v8i16, v8f16, ARMCCge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

  // Compare against #0 (>= and <=).
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                             "$Vd, $Vm, #0", ARMCCge>;
    defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                             "$Vd, $Vm, #0", ARMCCle>;
  }
  // VCGT : Vector Compare Greater Than
  // Signed uses ARMCCgt, unsigned uses ARMCChi; f16 records also require
  // HasFullFP16.
  defm VCGTs  : N3V_QHS_cmp<0, 0, 0b0011, 0,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcgt", "s", ARMCCgt, 0>;
  defm VCGTu  : N3V_QHS_cmp<1, 0, 0b0011, 0,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcgt", "u", ARMCChi, 0>;
  def  VCGTfd : N3VD_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBIND, "vcgt", "f32",
                         v2i32, v2f32, ARMCCgt, 0>;
  def  VCGTfq : N3VQ_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBINQ, "vcgt", "f32",
                         v4i32, v4f32, ARMCCgt, 0>;
  def  VCGThd : N3VD_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBIND, "vcgt", "f16",
                         v4i16, v4f16, ARMCCgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
  def  VCGThq : N3VQ_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBINQ, "vcgt", "f16",
                         v8i16, v8f16, ARMCCgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;

  // Compare against #0 (> and <).
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                             "$Vd, $Vm, #0", ARMCCgt>;
    defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                             "$Vd, $Vm, #0", ARMCClt>;
  }
  // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
  // Selected from int_arm_neon_vacge; f16 records also require HasFullFP16.
  def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
  def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
  def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
  def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

  // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
  // Selected from int_arm_neon_vacgt; f16 records also require HasFullFP16.
  def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
  def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
  def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacgt", "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
  def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacgt", "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
  // VTST : Vector Test Bits
  // Selected from the NEONvtst node; the data-type suffix string is empty.
  defm VTST : N3V_QHS<0, 0, 0b1000, 1,
                      IIC_VBINi4D, IIC_VBINi4D,
                      IIC_VBINi4Q, IIC_VBINi4Q,
                      "vtst", "", NEONvtst, 1>;
  // Three-operand vaclt / vacle assembler aliases. Each one maps onto
  // VACGT / VACGE with the two source operands exchanged ($Vm before $Vn).
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                      (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                      (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                      (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                      (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

  // f16 spellings of the same aliases.
  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                        (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                        (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                        (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                        (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  }
  // +fp16fml Floating Point Multiplication Variants
  // Everything in this scope requires HasNEON + HasFP16FML and decodes in the
  // "VFPV8" namespace. None of the records carries a selection pattern.
  let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

    // Three-register "f16" form built on N3VCP8 (third template argument 1).
    class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
      : N3VCP8<op1, op2, 1, op3,
               (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
               asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Three-register "f16" form built on N3VCP8Q0 (third template argument 0).
    class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
      : N3VCP8Q0<op1, op2, 0, op3,
                 (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
                 asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Vd, Vs, Vs[0-15], Idx[0-1]
    // The index goes in bit 3; Vn is split across bits 19-16 and bit 7, and
    // Vm across bit 5 and bits 2-0.
    class VFMD<string opc, string type, bits<2> S>
      : N3VLaneCP8<0, S, 0, 1,
                   (outs DPR:$Vd),
                   (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
                   IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bit idx;
      let Inst{3}     = idx;
      let Inst{19-16} = Vn{4-1};
      let Inst{7}     = Vn{0};
      let Inst{5}     = Vm{0};
      let Inst{2-0}   = Vm{3-1};
    }

    // Vq, Vd, Vd[0-7], Idx[0-3]
    // The two index bits go in bit 5 (high) and bit 3 (low).
    class VFMQ<string opc, string type, bits<2> S>
      : N3VLaneCP8<0, S, 1, 1,
                   (outs QPR:$Vd),
                   (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                   IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bits<2> idx;
      let Inst{5} = idx{1};
      let Inst{3} = idx{0};
    }

    //                                                op1   op2   op3
    def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
    def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
    def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
    def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
    def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
    def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
    def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
    def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
  } // HasNEON, HasFP16FML
  // Two-operand vaclt / vacle assembler aliases. $Vd doubles as a source, and
  // the operands are exchanged when mapping onto VACGT / VACGE ($Vm first,
  // then $Vd).
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                      (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                      (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                      (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                      (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

  // f16 spellings of the same aliases.
  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                        (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                        (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                        (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                        (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  }
  // Vector Bitwise Operations.

  // Bitwise NOT written as an XOR with an all-ones vector; vnotd uses
  // ARMimmAllOnesD and vnotq uses ARMimmAllOnesV.
  def vnotd : PatFrag<(ops node:$in), (xor node:$in, ARMimmAllOnesD)>;
  def vnotq : PatFrag<(ops node:$in), (xor node:$in, ARMimmAllOnesV)>;
  // VAND : Vector Bitwise AND
  def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                    "vand", v2i32, v2i32, and, 1>;
  def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                    "vand", v4i32, v4i32, and, 1>;

  // VEOR : Vector Bitwise Exclusive OR
  def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                    "veor", v2i32, v2i32, xor, 1>;
  def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                    "veor", v4i32, v4i32, xor, 1>;
  // VORR : Vector Bitwise OR
  def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                    "vorr", v2i32, v2i32, or, 1>;
  def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                    "vorr", v4i32, v4i32, or, 1>;

  // Immediate forms: OR a splat immediate into $src, which is tied to $Vd.
  // The template bits left as '?' are filled from SIMM in each record body
  // (bit 9 for i16, bits 10-9 for i32).
  def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                            (outs DPR:$Vd),
                            (ins nImmSplatI16:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                            (outs DPR:$Vd),
                            (ins nImmSplatI32:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                            (outs QPR:$Vd),
                            (ins nImmSplatI16:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                            (outs QPR:$Vd),
                            (ins nImmSplatI32:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VBIC : Vector Bitwise Bit Clear (AND NOT)
  // Register forms compute Vn & ~Vm via the vnotd / vnotq fragments.
  let TwoOperandAliasConstraint = "$Vn = $Vd" in {
    def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd,
                       (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;
    def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd,
                       (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;
  }

  // Immediate forms: apply ARMvbicImm with a splat immediate to $src, which
  // is tied to $Vd. The template bits left as '?' are filled from SIMM in
  // each record body (bit 9 for i16, bits 10-9 for i32).
  def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                            (outs DPR:$Vd),
                            (ins nImmSplatI16:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                            (outs DPR:$Vd),
                            (ins nImmSplatI32:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                            (outs QPR:$Vd),
                            (ins nImmSplatI16:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                            (outs QPR:$Vd),
                            (ins nImmSplatI32:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VORN : Vector Bitwise OR NOT
  // Computes Vn | ~Vm via the vnotd / vnotq fragments.
  def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set DPR:$Vd,
                     (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;
  def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set QPR:$Vd,
                     (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;
  // VMVN : Vector Bitwise NOT (Immediate)
  // All four records are marked rematerializable. The i16 forms fill bit 9
  // from SIMM; the i32 forms leave all four template bits open and fill
  // bits 11-8 from SIMM.
  let isReMaterializable = 1 in {
    def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                             (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i16", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                               (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                             (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i16", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                               (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                             (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i32", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                               (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }

    def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                             (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i32", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                               (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }
  }
  // VMVN : Vector Bitwise NOT
  // Register forms, selected from the vnotd / vnotq fragments.
  // NOTE(review): VMVNq is given the D-register itinerary IIC_VSUBiD, the
  // same one as VMVNd; confirm whether IIC_VSUBiQ was intended.
  def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm),
                   IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
  def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm),
                   IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;

  // Stand-alone patterns for the same v2i32 / v4i32 NOT.
  let Predicates = [HasNEON] in {
    def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
    def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
  }
  4943. // The TwoAddress pass will not go looking for equivalent operations
  4944. // with different register constraints; it just inserts copies.
  4945. // That is why the pseudo VBSP is implemented. It is expanded later into
  4946. // VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
  // VBSPd: DPR pseudo-instruction (PseudoNeonI, empty asm string) selected
  // from v2i32 (NEONvbsp $src1, $Vn, $Vm). Unlike VBSLd/VBIFd/VBITd it
  // declares no tied-operand constraint.
  4947. def VBSPd
  4948. : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
  4949. IIC_VBINiD, "",
  4950. [(set DPR:$Vd,
  4951. (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
  // Additional selection patterns for VBSPd (all require HasNEON):
  //  * the int_arm_neon_vbsl intrinsic for every 64-bit vector type listed
  //    (v8i8, v4i16, v2i32, v2f32, v1i64);
  //  * the open-coded select ($Vn & $Vd) | ($Vm & ~$Vd) for v2i32 and v1i64,
  //    where $Vd acts as the mask and becomes VBSPd's first operand.
  4952. let Predicates = [HasNEON] in {
  4953. def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
  4954. (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
  4955. (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4956. def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
  4957. (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
  4958. (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4959. def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
  4960. (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
  4961. (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4962. def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
  4963. (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
  4964. (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  4965. def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
  4966. (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
  4967. (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  // Open-coded bitwise select.
  4968. def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
  4969. (and DPR:$Vm, (vnotd DPR:$Vd)))),
  4970. (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  4971. def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
  4972. (and DPR:$Vm, (vnotd DPR:$Vd)))),
  4973. (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  4974. }
  // VBSPq: QPR counterpart of VBSPd -- a pseudo-instruction with an empty
  // asm string, selected from v4i32 (NEONvbsp $src1, $Vn, $Vm).
  4975. def VBSPq
  4976. : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
  4977. IIC_VBINiQ, "",
  4978. [(set QPR:$Vd,
  4979. (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
  // Additional selection patterns for VBSPq (all require HasNEON):
  //  * the int_arm_neon_vbsl intrinsic for every 128-bit vector type listed
  //    (v16i8, v8i16, v4i32, v4f32, v2i64);
  //  * the open-coded select ($Vn & $Vd) | ($Vm & ~$Vd) for v4i32 and v2i64,
  //    where $Vd acts as the mask and becomes VBSPq's first operand.
  4980. let Predicates = [HasNEON] in {
  4981. def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
  4982. (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
  4983. (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  4984. def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
  4985. (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
  4986. (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  4987. def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
  4988. (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
  4989. (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  4990. def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
  4991. (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
  4992. (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  4993. def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
  4994. (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
  4995. (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  // Open-coded bitwise select.
  4996. def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
  4997. (and QPR:$Vm, (vnotq QPR:$Vd)))),
  4998. (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  4999. def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
  5000. (and QPR:$Vm, (vnotq QPR:$Vd)))),
  5001. (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  5002. }
  5003. // VBSL : Vector Bitwise Select
  // Real "vbsl" instructions for DPR and QPR. They carry no selection pattern
  // ([]); $src1 is tied to the destination via the "$src1 = $Vd" constraint
  // and does not appear in the asm operand string.
  5004. def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
  5005. (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
  5006. N3RegFrm, IIC_VBINiD,
  5007. "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5008. []>;
  5009. def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
  5010. (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
  5011. N3RegFrm, IIC_VBINiQ,
  5012. "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5013. []>;
  5014. // VBIF : Vector Bitwise Insert if False
  5015. // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
  // NOTE(review): the operand names in the comment above ($dst, $src2, $src3)
  // do not match the defs below, which use $Vd, $src1, $Vn, $Vm and tie
  // $src1 to $Vd exactly like VBSL. No selection pattern is attached.
  5016. def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
  5017. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
  5018. N3RegFrm, IIC_VBINiD,
  5019. "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5020. []>;
  5021. def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
  5022. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
  5023. N3RegFrm, IIC_VBINiQ,
  5024. "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5025. []>;
  5026. // VBIT : Vector Bitwise Insert if True
  5027. // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
  // NOTE(review): the operand names in the comment above ($dst, $src2, $src3)
  // do not match the defs below, which use $Vd, $src1, $Vn, $Vm and tie
  // $src1 to $Vd exactly like VBSL. No selection pattern is attached.
  5028. def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
  5029. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
  5030. N3RegFrm, IIC_VBINiD,
  5031. "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5032. []>;
  5033. def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
  5034. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
  5035. N3RegFrm, IIC_VBINiQ,
  5036. "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
  5037. []>;
  5038. // Vector Absolute Differences.
  5039. // VABD : Vector Absolute Difference
  // Integer forms come from the N3VInt_QHS multiclass (signed ->
  // int_arm_neon_vabds, unsigned -> int_arm_neon_vabdu). The f32 and f16
  // forms are individual defs that reuse int_arm_neon_vabds; the f16 forms
  // additionally require HasFullFP16.
  5040. defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
  5041. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5042. "vabd", "s", int_arm_neon_vabds, 1>;
  5043. defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
  5044. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5045. "vabd", "u", int_arm_neon_vabdu, 1>;
  5046. def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
  5047. "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
  5048. def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
  5049. "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
  5050. def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
  5051. "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
  5052. Requires<[HasNEON, HasFullFP16]>;
  5053. def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
  5054. "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
  5055. Requires<[HasNEON, HasFullFP16]>;
  5056. // VABDL : Vector Absolute Difference Long (Q = | D - D |)
  // Both the signed and the unsigned form pass zext as the extension
  // operator (presumably because the absolute difference is non-negative,
  // so widening it is always a zero extension -- confirm against
  // N3VLIntExt_QHS, which is defined outside this chunk).
  5057. defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
  5058. "vabdl", "s", int_arm_neon_vabds, zext, 1>;
  5059. defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
  5060. "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
  // Also select the unsigned VABDL from the generic abs(zext(a) - zext(b))
  // DAG for the v8i16 and v4i32 result types.
  5061. let Predicates = [HasNEON] in {
  5062. def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
  5063. (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
  5064. def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
  5065. (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
  5066. }
  5067. // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
  5068. // shift/xor pattern for ABS.
  // abd_shr matches an ARMvshrsImm by $shift of the difference
  // zext($in1) - zext($in2).
  5069. def abd_shr :
  5070. PatFrag<(ops node:$in1, node:$in2, node:$shift),
  5071. (ARMvshrsImm (sub (zext node:$in1),
  5072. (zext node:$in2)), (i32 $shift))>;
  // With d = zext(a) - zext(b) and s = abd_shr(a, b, 63), the pattern below
  // matches xor(s, d + s) (both sides bitconverted to v4i32) and selects
  // VABDLuv2i64.
  5073. let Predicates = [HasNEON] in {
  5074. def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
  5075. (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
  5076. (zext (v2i32 DPR:$opB))),
  5077. (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
  5078. (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
  5079. }
  5080. // VABA : Vector Absolute Difference and Accumulate
  // Built from the same vabds/vabdu intrinsics as VABD, combined with `add`
  // through the N3VIntOp_QHS multiclass (defined outside this chunk).
  5081. defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
  5082. "vaba", "s", int_arm_neon_vabds, add>;
  5083. defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
  5084. "vaba", "u", int_arm_neon_vabdu, add>;
  5085. // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
  // As with VABDL, both signedness variants pass zext as the extension
  // operator; the accumulate operator is `add`.
  5086. defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
  5087. "vabal", "s", int_arm_neon_vabds, zext, add>;
  5088. defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
  5089. "vabal", "u", int_arm_neon_vabdu, zext, add>;
  5090. // Vector Maximum and Minimum.
  5091. // VMAX : Vector Maximum
  // Integer forms select the generic smax/umax nodes; the f32 and f16 forms
  // select fmaximum. The f16 forms additionally require HasFullFP16.
  5092. defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
  5093. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5094. "vmax", "s", smax, 1>;
  5095. defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
  5096. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5097. "vmax", "u", umax, 1>;
  5098. def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
  5099. "vmax", "f32",
  5100. v2f32, v2f32, fmaximum, 1>;
  5101. def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
  5102. "vmax", "f32",
  5103. v4f32, v4f32, fmaximum, 1>;
  5104. def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
  5105. "vmax", "f16",
  5106. v4f16, v4f16, fmaximum, 1>,
  5107. Requires<[HasNEON, HasFullFP16]>;
  5108. def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
  5109. "vmax", "f16",
  5110. v8f16, v8f16, fmaximum, 1>,
  5111. Requires<[HasNEON, HasFullFP16]>;
  5112. // VMAXNM
  // "vmaxnm" forms selected from fmaxnum. All require HasV8 and HasNEON (the
  // f16 forms also HasFullFP16), have no itinerary, and share the post-encoder
  // method and decoder namespace set by the enclosing `let`.
  5113. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  5114. def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
  5115. N3RegFrm, NoItinerary, "vmaxnm", "f32",
  5116. v2f32, v2f32, fmaxnum, 1>,
  5117. Requires<[HasV8, HasNEON]>;
  5118. def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
  5119. N3RegFrm, NoItinerary, "vmaxnm", "f32",
  5120. v4f32, v4f32, fmaxnum, 1>,
  5121. Requires<[HasV8, HasNEON]>;
  5122. def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
  5123. N3RegFrm, NoItinerary, "vmaxnm", "f16",
  5124. v4f16, v4f16, fmaxnum, 1>,
  5125. Requires<[HasV8, HasNEON, HasFullFP16]>;
  5126. def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
  5127. N3RegFrm, NoItinerary, "vmaxnm", "f16",
  5128. v8f16, v8f16, fmaxnum, 1>,
  5129. Requires<[HasV8, HasNEON, HasFullFP16]>;
  5130. }
  5131. // VMIN : Vector Minimum
  // Mirrors VMAX: integer forms select smin/umin, the f32 and f16 forms
  // select fminimum, and the f16 forms additionally require HasFullFP16.
  5132. defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
  5133. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5134. "vmin", "s", smin, 1>;
  5135. defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
  5136. IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
  5137. "vmin", "u", umin, 1>;
  5138. def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
  5139. "vmin", "f32",
  5140. v2f32, v2f32, fminimum, 1>;
  5141. def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
  5142. "vmin", "f32",
  5143. v4f32, v4f32, fminimum, 1>;
  5144. def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
  5145. "vmin", "f16",
  5146. v4f16, v4f16, fminimum, 1>,
  5147. Requires<[HasNEON, HasFullFP16]>;
  5148. def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
  5149. "vmin", "f16",
  5150. v8f16, v8f16, fminimum, 1>,
  5151. Requires<[HasNEON, HasFullFP16]>;
  5152. // VMINNM
  // "vminnm" forms selected from fminnum. All require HasV8 and HasNEON (the
  // f16 forms also HasFullFP16), have no itinerary, and share the post-encoder
  // method and decoder namespace set by the enclosing `let`.
  5153. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  5154. def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
  5155. N3RegFrm, NoItinerary, "vminnm", "f32",
  5156. v2f32, v2f32, fminnum, 1>,
  5157. Requires<[HasV8, HasNEON]>;
  5158. def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
  5159. N3RegFrm, NoItinerary, "vminnm", "f32",
  5160. v4f32, v4f32, fminnum, 1>,
  5161. Requires<[HasV8, HasNEON]>;
  5162. def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
  5163. N3RegFrm, NoItinerary, "vminnm", "f16",
  5164. v4f16, v4f16, fminnum, 1>,
  5165. Requires<[HasV8, HasNEON, HasFullFP16]>;
  5166. def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
  5167. N3RegFrm, NoItinerary, "vminnm", "f16",
  5168. v8f16, v8f16, fminnum, 1>,
  5169. Requires<[HasV8, HasNEON, HasFullFP16]>;
  5170. }
  5171. // Vector Pairwise Operations.
  5172. // VPADD : Vector Pairwise Add
  // Only D-register (N3VDInt) forms are defined here. Every element type,
  // integer and floating-point, selects the same int_arm_neon_vpadd
  // intrinsic; the f16 form additionally requires HasFullFP16.
  5173. def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
  5174. "vpadd", "i8",
  5175. v8i8, v8i8, int_arm_neon_vpadd, 0>;
  5176. def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
  5177. "vpadd", "i16",
  5178. v4i16, v4i16, int_arm_neon_vpadd, 0>;
  5179. def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
  5180. "vpadd", "i32",
  5181. v2i32, v2i32, int_arm_neon_vpadd, 0>;
  5182. def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
  5183. IIC_VPBIND, "vpadd", "f32",
  5184. v2f32, v2f32, int_arm_neon_vpadd, 0>;
  5185. def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
  5186. IIC_VPBIND, "vpadd", "f16",
  5187. v4f16, v4f16, int_arm_neon_vpadd, 0>,
  5188. Requires<[HasNEON, HasFullFP16]>;
  5189. // VPADDL : Vector Pairwise Add Long
  // Signed and unsigned variants are expanded by N2VPLInt_QHS (defined
  // outside this chunk) from int_arm_neon_vpaddls / int_arm_neon_vpaddlu.
  5190. defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
  5191. int_arm_neon_vpaddls>;
  5192. defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
  5193. int_arm_neon_vpaddlu>;
  5194. // VPADAL : Vector Pairwise Add and Accumulate Long
  // Signed and unsigned variants are expanded by N2VPLInt2_QHS (defined
  // outside this chunk) from int_arm_neon_vpadals / int_arm_neon_vpadalu.
  5195. defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
  5196. int_arm_neon_vpadals>;
  5197. defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
  5198. int_arm_neon_vpadalu>;
  5199. // VPMAX : Vector Pairwise Maximum
  // D-register forms only. Signed integer types use int_arm_neon_vpmaxs,
  // unsigned integer types int_arm_neon_vpmaxu, and the f32/f16 forms reuse
  // int_arm_neon_vpmaxs. The f16 form additionally requires HasFullFP16.
  5200. def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5201. "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
  5202. def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5203. "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
  5204. def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5205. "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
  5206. def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5207. "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
  5208. def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5209. "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
  5210. def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
  5211. "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
  5212. def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
  5213. "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
  5214. def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
  5215. "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
  5216. Requires<[HasNEON, HasFullFP16]>;
  5217. // VPMIN : Vector Pairwise Minimum
  // D-register forms only. Signed integer types use int_arm_neon_vpmins,
  // unsigned integer types int_arm_neon_vpminu, and the f32/f16 forms reuse
  // int_arm_neon_vpmins. The f16 form additionally requires HasFullFP16.
  5218. def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5219. "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
  5220. def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5221. "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
  5222. def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5223. "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
  5224. def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5225. "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
  5226. def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5227. "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
  5228. def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
  5229. "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
  5230. def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
  5231. "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
  5232. def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
  5233. "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
  5234. Requires<[HasNEON, HasFullFP16]>;
  5235. // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
  5236. // VRECPE : Vector Reciprocal Estimate
  // D and Q forms for u32, f32 and f16 elements, all selected from the
  // overloaded int_arm_neon_vrecpe intrinsic. The f16 forms additionally
  // require HasFullFP16.
  5237. def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
  5238. IIC_VUNAD, "vrecpe", "u32",
  5239. v2i32, v2i32, int_arm_neon_vrecpe>;
  5240. def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
  5241. IIC_VUNAQ, "vrecpe", "u32",
  5242. v4i32, v4i32, int_arm_neon_vrecpe>;
  5243. def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
  5244. IIC_VUNAD, "vrecpe", "f32",
  5245. v2f32, v2f32, int_arm_neon_vrecpe>;
  5246. def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
  5247. IIC_VUNAQ, "vrecpe", "f32",
  5248. v4f32, v4f32, int_arm_neon_vrecpe>;
  5249. def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
  5250. IIC_VUNAD, "vrecpe", "f16",
  5251. v4f16, v4f16, int_arm_neon_vrecpe>,
  5252. Requires<[HasNEON, HasFullFP16]>;
  5253. def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
  5254. IIC_VUNAQ, "vrecpe", "f16",
  5255. v8f16, v8f16, int_arm_neon_vrecpe>,
  5256. Requires<[HasNEON, HasFullFP16]>;
  5257. // VRECPS : Vector Reciprocal Step
  // Floating-point only (f32 and f16, D and Q), selected from
  // int_arm_neon_vrecps. The f16 forms additionally require HasFullFP16.
  5258. def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
  5259. IIC_VRECSD, "vrecps", "f32",
  5260. v2f32, v2f32, int_arm_neon_vrecps, 1>;
  5261. def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
  5262. IIC_VRECSQ, "vrecps", "f32",
  5263. v4f32, v4f32, int_arm_neon_vrecps, 1>;
  5264. def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
  5265. IIC_VRECSD, "vrecps", "f16",
  5266. v4f16, v4f16, int_arm_neon_vrecps, 1>,
  5267. Requires<[HasNEON, HasFullFP16]>;
  5268. def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
  5269. IIC_VRECSQ, "vrecps", "f16",
  5270. v8f16, v8f16, int_arm_neon_vrecps, 1>,
  5271. Requires<[HasNEON, HasFullFP16]>;
  5272. // VRSQRTE : Vector Reciprocal Square Root Estimate
  // D and Q forms for u32, f32 and f16 elements, all selected from the
  // overloaded int_arm_neon_vrsqrte intrinsic. The f16 forms additionally
  // require HasFullFP16.
  5273. def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
  5274. IIC_VUNAD, "vrsqrte", "u32",
  5275. v2i32, v2i32, int_arm_neon_vrsqrte>;
  5276. def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
  5277. IIC_VUNAQ, "vrsqrte", "u32",
  5278. v4i32, v4i32, int_arm_neon_vrsqrte>;
  5279. def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
  5280. IIC_VUNAD, "vrsqrte", "f32",
  5281. v2f32, v2f32, int_arm_neon_vrsqrte>;
  5282. def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
  5283. IIC_VUNAQ, "vrsqrte", "f32",
  5284. v4f32, v4f32, int_arm_neon_vrsqrte>;
  5285. def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
  5286. IIC_VUNAD, "vrsqrte", "f16",
  5287. v4f16, v4f16, int_arm_neon_vrsqrte>,
  5288. Requires<[HasNEON, HasFullFP16]>;
  5289. def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
  5290. IIC_VUNAQ, "vrsqrte", "f16",
  5291. v8f16, v8f16, int_arm_neon_vrsqrte>,
  5292. Requires<[HasNEON, HasFullFP16]>;
  5293. // VRSQRTS : Vector Reciprocal Square Root Step
  // Floating-point only (f32 and f16, D and Q), selected from
  // int_arm_neon_vrsqrts. The f16 forms additionally require HasFullFP16.
  5294. def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
  5295. IIC_VRECSD, "vrsqrts", "f32",
  5296. v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
  5297. def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
  5298. IIC_VRECSQ, "vrsqrts", "f32",
  5299. v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
  5300. def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
  5301. IIC_VRECSD, "vrsqrts", "f16",
  5302. v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
  5303. Requires<[HasNEON, HasFullFP16]>;
  5304. def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
  5305. IIC_VRECSQ, "vrsqrts", "f16",
  5306. v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
  5307. Requires<[HasNEON, HasFullFP16]>;
  5308. // Vector Shifts.
  5309. // VSHL : Vector Shift
  // Register-shift forms, expanded by N3VInt_QHSDSh (defined outside this
  // chunk) from the int_arm_neon_vshifts / int_arm_neon_vshiftu intrinsics
  // using the N3RegVShFrm format.
  5310. defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
  5311. IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
  5312. "vshl", "s", int_arm_neon_vshifts>;
  5313. defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
  5314. IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
  5315. "vshl", "u", int_arm_neon_vshiftu>;
  // Select the VSHLs*/VSHLu* instructions from the ARMvshls / ARMvshlu nodes
  // as well, for all eight vector types (four DPR, four QPR) per signedness.
  5316. let Predicates = [HasNEON] in {
  // Signed: ARMvshls.
  5317. def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
  5318. (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
  5319. def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
  5320. (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
  5321. def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
  5322. (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
  5323. def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
  5324. (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
  5325. def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
  5326. (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
  5327. def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
  5328. (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
  5329. def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
  5330. (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
  5331. def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
  5332. (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
  // Unsigned: ARMvshlu.
  5333. def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
  5334. (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
  5335. def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
  5336. (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
  5337. def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
  5338. (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
  5339. def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
  5340. (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
  5341. def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
  5342. (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
  5343. def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
  5344. (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
  5345. def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
  5346. (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
  5347. def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
  5348. (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
  5349. }
  5350. // VSHL : Vector Shift Left (Immediate)
  // Immediate left shift is selected from ARMvshlImm.
  5351. defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
  5352. // VSHR : Vector Shift Right (Immediate)
  // Signed and unsigned right shifts are selected from ARMvshrsImm and
  // ARMvshruImm; the extra string argument ("VSHRs"/"VSHRu") is passed to
  // N2VShR_QHSD, whose use of it is defined outside this chunk.
  5353. defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
  5354. ARMvshrsImm>;
  5355. defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
  5356. ARMvshruImm>;
  5357. // VSHLL : Vector Shift Left Long
  // The selection fragment is built inline: ARMvshlImm applied to the
  // sign-extended (signed form) or zero-extended (unsigned form) operand.
  5358. defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  5359. PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
  5360. defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  5361. PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
  5362. // VSHLL : Vector Shift Left Long (with maximum shift count)
  // N2VLShMax derives from N2VLSh with null_frag as the selection fragment
  // (so no pattern is generated by the class itself), fixes instruction bits
  // 21-16 to op21_16, and routes decoding through DecodeVSHLMaxInstruction.
  5363. class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
  5364. bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
  5365. ValueType OpTy, Operand ImmTy>
  5366. : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
  5367. ResTy, OpTy, ImmTy, null_frag> {
  5368. let Inst{21-16} = op21_16;
  5369. let DecoderMethod = "DecodeVSHLMaxInstruction";
  5370. }
  // Maximum-shift "vshll" forms for i8, i16 and i32 source elements. Each
  // uses the immediate operand class matching its element width
  // (imm8/imm16/imm32) and supplies its own op21_16 value to N2VLShMax.
  5371. def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
  5372. v8i16, v8i8, imm8>;
  5373. def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
  5374. v4i32, v4i16, imm16>;
  5375. def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
  5376. v2i64, v2i32, imm32>;
  // Selection patterns for the maximum-shift VSHLL forms: a left shift by
  // exactly the source element width (8, 16 or 32) of a zext, sext or anyext
  // operand. All three extension kinds map to the same instruction.
  5377. let Predicates = [HasNEON] in {
  // zext
  5378. def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
  5379. (VSHLLi8 DPR:$Rn, 8)>;
  5380. def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
  5381. (VSHLLi16 DPR:$Rn, 16)>;
  5382. def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
  5383. (VSHLLi32 DPR:$Rn, 32)>;
  // sext
  5384. def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
  5385. (VSHLLi8 DPR:$Rn, 8)>;
  5386. def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
  5387. (VSHLLi16 DPR:$Rn, 16)>;
  5388. def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
  5389. (VSHLLi32 DPR:$Rn, 32)>;
  // anyext
  5390. def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
  5391. (VSHLLi8 DPR:$Rn, 8)>;
  5392. def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
  5393. (VSHLLi16 DPR:$Rn, 16)>;
  5394. def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
  5395. (VSHLLi32 DPR:$Rn, 32)>;
  5396. }
  5397. // VSHRN : Vector Shift Right and Narrow
  // The multiclass pattern matches trunc of a signed immediate right shift
  // (ARMvshrsImm).
  5398. defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
  5399. PatFrag<(ops node:$Rn, node:$amt),
  5400. (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
  // The same instructions are also selected for trunc of an unsigned
  // immediate right shift (ARMvshruImm).
  5401. let Predicates = [HasNEON] in {
  5402. def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
  5403. (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
  5404. def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
  5405. (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
  5406. def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
  5407. (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
  5408. }
  5409. // VRSHL : Vector Rounding Shift
  // Register forms selected from int_arm_neon_vrshifts / int_arm_neon_vrshiftu.
  5410. defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
  5411. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5412. "vrshl", "s", int_arm_neon_vrshifts>;
  5413. defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
  5414. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5415. "vrshl", "u", int_arm_neon_vrshiftu>;
  5416. // VRSHR : Vector Rounding Shift Right
  // Immediate forms selected from NEONvrshrsImm / NEONvrshruImm.
  5417. defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
  5418. NEONvrshrsImm>;
  5419. defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
  5420. NEONvrshruImm>;
  5421. // VRSHRN : Vector Rounding Shift Right and Narrow
  // Selected from NEONvrshrnImm.
  5422. defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
  5423. NEONvrshrnImm>;
  5424. // VQSHL : Vector Saturating Shift
  // Register forms selected from int_arm_neon_vqshifts / int_arm_neon_vqshiftu.
  5425. defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
  5426. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5427. "vqshl", "s", int_arm_neon_vqshifts>;
  5428. defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
  5429. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5430. "vqshl", "u", int_arm_neon_vqshiftu>;
  5431. // VQSHL : Vector Saturating Shift Left (Immediate)
  // Immediate forms selected from NEONvqshlsImm / NEONvqshluImm.
  5432. defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
  5433. defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
  5434. // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
  // Uses the "vqshlu" mnemonic with an "s" data-type prefix and is selected
  // from NEONvqshlsuImm.
  5435. defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
  5436. // VQSHRN : Vector Saturating Shift Right and Narrow
  5437. defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
  5438. NEONvqshrnsImm>;
  5439. defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
  5440. NEONvqshrnuImm>;
  5441. // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
  5442. defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
  5443. NEONvqshrnsuImm>;
  5444. // VQRSHL : Vector Saturating Rounding Shift
  // Register forms selected from int_arm_neon_vqrshifts /
  // int_arm_neon_vqrshiftu.
  5445. defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
  5446. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5447. "vqrshl", "s", int_arm_neon_vqrshifts>;
  5448. defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
  5449. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5450. "vqrshl", "u", int_arm_neon_vqrshiftu>;
  5451. // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
  // Narrowing immediate forms selected from NEONvqrshrnsImm / NEONvqrshrnuImm.
  5452. defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
  5453. NEONvqrshrnsImm>;
  5454. defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
  5455. NEONvqrshrnuImm>;
  5456. // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
  5457. defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
  5458. NEONvqrshrnsuImm>;
  5459. // VSRA : Vector Shift Right and Accumulate
  // Built by N2VShAdd_QHSD from the plain immediate right-shift nodes
  // (ARMvshrsImm / ARMvshruImm).
  5460. defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
  5461. defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
  5462. // VRSRA : Vector Rounding Shift Right and Accumulate
  // Same multiclass, using the rounding shift nodes NEONvrshrsImm /
  // NEONvrshruImm.
  5463. defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
  5464. defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
  5465. // VSLI : Vector Shift Left and Insert
  // No selection node is passed here; any patterns come from
  // N2VShInsL_QHSD itself (defined outside this chunk).
  5466. defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
  5467. // VSRI : Vector Shift Right and Insert
  5468. defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
  5469. // Vector Absolute and Saturating Absolute.
  5470. // VABS : Vector Absolute Value
  // Integer forms select the generic `abs` node; the f32 and f16 forms
  // select `fabs`. The f16 forms additionally require HasFullFP16.
  5471. defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
  5472. IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
  5473. def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
  5474. "vabs", "f32",
  5475. v2f32, v2f32, fabs>;
  5476. def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
  5477. "vabs", "f32",
  5478. v4f32, v4f32, fabs>;
  5479. def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
  5480. "vabs", "f16",
  5481. v4f16, v4f16, fabs>,
  5482. Requires<[HasNEON, HasFullFP16]>;
  5483. def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
  5484. "vabs", "f16",
  5485. v8f16, v8f16, fabs>,
  5486. Requires<[HasNEON, HasFullFP16]>;
  5487. // VQABS : Vector Saturating Absolute Value
  // Signed integer forms only, selected from int_arm_neon_vqabs.
  5488. defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
  5489. IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
  5490. int_arm_neon_vqabs>;
  5491. // Vector Negate.
  // vnegd / vnegq match integer negation written as a subtraction from an
  // all-zeros vector (ARMimmAllZerosD for the D form, ARMimmAllZerosV for
  // the Q form; both are defined outside this chunk).
  5492. def vnegd : PatFrag<(ops node:$in),
  5493. (sub ARMimmAllZerosD, node:$in)>;
  5494. def vnegq : PatFrag<(ops node:$in),
  5495. (sub ARMimmAllZerosV, node:$in)>;
  // Helper classes for integer VNEG. `size` is forwarded to N2V, OpcodeStr
  // and Dt form the mnemonic and data-type suffix, and Ty is the vector type
  // used in the selection pattern (vnegd for DPR, vnegq for QPR).
  5496. class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  5497. : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
  5498. IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
  5499. [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
  5500. class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  5501. : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
  5502. IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
  5503. [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
  5504. // VNEG : Vector Negate (integer)
  // One def per element size (s8/s16/s32) for DPR and QPR, instantiating the
  // VNEGD / VNEGQ helper classes with size values 0b00, 0b01 and 0b10.
  5505. def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
  5506. def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
  5507. def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
  5508. def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
  5509. def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
  5510. def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
  5511. // VNEG : Vector Negate (floating-point)
  // f32 and f16 forms for DPR and QPR, selected from `fneg`. The f16 forms
  // additionally require HasFullFP16. Note the naming: the f32 Q form is
  // VNEGf32q while the other three follow the VNEGfd / VNEGhd / VNEGhq scheme.
  5512. def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
  5513. (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
  5514. "vneg", "f32", "$Vd, $Vm", "",
  5515. [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
  5516. def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
  5517. (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
  5518. "vneg", "f32", "$Vd, $Vm", "",
  5519. [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
  5520. def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
  5521. (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
  5522. "vneg", "f16", "$Vd, $Vm", "",
  5523. [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
  5524. Requires<[HasNEON, HasFullFP16]>;
  5525. def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
  5526. (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
  5527. "vneg", "f16", "$Vd, $Vm", "",
  5528. [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
  5529. Requires<[HasNEON, HasFullFP16]>;
  // Stand-alone HasNEON patterns mapping vnegd / vnegq of each integer
  // vector type to the matching VNEGs* instruction. They match the same DAGs
  // as the inline patterns in the VNEGD / VNEGQ classes.
  5530. let Predicates = [HasNEON] in {
  5531. def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
  5532. def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
  5533. def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
  5534. def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
  5535. def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
  5536. def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
  5537. }
  5538. // VQNEG : Vector Saturating Negate
  // Signed integer forms only, selected from int_arm_neon_vqneg.
  5539. defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
  5540. IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
  5541. int_arm_neon_vqneg>;
  5542. // Vector Bit Counting Operations.
  5543. // VCLS : Vector Count Leading Sign Bits
  // Selected from the int_arm_neon_vcls intrinsic.
  5544. defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
  5545. IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
  5546. int_arm_neon_vcls>;
  5547. // VCLZ : Vector Count Leading Zeros
  // Selected from the generic ctlz node.
  5548. defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
  5549. IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
  5550. ctlz>;
  5551. // VCNT : Vector Count One Bits
  // Only byte-element forms are defined (v8i8 and v16i8), selected from the
  // generic ctpop node; the data-type suffix is the bare "8".
  5552. def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
  5553. IIC_VCNTiD, "vcnt", "8",
  5554. v8i8, v8i8, ctpop>;
  5555. def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
  5556. IIC_VCNTiQ, "vcnt", "8",
  5557. v16i8, v16i8, ctpop>;
  5558. // Vector Swap
  // VSWPd: "vswp" on D registers. Both registers are outputs ($Vd, $Vm), each
  // tied to one input through the constraint string "$in1 = $Vd, $in2 = $Vm".
  // The pattern list is empty, so this record is never chosen by a selection
  // pattern declared here, and it has no itinerary (NoItinerary).
  5559. def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
  5560. (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
  5561. NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
  5562. []>;
  // VSWPq: "vswp" on Q registers. Same shape as VSWPd (two tied outputs, empty
  // pattern list, NoItinerary) but over QPR, with the sixth template argument
  // set to 1 instead of 0.
  5563. def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
  5564. (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
  5565. NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
  5566. []>;
  5567. // Vector Move Operations.
  5568. // VMOV : Vector Move (Register)
  // Assembler aliases: "vmov Vd, Vm" is accepted and mapped onto VORRd / VORRq
  // with $Vm supplied as both source operands. There is no separate
  // register-move record in this span.
  5569. def : NEONInstAlias<"vmov${p} $Vd, $Vm",
  5570. (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  5571. def : NEONInstAlias<"vmov${p} $Vd, $Vm",
  5572. (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  5573. // VMOV : Vector Move (Immediate)
  5574. // Although VMOVs are not, strictly speaking, cheap, they are as expensive
  5575. // as their copy counterpart (VORR), so we should prefer rematerialization
  5576. // over splitting when it applies.
  // VMOV (immediate) records for every integer element width plus f32, in D
  // and Q forms. All of them are marked rematerializable and as cheap as a
  // move. Each record takes one encoded immediate operand ($SIMM) and is
  // selected for ARMvmovImm (integer) or ARMvmovFPImm (f32) of that operand.
  5577. let isReMaterializable = 1, isAsCheapAsAMove=1 in {
  // i8: fully fixed third template argument (0b1110), nImmSplatI8 operand.
  5578. def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
  5579. (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
  5580. "vmov", "i8", "$Vd, $SIMM", "",
  5581. [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
  5582. def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
  5583. (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
  5584. "vmov", "i8", "$Vd, $SIMM", "",
  5585. [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
  // i16: one bit of the third template argument is left unset ('?') and
  // Inst{9} is copied from bit 9 of $SIMM.
  // NOTE(review): presumably Inst{9} is the '?' position -- confirm against
  // the N1ModImm definition.
  5586. def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
  5587. (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
  5588. "vmov", "i16", "$Vd, $SIMM", "",
  5589. [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  5590. let Inst{9} = SIMM{9};
  5591. }
  5592. def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
  5593. (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
  5594. "vmov", "i16", "$Vd, $SIMM", "",
  5595. [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  5596. let Inst{9} = SIMM{9};
  5597. }
  // i32: all four bits of the third template argument are unset and
  // Inst{11-8} is copied from bits 11-8 of $SIMM.
  5598. def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
  5599. (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
  5600. "vmov", "i32", "$Vd, $SIMM", "",
  5601. [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  5602. let Inst{11-8} = SIMM{11-8};
  5603. }
  5604. def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
  5605. (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
  5606. "vmov", "i32", "$Vd, $SIMM", "",
  5607. [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  5608. let Inst{11-8} = SIMM{11-8};
  5609. }
  // i64: same fixed 0b1110 argument as i8, but the sixth template argument
  // is 1 instead of 0.
  5610. def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
  5611. (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
  5612. "vmov", "i64", "$Vd, $SIMM", "",
  5613. [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
  5614. def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
  5615. (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
  5616. "vmov", "i64", "$Vd, $SIMM", "",
  5617. [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
  // f32: fixed 0b1111 argument; selected for ARMvmovFPImm rather than
  // ARMvmovImm.
  5618. def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
  5619. (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
  5620. "vmov", "f32", "$Vd, $SIMM", "",
  5621. [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
  5622. def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
  5623. (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
  5624. "vmov", "f32", "$Vd, $SIMM", "",
  5625. [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
  5626. } // isReMaterializable, isAsCheapAsAMove
  5627. // Add support for bytes replication feature, so it could be GAS compatible.
  // Emits four assembler aliases for a wider element type `To`: "vmov.i<N>"
  // and "vmvn.i<N>" (N = To.Size), each in D and Q form. All four map onto the
  // i8 VMOV records (VMOVv8i8 / VMOVv16i8). The vmov aliases use the
  // nImmVMOVIReplicate<i8, To> operand; the vmvn aliases use
  // nImmVINVIReplicate<i8, To>, so vmvn is canonicalised to a vmov.i8.
  5628. multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  5629. // E.g. instructions below:
  5630. // "vmov.i32 d0, #0xffffffff"
  5631. // "vmov.i32 d0, #0xabababab"
  5632. // "vmov.i16 d0, #0xabab"
  5633. // are incorrect, but we could deal with such cases.
  5634. // For last two instructions, for example, it should emit:
  5635. // "vmov.i8 d0, #0xab"
  5636. def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
  5637. (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  5638. def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
  5639. (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  5640. // Also add same support for VMVN instructions. So instruction:
  5641. // "vmvn.i32 d0, #0xabababab"
  5642. // actually means:
  5643. // "vmov.i8 d0, #0x54"
  5644. def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
  5645. (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  5646. def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
  5647. (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  5648. }
  // Instantiate the byte-replication aliases for every element type wider
  // than i8.
  5649. defm : NEONImmReplicateI8InstAlias<i16>;
  5650. defm : NEONImmReplicateI8InstAlias<i32>;
  5651. defm : NEONImmReplicateI8InstAlias<i64>;
  5652. // Similar to above for types other than i8, e.g.:
  5653. // "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
  5654. // "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
  5655. // In this case we do not canonicalize VMVN to VMOV
  // Emits "vmov.i<N>" / "vmvn.i<N>" aliases (N = To.Size) that map onto the
  // narrower-element records passed in: V8/V16 are the D/Q VMOV records and
  // NV8/NV16 the D/Q VMVN records for element type `From`. All four aliases
  // use the nImmVMOVIReplicate<From, To> operand, so a vmvn alias stays a
  // vmvn instead of being rewritten as a vmov.
  5656. multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
  5657. NeonI NV8, NeonI NV16, ValueType To> {
  5658. def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
  5659. (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  5660. def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
  5661. (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  5662. def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
  5663. (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  5664. def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
  5665. (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  5666. }
  // Instantiations: i16 patterns written as .i32 or .i64, and i32 patterns
  // written as .i64. The VMVNv* records are defined outside this chunk.
  5667. defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
  5668. VMVNv4i16, VMVNv8i16, i32>;
  5669. defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
  5670. VMVNv4i16, VMVNv8i16, i64>;
  5671. defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
  5672. VMVNv2i32, VMVNv4i32, i64>;
  5673. // TODO: add "VMOV <-> VMVN" conversion for cases like
  5674. // "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
  5675. // "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
  5676. // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
  5677. // require zero cycles to execute so they should be used wherever possible for
  5678. // setting a register to zero.
  5679. // Even without these pseudo-insts we would probably end up with the correct
  5680. // instruction, but we could not mark the general ones with "isAsCheapAsAMove"
  5681. // since they are sometimes rather expensive (in general).
  // Zeroing pseudos. Each matches an all-zeros vector (ARMimmAllZerosD as
  // v2i32, ARMimmAllZerosV as v4i32) and expands to the corresponding VMOV
  // i32 record with immediate 0 and the operands (ops 14, zero_reg). They are
  // only available with HasZCZ, and AddedComplexity = 50 raises their priority
  // over competing patterns.
  5682. let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  5683. def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
  5684. [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
  5685. (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
  5686. Requires<[HasZCZ]>;
  5687. def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
  5688. [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
  5689. (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
  5690. Requires<[HasZCZ]>;
  5691. }
  5692. // VMOV : Vector Get Lane (move scalar to ARM core register)
  // VGETLNs8: "vmov.s8 Rt, Dn[lane]". Selected for ARMvgetlanes on v8i8. The
  // 3-bit lane index is split across the encoding: lane{2} goes to Inst{21}
  // and lane{1-0} to Inst{6-5}.
  5693. def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
  5694. (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
  5695. IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
  5696. [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
  5697. imm:$lane))]> {
  5698. let Inst{21} = lane{2};
  5699. let Inst{6-5} = lane{1-0};
  5700. }
  // VGETLNs16: "vmov.s16 Rt, Dn[lane]". Selected for ARMvgetlanes on v4i16.
  // The 2-bit lane index is encoded as lane{1} -> Inst{21}, lane{0} -> Inst{6}.
  5701. def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
  5702. (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
  5703. IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
  5704. [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
  5705. imm:$lane))]> {
  5706. let Inst{21} = lane{1};
  5707. let Inst{6} = lane{0};
  5708. }
  // VGETLNu8: "vmov.u8 Rt, Dn[lane]". Selected for ARMvgetlaneu on v8i8. Lane
  // bits are placed exactly as in VGETLNs8 (lane{2} -> Inst{21},
  // lane{1-0} -> Inst{6-5}).
  5709. def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
  5710. (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
  5711. IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
  5712. [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
  5713. imm:$lane))]> {
  5714. let Inst{21} = lane{2};
  5715. let Inst{6-5} = lane{1-0};
  5716. }
  // VGETLNu16: "vmov.u16 Rt, Dn[lane]". Selected for ARMvgetlaneu on v4i16.
  // Lane bits are placed exactly as in VGETLNs16 (lane{1} -> Inst{21},
  // lane{0} -> Inst{6}).
  5717. def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
  5718. (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
  5719. IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
  5720. [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
  5721. imm:$lane))]> {
  5722. let Inst{21} = lane{1};
  5723. let Inst{6} = lane{0};
  5724. }
  // VGETLNi32: "vmov.32 Rt, Dn[lane]". Selected for the generic extractelt on
  // v2i32 (no sign/zero variant). The single lane bit goes to Inst{21}. Gated
  // on HasFPRegs and HasFastVGETLNi32; separate patterns in this file cover
  // HasSlowVGETLNi32.
  5725. def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
  5726. (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
  5727. IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
  5728. [(set GPR:$R, (extractelt (v2i32 DPR:$V),
  5729. imm:$lane))]>,
  5730. Requires<[HasFPRegs, HasFastVGETLNi32]> {
  5731. let Inst{21} = lane{0};
  5732. }
  // Get-lane from a Q register for 8- and 16-bit elements, signed and
  // unsigned. Each pattern extracts the D subregister chosen by
  // DSubReg_i*_reg applied to the lane index, then runs the D-register
  // VGETLN instruction with the lane remapped through SubReg_i*_lane.
  5733. let Predicates = [HasNEON] in {
  5734. // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
  5735. def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
  5736. (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
  5737. (DSubReg_i8_reg imm:$lane))),
  5738. (SubReg_i8_lane imm:$lane))>;
  5739. def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
  5740. (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
  5741. (DSubReg_i16_reg imm:$lane))),
  5742. (SubReg_i16_lane imm:$lane))>;
  5743. def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
  5744. (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
  5745. (DSubReg_i8_reg imm:$lane))),
  5746. (SubReg_i8_lane imm:$lane))>;
  5747. def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
  5748. (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
  5749. (DSubReg_i16_reg imm:$lane))),
  5750. (SubReg_i16_lane imm:$lane))>;
  5751. }
  // 32-bit integer element extraction, split by subtarget speed:
  //  - HasFastVGETLNi32: a v4i32 source goes through VGETLNi32 on the
  //    selected D subregister.
  //  - HasSlowVGETLNi32: v2i32 and v4i32 sources avoid VGETLNi32; the lane is
  //    taken as an S subregister (SSubReg_f32_reg) and copied to the GPR
  //    register class.
  5752. def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
  5753. (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
  5754. (DSubReg_i32_reg imm:$lane))),
  5755. (SubReg_i32_lane imm:$lane))>,
  5756. Requires<[HasNEON, HasFastVGETLNi32]>;
  5757. def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
  5758. (COPY_TO_REGCLASS
  5759. (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
  5760. Requires<[HasNEON, HasSlowVGETLNi32]>;
  5761. def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
  5762. (COPY_TO_REGCLASS
  5763. (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
  5764. Requires<[HasNEON, HasSlowVGETLNi32]>;
  // Floating-point element extraction done purely with subregister copies:
  // f32 lanes copy the source into the *_VFP2 register class and take the S
  // subregister; f64 lanes of a v2f64 take the D subregister directly. The
  // v2i64 variant is present but commented out.
  5765. let Predicates = [HasNEON] in {
  5766. def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
  5767. (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
  5768. (SSubReg_f32_reg imm:$src2))>;
  5769. def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
  5770. (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
  5771. (SSubReg_f32_reg imm:$src2))>;
  5772. //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
  5773. // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
  5774. def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
  5775. (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
  5776. }
  // Extraction of an even-indexed (imm_even) 16-bit FP lane from a 4-element
  // (VT4, D register) or 8-element (VT8, Q register) vector. The source is
  // copied into the *_VFP2 register class and the S subregister chosen by
  // SSubReg_f16_reg is extracted; no instruction other than the copies is
  // named in the output pattern.
  5777. multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  5778. def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
  5779. (EXTRACT_SUBREG
  5780. (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
  5781. (SSubReg_f16_reg imm_even:$lane))>;
  5782. def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
  5783. (EXTRACT_SUBREG
  5784. (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
  5785. (SSubReg_f16_reg imm_even:$lane))>;
  5786. }
  // Extraction of an odd-indexed (imm_odd) 16-bit FP lane. Same subregister
  // extraction as the even case, but the resulting S register is passed
  // through VMOVH and the result is copied to the HPR register class.
  5787. multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  5788. def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
  5789. (COPY_TO_REGCLASS
  5790. (VMOVH (EXTRACT_SUBREG
  5791. (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
  5792. (SSubReg_f16_reg imm_odd:$lane))),
  5793. HPR)>;
  5794. def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
  5795. (COPY_TO_REGCLASS
  5796. (VMOVH (EXTRACT_SUBREG
  5797. (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
  5798. (SSubReg_f16_reg imm_odd:$lane))),
  5799. HPR)>;
  5800. }
  // f16 vectors: both the even-lane and the VMOVH-based odd-lane extraction
  // patterns are instantiated under HasNEON alone.
  5801. let Predicates = [HasNEON] in {
  5802. defm : ExtractEltEvenF16<v4f16, v8f16>;
  5803. defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
  5804. }
  // bf16 odd lanes via VMOVH. This needs HasFullFP16 in addition to HasNEON
  // and HasBF16, and AddedComplexity = 1 gives it precedence over the
  // GPR-based bf16 odd-lane fallback patterns defined elsewhere in this file.
  5805. let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  5806. // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  5807. defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
  5808. }
  // bf16 extraction available with HasBF16 + HasNEON only: even lanes use the
  // shared subregister multiclass, and odd lanes fall back to VGETLNu16 into a
  // GPR followed by a copy to HPR. The Q-register form first selects the D
  // subregister and remaps the lane index.
  5809. let Predicates = [HasBF16, HasNEON] in {
  5810. defm : ExtractEltEvenF16<v4bf16, v8bf16>;
  5811. // Otherwise, if VMOVH is not available resort to extracting the odd lane
  5812. // into a GPR and then moving to HPR
  5813. def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
  5814. (COPY_TO_REGCLASS
  5815. (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
  5816. HPR)>;
  5817. def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
  5818. (COPY_TO_REGCLASS
  5819. (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
  5820. (DSubReg_i16_reg imm:$lane))),
  5821. (SubReg_i16_lane imm:$lane)),
  5822. HPR)>;
  5823. }
  5824. // VMOV : Vector Set Lane (move ARM core register to scalar)
  // Set-lane instructions ("vmov.<size> Dd[lane], Rt") for 8-, 16- and 32-bit
  // elements. The destination $V is tied to the incoming vector $src1 by the
  // "$src1 = $V" constraint, so the instruction updates one lane in place.
  // Lane-index bits are placed in the encoding the same way as in the
  // VGETLN records of matching width.
  5825. let Constraints = "$src1 = $V" in {
  5826. def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
  5827. (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
  5828. IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
  5829. [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
  5830. GPR:$R, imm:$lane))]> {
  5831. let Inst{21} = lane{2};
  5832. let Inst{6-5} = lane{1-0};
  5833. }
  5834. def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
  5835. (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
  5836. IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
  5837. [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
  5838. GPR:$R, imm:$lane))]> {
  5839. let Inst{21} = lane{1};
  5840. let Inst{6} = lane{0};
  5841. }
  // The 32-bit form matches insertelt (not vector_insert), is gated on
  // HasVFP2, and is flagged isInsertSubreg.
  5842. def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
  5843. (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
  5844. IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
  5845. [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
  5846. GPR:$R, imm:$lane))]>,
  5847. Requires<[HasVFP2]> {
  5848. let Inst{21} = lane{0};
  5849. // This instruction is equivalent to
  5850. // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  5851. let isInsertSubreg = 1;
  5852. }
  5853. }
  5854. // TODO: for odd lanes we could optimize this a bit by using the VINS
  5855. // FullFP16 instruction when it is available
  // Insertion of a 16-bit FP scalar (VTScalar, held in HPR) into a 4-element
  // (VT4) or 8-element (VT8) vector. The scalar is copied to the GPR register
  // class and inserted with VSETLNi16. The Q-register form operates on the D
  // subregister that holds the lane and writes it back with INSERT_SUBREG.
  5856. multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  5857. def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
  5858. (VT4 (VSETLNi16 DPR:$src1,
  5859. (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  5860. def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
  5861. (VT8 (INSERT_SUBREG QPR:$src1,
  5862. (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
  5863. (DSubReg_i16_reg imm:$lane))),
  5864. (COPY_TO_REGCLASS HPR:$src2, GPR),
  5865. (SubReg_i16_lane imm:$lane))),
  5866. (DSubReg_i16_reg imm:$lane)))>;
  5867. }
  // Element-insert and scalar_to_vector selection patterns under HasNEON.
  5868. let Predicates = [HasNEON] in {
  // Integer insert into a Q register: extract the D subregister holding the
  // lane, run the D-register VSETLN on it, then INSERT_SUBREG it back.
  5869. def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
  5870. (v16i8 (INSERT_SUBREG QPR:$src1,
  5871. (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
  5872. (DSubReg_i8_reg imm:$lane))),
  5873. GPR:$src2, (SubReg_i8_lane imm:$lane))),
  5874. (DSubReg_i8_reg imm:$lane)))>;
  5875. def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
  5876. (v8i16 (INSERT_SUBREG QPR:$src1,
  5877. (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
  5878. (DSubReg_i16_reg imm:$lane))),
  5879. GPR:$src2, (SubReg_i16_lane imm:$lane))),
  5880. (DSubReg_i16_reg imm:$lane)))>;
  5881. def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
  5882. (v4i32 (INSERT_SUBREG QPR:$src1,
  5883. (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
  5884. (DSubReg_i32_reg imm:$lane))),
  5885. GPR:$src2, (SubReg_i32_lane imm:$lane))),
  5886. (DSubReg_i32_reg imm:$lane)))>;
  // f32 insert: a pure subregister insert of the SPR value after copying the
  // vector into the *_VFP2 register class.
  5887. def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
  5888. (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
  5889. SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
  5890. def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
  5891. (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
  5892. SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
  5893. defm : InsertEltF16<f16, v4f16, v8f16>;
  5894. //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
  5895. // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
  5896. def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
  5897. (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
  // scalar_to_vector from an FP register: insert into subregister 0 of an
  // IMPLICIT_DEF vector.
  5898. def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
  5899. (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
  5900. def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
  5901. (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
  5902. def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
  5903. (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
  // scalar_to_vector from a GPR: VSETLN into lane 0 of an IMPLICIT_DEF D
  // register. The Q-register forms additionally place that D register in
  // dsub_0 of an IMPLICIT_DEF Q register.
  5904. def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
  5905. (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
  5906. def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
  5907. (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
  5908. def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
  5909. (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
  5910. def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
  5911. (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
  5912. (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
  5913. dsub_0)>;
  5914. def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
  5915. (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
  5916. (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
  5917. dsub_0)>;
  5918. def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
  5919. (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
  5920. (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
  5921. dsub_0)>;
  5922. }
  // Same insert patterns as for f16, instantiated for bf16 vectors and gated
  // on HasBF16 in addition to HasNEON.
  5923. let Predicates = [HasNEON, HasBF16] in
  5924. defm : InsertEltF16<bf16, v4bf16, v8bf16>;
  5925. // VDUP : Vector Duplicate (from ARM core register to all elements)
  // VDUPD: base class for "vdup.<Dt> Dd, Rt". Wraps NVDup with a DPR output
  // and a GPR input, and is selected for ARMvdup of an i32 GPR producing the
  // vector type Ty. opcod1/opcod3 are forwarded to NVDup unchanged.
  5926. class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  5927. : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
  5928. IIC_VMOVIS, "vdup", Dt, "$V, $R",
  5929. [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
  // VDUPQ: Q-register counterpart of VDUPD ("vdup.<Dt> Qd, Rt"); identical
  // except that the output register class is QPR.
  5930. class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  5931. : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
  5932. IIC_VMOVIS, "vdup", Dt, "$V, $R",
  5933. [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
  // Concrete VDUP-from-GPR records for 8/16/32-bit elements in D and Q forms.
  // Only VDUP32d carries an extra HasFastVDUP32 requirement; VDUP32q has no
  // Requires clause here.
  5934. def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
  5935. def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
  5936. def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
  5937. Requires<[HasNEON, HasFastVDUP32]>;
  5938. def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
  5939. def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
  5940. def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
  5941. // ARMvdup patterns for uarchs with fast VDUP.32.
  // ARMvdup of a 32-bit value that lives in a GPR. An f32 produced by
  // bitconvert from a GPR reuses the integer VDUP32 records (the D form only
  // with HasFastVDUP32). With HasSlowVDUP32 the D-register duplicates are
  // built with VMOVDRR, passing the same GPR twice.
  5942. def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
  5943. Requires<[HasNEON,HasFastVDUP32]>;
  5944. def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
  5945. Requires<[HasNEON]>;
  5946. // ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
  5947. def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
  5948. Requires<[HasNEON,HasSlowVDUP32]>;
  5949. def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
  5950. Requires<[HasNEON,HasSlowVDUP32]>;
  5951. // VDUP : Vector Duplicate Lane (from scalar to all elements)
  // VDUPLND: base class for "vdup.<Dt> Dd, Dm[lane]". Wraps NVDupLane with a
  // DPR result and a DPR source plus a lane operand of type IdxTy. It is
  // selected for ARMvduplane where source and result share the vector type Ty.
  // op19_16 is forwarded to NVDupLane; the second NVDupLane argument is 0.
  5952. class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
  5953. ValueType Ty, Operand IdxTy>
  5954. : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
  5955. IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
  5956. [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
  // VDUPLNQ: base class for "vdup.<Dt> Qd, Dm[lane]". The source is still a D
  // register (OpTy) while the result is a Q register (ResTy); the second
  // NVDupLane argument is 1.
  // NOTE(review): the selection pattern names VectorIndex32:$lane for every
  // instantiation, whereas the operand list uses IdxTy:$lane -- confirm this
  // asymmetry with VDUPLND (which uses imm:$lane) is intentional.
  5957. class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
  5958. ValueType ResTy, ValueType OpTy, Operand IdxTy>
  5959. : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
  5960. IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
  5961. [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
  5962. VectorIndex32:$lane)))]>;
  5963. // Inst{19-16} is partially specified depending on the element size.
  // Concrete VDUP-lane records. The op19_16 argument leaves the upper bits
  // unset ('?'), and each record fills them from its lane index: 3 lane bits
  // into Inst{19-17} for 8-bit elements, 2 bits into Inst{19-18} for 16-bit
  // elements and 1 bit into Inst{19} for 32-bit elements.
  5964. def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  5965. bits<3> lane;
  5966. let Inst{19-17} = lane{2-0};
  5967. }
  5968. def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  5969. bits<2> lane;
  5970. let Inst{19-18} = lane{1-0};
  5971. }
  5972. def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  5973. bits<1> lane;
  5974. let Inst{19} = lane{0};
  5975. }
  5976. def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  5977. bits<3> lane;
  5978. let Inst{19-17} = lane{2-0};
  5979. }
  5980. def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  5981. bits<2> lane;
  5982. let Inst{19-18} = lane{1-0};
  5983. }
  5984. def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  5985. bits<1> lane;
  5986. let Inst{19} = lane{0};
  5987. }
  // Lane-duplicate selection patterns for the vector types that the VDUPLN*
  // records do not match directly, plus ARMvdup from an FP scalar register.
  5988. let Predicates = [HasNEON] in {
  // v4f16 has 16-bit elements, so the 16-bit lane form must be selected.
  // This pattern previously selected VDUPLN32d, whose lane field is a single
  // bit and whose element type is v2i32; the v4bf16 pattern in this file
  // already uses VDUPLN16d for the same shape.
  5989. def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
  5990. (VDUPLN16d DPR:$Vm, imm:$lane)>;
  5991. def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
  5992. (VDUPLN32d DPR:$Vm, imm:$lane)>;
  5993. def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
  5994. (VDUPLN32q DPR:$Vm, imm:$lane)>;
  // Source vector in a Q register: pick the D subregister that holds the
  // lane, remap the lane index into that subregister, and use the Q-result
  // VDUPLN record.
  5995. def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
  5996. (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
  5997. (DSubReg_i8_reg imm:$lane))),
  5998. (SubReg_i8_lane imm:$lane)))>;
  5999. def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
  6000. (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
  6001. (DSubReg_i16_reg imm:$lane))),
  6002. (SubReg_i16_lane imm:$lane)))>;
  6003. def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
  6004. (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
  6005. (DSubReg_i16_reg imm:$lane))),
  6006. (SubReg_i16_lane imm:$lane)))>;
  6007. def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
  6008. (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
  6009. (DSubReg_i32_reg imm:$lane))),
  6010. (SubReg_i32_lane imm:$lane)))>;
  6011. def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
  6012. (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
  6013. (DSubReg_i32_reg imm:$lane))),
  6014. (SubReg_i32_lane imm:$lane)))>;
  // ARMvdup of an FP scalar register: place the scalar in ssub_0 of an
  // IMPLICIT_DEF D register and duplicate lane 0.
  6015. def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
  6016. (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
  6017. (f16 HPR:$src), ssub_0), (i32 0)))>;
  6018. def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
  6019. (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
  6020. SPR:$src, ssub_0), (i32 0)))>;
  6021. def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
  6022. (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
  6023. SPR:$src, ssub_0), (i32 0)))>;
  6024. def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
  6025. (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
  6026. (f16 HPR:$src), ssub_0), (i32 0)))>;
  6027. }
  // bf16 lane-duplicate and scalar-duplicate patterns, gated on HasBF16 in
  // addition to HasNEON. All of them use the 16-bit VDUPLN records; the
  // scalar forms insert the HPR value into ssub_0 of an IMPLICIT_DEF v4bf16
  // and duplicate lane 0.
  6028. let Predicates = [HasNEON, HasBF16] in {
  6029. def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
  6030. (VDUPLN16d DPR:$Vm, imm:$lane)>;
  6031. def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
  6032. (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
  6033. (DSubReg_i16_reg imm:$lane))),
  6034. (SubReg_i16_lane imm:$lane)))>;
  6035. def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
  6036. (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
  6037. (bf16 HPR:$src), ssub_0), (i32 0)))>;
  6038. def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
  6039. (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
  6040. (bf16 HPR:$src), ssub_0), (i32 0)))>;
  6041. }
  6042. // VMOVN : Vector Narrowing Move
  // Narrowing moves. VMOVN is selected for the generic trunc node; the three
  // saturating variants are selected for target intrinsics: vqmovns ("vqmovn"
  // with "s"), vqmovnu ("vqmovn" with "u") and vqmovnsu (mnemonic "vqmovun"
  // with "s").
  6043. defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
  6044. "vmovn", "i", trunc>;
  6045. // VQMOVN : Vector Saturating Narrowing Move
  6046. defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
  6047. "vqmovn", "s", int_arm_neon_vqmovns>;
  6048. defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
  6049. "vqmovn", "u", int_arm_neon_vqmovnu>;
  6050. defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
  6051. "vqmovun", "s", int_arm_neon_vqmovnsu>;
  6052. // VMOVL : Vector Lengthening Move
  // Lengthening moves: VMOVLs is selected for sext and VMOVLu for zext. The
  // anyext patterns reuse the unsigned (VMOVLu*) records for all three
  // element widths.
  6053. defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
  6054. defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
  6055. let Predicates = [HasNEON] in {
  6056. def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
  6057. def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
  6058. def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
  6059. }
  6060. // Vector Conversions.
  6061. // VCVT : Vector Convert Between Floating-Point and Integers
  // VCVT between f32 and 32-bit integers, D (v2*) and Q (v4*) forms. Naming:
  // f2s/f2u are selected for fp_to_sint/fp_to_uint, s2f/u2f for
  // sint_to_fp/uint_to_fp. The four directions differ only in the fifth
  // template argument (0b01110, 0b01111, 0b01100, 0b01101).
  6062. def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
  6063. v2i32, v2f32, fp_to_sint>;
  6064. def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
  6065. v2i32, v2f32, fp_to_uint>;
  6066. def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
  6067. v2f32, v2i32, sint_to_fp>;
  6068. def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
  6069. v2f32, v2i32, uint_to_fp>;
  6070. def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
  6071. v4i32, v4f32, fp_to_sint>;
  6072. def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
  6073. v4i32, v4f32, fp_to_uint>;
  6074. def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
  6075. v4f32, v4i32, sint_to_fp>;
  6076. def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
  6077. v4f32, v4i32, uint_to_fp>;
  // VCVT between f16 and 16-bit integers, D (v4*) and Q (v8*) forms. These
  // mirror the f32 records with the third template argument set to 0b01
  // instead of 0b10, and every record requires HasNEON and HasFullFP16.
  6078. def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
  6079. v4i16, v4f16, fp_to_sint>,
  6080. Requires<[HasNEON, HasFullFP16]>;
  6081. def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
  6082. v4i16, v4f16, fp_to_uint>,
  6083. Requires<[HasNEON, HasFullFP16]>;
  6084. def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
  6085. v4f16, v4i16, sint_to_fp>,
  6086. Requires<[HasNEON, HasFullFP16]>;
  6087. def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
  6088. v4f16, v4i16, uint_to_fp>,
  6089. Requires<[HasNEON, HasFullFP16]>;
  6090. def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
  6091. v8i16, v8f16, fp_to_sint>,
  6092. Requires<[HasNEON, HasFullFP16]>;
  6093. def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
  6094. v8i16, v8f16, fp_to_uint>,
  6095. Requires<[HasNEON, HasFullFP16]>;
  6096. def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
  6097. v8f16, v8i16, sint_to_fp>,
  6098. Requires<[HasNEON, HasFullFP16]>;
  6099. def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
  6100. v8f16, v8i16, uint_to_fp>,
  6101. Requires<[HasNEON, HasFullFP16]>;
  6102. // VCVT{A, N, P, M}
  // VCVT_FPI: generates the eight records for one "vcvt<op>" variant, where
  // <op> is appended to "vcvt" to form the mnemonic. Record suffixes encode
  // signedness (S/U), register width (D/Q) and FP type (f = f32, h = f16).
  // Signed records use IntS and pass 0 as the fourth argument; unsigned
  // records use IntU and pass 1. All records require HasV8 and HasNEON, and
  // the f16 ones also HasFullFP16. Every record uses the
  // NEONThumb2V8PostEncoder post-encoder and the "v8NEON" decoder namespace.
  6103. multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
  6104. SDPatternOperator IntU> {
  6105. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  6106. def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6107. "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
  6108. def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6109. "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
  6110. def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6111. "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
  6112. def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6113. "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  6114. def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6115. "s16.f16", v4i16, v4f16, IntS>,
  6116. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6117. def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6118. "s16.f16", v8i16, v8f16, IntS>,
  6119. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6120. def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6121. "u16.f16", v4i16, v4f16, IntU>,
  6122. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6123. def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6124. "u16.f16", v8i16, v8f16, IntU>,
  6125. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6126. }
  6127. }
  // One VCVT_FPI instantiation per suffix (a, n, p, m), each with its own
  // op10_8 value (0b000..0b011) and its signed/unsigned intrinsic pair.
  6128. defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
  6129. defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
  6130. defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
  6131. defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
  6132. // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
  // Fixed-point VCVT, D-register forms, all decoded through DecodeVCVTD.
  // f2xs/f2xu are selected for the vcvtfp2fxs/vcvtfp2fxu intrinsics and
  // xs2f/xu2f for vcvtfxs2fp/vcvtfxu2fp. The first template argument is 0 for
  // the signed and 1 for the unsigned records. The f16 records sit in a
  // nested block that adds the HasNEON + HasFullFP16 predicates.
  6133. let DecoderMethod = "DecodeVCVTD" in {
  6134. def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
  6135. v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
  6136. def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
  6137. v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
  6138. def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
  6139. v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
  6140. def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
  6141. v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
  6142. let Predicates = [HasNEON, HasFullFP16] in {
  6143. def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
  6144. v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
  6145. def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
  6146. v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
  6147. def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
  6148. v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
  6149. def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
  6150. v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
  6151. } // Predicates = [HasNEON, HasFullFP16]
  6152. }
  // Fixed-point VCVT, Q-register forms, all decoded through DecodeVCVTQ. The
  // template arguments and intrinsics match the D-register records one for
  // one; only the class (N2VCvtQ) and the vector types (v4*/v8*) change.
  6153. let DecoderMethod = "DecodeVCVTQ" in {
  6154. def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
  6155. v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
  6156. def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
  6157. v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
  6158. def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
  6159. v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
  6160. def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
  6161. v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
  6162. let Predicates = [HasNEON, HasFullFP16] in {
  6163. def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
  6164. v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
  6165. def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
  6166. v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
  6167. def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
  6168. v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
  6169. def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
  6170. v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
  6171. } // Predicates = [HasNEON, HasFullFP16]
  6172. }
  // Assembler aliases for the fixed-point syntax with a literal "#0": each
  // "vcvt.<to>.<from> Vd, Vm, #0" spelling is mapped onto the corresponding
  // plain float<->integer VCVT record, which takes no immediate. The first
  // eight cover f32 (D then Q registers), the last eight f16.
  6173. def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
  6174. (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6175. def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
  6176. (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  6177. def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
  6178. (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6179. def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
  6180. (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6181. def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
  6182. (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6183. def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
  6184. (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6185. def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
  6186. (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6187. def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
  6188. (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6189. def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
  6190. (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6191. def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
  6192. (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  6193. def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
  6194. (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6195. def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
  6196. (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6197. def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
  6198. (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6199. def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
  6200. (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6201. def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
  6202. (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6203. def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
  6204. (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6205. // VCVT : Vector Convert Between Half-Precision and Single-Precision.
  6206. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
  6207. IIC_VUNAQ, "vcvt", "f16.f32",
  6208. v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
  6209. Requires<[HasNEON, HasFP16]>;
  6210. def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
  6211. IIC_VUNAQ, "vcvt", "f32.f16",
  6212. v4f32, v4i16, int_arm_neon_vcvthf2fp>,
  6213. Requires<[HasNEON, HasFP16]>;
  6214. // Vector Reverse.
  6215. // VREV64 : Vector Reverse elements within 64-bit doublewords
  6216. class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6217. : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
  6218. (ins DPR:$Vm), IIC_VMOVD,
  6219. OpcodeStr, Dt, "$Vd, $Vm", "",
  6220. [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
  6221. class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6222. : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
  6223. (ins QPR:$Vm), IIC_VMOVQ,
  6224. OpcodeStr, Dt, "$Vd, $Vm", "",
  6225. [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
  6226. def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
  6227. def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
  6228. def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
  6229. let Predicates = [HasNEON] in {
  6230. def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
  6231. }
  6232. def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
  6233. def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
  6234. def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
  6235. let Predicates = [HasNEON] in {
  6236. def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
  6237. (VREV64q32 QPR:$Vm)>;
  6238. def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
  6239. (VREV64q16 QPR:$Vm)>;
  6240. def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
  6241. (VREV64d16 DPR:$Vm)>;
  6242. }
  6243. // VREV32 : Vector Reverse elements within 32-bit words
  6244. class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6245. : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
  6246. (ins DPR:$Vm), IIC_VMOVD,
  6247. OpcodeStr, Dt, "$Vd, $Vm", "",
  6248. [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
  6249. class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6250. : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
  6251. (ins QPR:$Vm), IIC_VMOVQ,
  6252. OpcodeStr, Dt, "$Vd, $Vm", "",
  6253. [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
  6254. def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
  6255. def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
  6256. def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
  6257. def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
  6258. let Predicates = [HasNEON] in {
  6259. def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
  6260. (VREV32q16 QPR:$Vm)>;
  6261. def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
  6262. (VREV32d16 DPR:$Vm)>;
  6263. }
  6264. // VREV16 : Vector Reverse elements within 16-bit halfwords
  6265. class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6266. : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
  6267. (ins DPR:$Vm), IIC_VMOVD,
  6268. OpcodeStr, Dt, "$Vd, $Vm", "",
  6269. [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
  6270. class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6271. : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
  6272. (ins QPR:$Vm), IIC_VMOVQ,
  6273. OpcodeStr, Dt, "$Vd, $Vm", "",
  6274. [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
  6275. def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
  6276. def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
  6277. // Other Vector Shuffles.
  6278. // Aligned extractions: really just dropping registers
  6279. class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  6280. : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
  6281. (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
  6282. Requires<[HasNEON]>;
  6283. def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
  6284. def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
  6285. def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
  6286. def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
  6287. def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
  6288. def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
  6289. // VEXT : Vector Extract
  6290. // All of these have a two-operand InstAlias.
  6291. let TwoOperandAliasConstraint = "$Vn = $Vd" in {
  6292. class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  6293. : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
  6294. (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
  6295. IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
  6296. [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
  6297. (Ty DPR:$Vm), imm:$index)))]> {
  6298. bits<3> index;
  6299. let Inst{11} = 0b0;
  6300. let Inst{10-8} = index{2-0};
  6301. }
  6302. class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  6303. : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
  6304. (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
  6305. IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
  6306. [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
  6307. (Ty QPR:$Vm), imm:$index)))]> {
  6308. bits<4> index;
  6309. let Inst{11-8} = index{3-0};
  6310. }
  6311. }
  6312. def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  6313. let Inst{10-8} = index{2-0};
  6314. }
  6315. def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  6316. let Inst{10-9} = index{1-0};
  6317. let Inst{8} = 0b0;
  6318. }
  6319. let Predicates = [HasNEON] in {
  6320. def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
  6321. (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
  6322. }
  6323. def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  6324. let Inst{10} = index{0};
  6325. let Inst{9-8} = 0b00;
  6326. }
  6327. let Predicates = [HasNEON] in {
  6328. def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
  6329. (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
  6330. }
  6331. def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  6332. let Inst{11-8} = index{3-0};
  6333. }
  6334. def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  6335. let Inst{11-9} = index{2-0};
  6336. let Inst{8} = 0b0;
  6337. }
  6338. let Predicates = [HasNEON] in {
  6339. def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
  6340. (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
  6341. }
  6342. def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  6343. let Inst{11-10} = index{1-0};
  6344. let Inst{9-8} = 0b00;
  6345. }
  6346. def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  6347. let Inst{11} = index{0};
  6348. let Inst{10-8} = 0b000;
  6349. }
  6350. let Predicates = [HasNEON] in {
  6351. def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
  6352. (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
  6353. }
  6354. // VTRN : Vector Transpose
  6355. def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
  6356. def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
  6357. def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
  6358. def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
  6359. def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
  6360. def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
  6361. // VUZP : Vector Unzip (Deinterleave)
  6362. def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
  6363. def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
  6364. // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  6365. def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
  6366. (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
  6367. def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
  6368. def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
  6369. def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
  6370. // VZIP : Vector Zip (Interleave)
  6371. def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
  6372. def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
  6373. // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  6374. def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
  6375. (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
  6376. def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
  6377. def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
  6378. def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
  6379. // Vector Table Lookup and Table Extension.
  6380. // VTBL : Vector Table Lookup
  6381. let DecoderMethod = "DecodeTBLInstruction" in {
  6382. def VTBL1
  6383. : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
  6384. (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
  6385. "vtbl", "8", "$Vd, $Vn, $Vm", "",
  6386. [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
  6387. let hasExtraSrcRegAllocReq = 1 in {
  6388. def VTBL2
  6389. : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
  6390. (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
  6391. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6392. def VTBL3
  6393. : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
  6394. (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
  6395. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6396. def VTBL4
  6397. : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
  6398. (ins VecListFourD:$Vn, DPR:$Vm),
  6399. NVTBLFrm, IIC_VTB4,
  6400. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6401. } // hasExtraSrcRegAllocReq = 1
  6402. def VTBL3Pseudo
  6403. : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
  6404. def VTBL4Pseudo
  6405. : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
  6406. // VTBX : Vector Table Extension
  6407. def VTBX1
  6408. : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
  6409. (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
  6410. "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
  6411. [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
  6412. DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
  6413. let hasExtraSrcRegAllocReq = 1 in {
  6414. def VTBX2
  6415. : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
  6416. (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
  6417. "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
  6418. def VTBX3
  6419. : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
  6420. (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
  6421. NVTBLFrm, IIC_VTBX3,
  6422. "vtbx", "8", "$Vd, $Vn, $Vm",
  6423. "$orig = $Vd", []>;
  6424. def VTBX4
  6425. : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
  6426. (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
  6427. "vtbx", "8", "$Vd, $Vn, $Vm",
  6428. "$orig = $Vd", []>;
  6429. } // hasExtraSrcRegAllocReq = 1
  6430. def VTBX3Pseudo
  6431. : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
  6432. IIC_VTBX3, "$orig = $dst", []>;
  6433. def VTBX4Pseudo
  6434. : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
  6435. IIC_VTBX4, "$orig = $dst", []>;
  6436. } // DecoderMethod = "DecodeTBLInstruction"
  6437. let Predicates = [HasNEON] in {
  6438. def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
  6439. (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
  6440. v8i8:$Vn1, dsub_1),
  6441. v8i8:$Vm))>;
  6442. def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6443. v8i8:$Vm)),
  6444. (v8i8 (VTBX2 v8i8:$orig,
  6445. (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
  6446. v8i8:$Vn1, dsub_1),
  6447. v8i8:$Vm))>;
  6448. def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
  6449. v8i8:$Vn2, v8i8:$Vm)),
  6450. (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6451. v8i8:$Vn1, dsub_1,
  6452. v8i8:$Vn2, dsub_2,
  6453. (v8i8 (IMPLICIT_DEF)), dsub_3),
  6454. v8i8:$Vm))>;
  6455. def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6456. v8i8:$Vn2, v8i8:$Vm)),
  6457. (v8i8 (VTBX3Pseudo v8i8:$orig,
  6458. (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6459. v8i8:$Vn1, dsub_1,
  6460. v8i8:$Vn2, dsub_2,
  6461. (v8i8 (IMPLICIT_DEF)), dsub_3),
  6462. v8i8:$Vm))>;
  6463. def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
  6464. v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
  6465. (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6466. v8i8:$Vn1, dsub_1,
  6467. v8i8:$Vn2, dsub_2,
  6468. v8i8:$Vn3, dsub_3),
  6469. v8i8:$Vm))>;
  6470. def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6471. v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
  6472. (v8i8 (VTBX4Pseudo v8i8:$orig,
  6473. (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6474. v8i8:$Vn1, dsub_1,
  6475. v8i8:$Vn2, dsub_2,
  6476. v8i8:$Vn3, dsub_3),
  6477. v8i8:$Vm))>;
  6478. }
  6479. // VRINT : Vector Rounding
  6480. multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  6481. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  6482. def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
  6483. !strconcat("vrint", op), "f32",
  6484. v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
  6485. let Inst{9-7} = op9_7;
  6486. }
  6487. def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
  6488. !strconcat("vrint", op), "f32",
  6489. v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
  6490. let Inst{9-7} = op9_7;
  6491. }
  6492. def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
  6493. !strconcat("vrint", op), "f16",
  6494. v4f16, v4f16, Int>,
  6495. Requires<[HasV8, HasNEON, HasFullFP16]> {
  6496. let Inst{9-7} = op9_7;
  6497. }
  6498. def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
  6499. !strconcat("vrint", op), "f16",
  6500. v8f16, v8f16, Int>,
  6501. Requires<[HasV8, HasNEON, HasFullFP16]> {
  6502. let Inst{9-7} = op9_7;
  6503. }
  6504. }
  6505. def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
  6506. (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  6507. def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
  6508. (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  6509. let Predicates = [HasNEON, HasFullFP16] in {
  6510. def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
  6511. (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  6512. def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
  6513. (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  6514. }
  6515. }
  6516. defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
  6517. defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
  6518. defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
  6519. defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
  6520. defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
  6521. defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
  6522. // Cryptography instructions
  6523. let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
  6524. DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  6525. class AES<string op, bit op7, bit op6, SDPatternOperator Int>
  6526. : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
  6527. !strconcat("aes", op), "8", v16i8, v16i8, Int>,
  6528. Requires<[HasV8, HasCrypto]>;
  6529. class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
  6530. : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
  6531. !strconcat("aes", op), "8", v16i8, v16i8, Int>,
  6532. Requires<[HasV8, HasCrypto]>;
  6533. class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
  6534. SDPatternOperator Int>
  6535. : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
  6536. !strconcat("sha", op), "32", v4i32, v4i32, Int>,
  6537. Requires<[HasV8, HasCrypto]>;
  6538. class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
  6539. SDPatternOperator Int>
  6540. : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
  6541. !strconcat("sha", op), "32", v4i32, v4i32, Int>,
  6542. Requires<[HasV8, HasCrypto]>;
  6543. class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
  6544. : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
  6545. !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
  6546. Requires<[HasV8, HasCrypto]>;
  6547. }
  6548. def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
  6549. def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
  6550. def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
  6551. def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
  6552. def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
  6553. def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
  6554. def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
  6555. def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
  6556. def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
  6557. def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
  6558. def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
  6559. def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
  6560. def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
  6561. def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
  6562. let Predicates = [HasNEON] in {
  6563. def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
  6564. (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
  6565. (SHA1H (SUBREG_TO_REG (i64 0),
  6566. (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
  6567. ssub_0)),
  6568. ssub_0)), GPR)>;
  6569. def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6570. (SHA1C v4i32:$hash_abcd,
  6571. (SUBREG_TO_REG (i64 0),
  6572. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6573. ssub_0),
  6574. v4i32:$wk)>;
  6575. def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6576. (SHA1M v4i32:$hash_abcd,
  6577. (SUBREG_TO_REG (i64 0),
  6578. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6579. ssub_0),
  6580. v4i32:$wk)>;
  6581. def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6582. (SHA1P v4i32:$hash_abcd,
  6583. (SUBREG_TO_REG (i64 0),
  6584. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6585. ssub_0),
  6586. v4i32:$wk)>;
  6587. }
  6588. //===----------------------------------------------------------------------===//
  6589. // NEON instructions for single-precision FP math
  6590. //===----------------------------------------------------------------------===//
  6591. class N2VSPat<SDNode OpNode, NeonI Inst>
  6592. : NEONFPPat<(f32 (OpNode SPR:$a)),
  6593. (EXTRACT_SUBREG
  6594. (v2f32 (COPY_TO_REGCLASS (Inst
  6595. (INSERT_SUBREG
  6596. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6597. SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
  6598. class N3VSPat<SDNode OpNode, NeonI Inst>
  6599. : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
  6600. (EXTRACT_SUBREG
  6601. (v2f32 (COPY_TO_REGCLASS (Inst
  6602. (INSERT_SUBREG
  6603. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6604. SPR:$a, ssub_0),
  6605. (INSERT_SUBREG
  6606. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6607. SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  6608. class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  6609. : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
  6610. (EXTRACT_SUBREG
  6611. (v4f16 (COPY_TO_REGCLASS (Inst
  6612. (INSERT_SUBREG
  6613. (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
  6614. HPR:$a, ssub_0),
  6615. (INSERT_SUBREG
  6616. (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
  6617. HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  6618. class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  6619. : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
  6620. (EXTRACT_SUBREG
  6621. (v2f32 (COPY_TO_REGCLASS (Inst
  6622. (INSERT_SUBREG
  6623. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6624. SPR:$acc, ssub_0),
  6625. (INSERT_SUBREG
  6626. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6627. SPR:$a, ssub_0),
  6628. (INSERT_SUBREG
  6629. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6630. SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  6631. class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  6632. : NEONFPPat<(f32 (OpNode GPR:$a)),
  6633. (f32 (EXTRACT_SUBREG
  6634. (v2f32 (Inst
  6635. (INSERT_SUBREG
  6636. (v2f32 (IMPLICIT_DEF)),
  6637. (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
  6638. ssub_0))>;
  6639. class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  6640. : NEONFPPat<(i32 (OpNode SPR:$a)),
  6641. (i32 (EXTRACT_SUBREG
  6642. (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
  6643. SPR:$a, ssub_0))),
  6644. ssub_0))>;
  6645. def : N3VSPat<fadd, VADDfd>;
  6646. def : N3VSPat<fsub, VSUBfd>;
  6647. def : N3VSPat<fmul, VMULfd>;
  6648. def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
  6649. Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  6650. def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
  6651. Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  6652. def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
  6653. Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  6654. def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
  6655. Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  6656. def : N2VSPat<fabs, VABSfd>;
  6657. def : N2VSPat<fneg, VNEGfd>;
  6658. def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
  6659. def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
  6660. def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
  6661. def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
  6662. def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
  6663. def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
  6664. def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
  6665. def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
  6666. // NEON doesn't have any f64 conversions, so provide patterns to make
  6667. // sure the VFP conversions match when extracting from a vector.
  6668. def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
  6669. (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6670. def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
  6671. (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6672. def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
  6673. (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6674. def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
  6675. (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6676. // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
  6677. def : Pat<(f32 (bitconvert GPR:$a)),
  6678. (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
  6679. Requires<[HasNEON, DontUseVMOVSR]>;
  6680. def : Pat<(arm_vmovsr GPR:$a),
  6681. (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
  6682. Requires<[HasNEON, DontUseVMOVSR]>;
  6683. //===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness - Revert Patterns
  6685. //===----------------------------------------------------------------------===//
  6686. // bit_convert
  6687. // 64 bit conversions
  6688. let Predicates = [HasNEON] in {
  6689. def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
  6690. def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
  6691. def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
  6692. def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
  6693. def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
  6694. def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;
  6695. def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>;
  6696. def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>;
  6697. // 128 bit conversions
  6698. def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
  6699. def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
  6700. def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
  6701. def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
  6702. def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
  6703. def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
  6704. def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
  6705. def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
  6706. }
  6707. let Predicates = [IsLE,HasNEON] in {
  6708. // 64 bit conversions
  6709. def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
  6710. def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
  6711. def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
  6712. def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (f64 DPR:$src)>;
  6713. def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
  6714. def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
  6715. def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  6716. def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  6717. def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  6718. def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
  6719. def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  6720. def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
  6721. def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
  6722. def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  6723. def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  6724. def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
  6725. def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  6726. def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
  6727. def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
  6728. def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  6729. def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  6730. def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
  6731. def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  6732. def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
  6733. def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
  6734. def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  6735. def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  6736. def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  6737. def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>;
  6738. def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (v4bf16 DPR:$src)>;
  6739. def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
  6740. def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
  6741. def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
  6742. def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (v4bf16 DPR:$src)>;
  6743. def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
  6744. def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  6745. def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  6746. def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  6747. def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
  6748. def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
  6749. def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  6750. def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
  6751. def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  6752. def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
  6753. def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>;
  6754. def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
  6755. // 128 bit conversions
  6756. def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  6757. def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  6758. def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  6759. def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
  6760. def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  6761. def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  6762. def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  6763. def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  6764. def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  6765. def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
  6766. def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  6767. def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
  6768. def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  6769. def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  6770. def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  6771. def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
  6772. def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  6773. def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  6774. def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  6775. def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  6776. def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  6777. def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
  6778. def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  6779. def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  6780. def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  6781. def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  6782. def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  6783. def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  6784. def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
  6785. def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
  6786. def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
  6787. def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
  6788. def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
  6789. def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;
  6790. def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  6791. def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  6792. def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  6793. def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  6794. def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  6795. def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  6796. def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  6797. def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  6798. def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  6799. def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  6800. def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
  6801. def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  6802. }
  // Big-endian bitconverts between NEON types whose lane sizes differ.
  // Every conversion is selected to a VREV<A>{d,q}<B> instruction, where A is
  // the larger and B the smaller of the two lane sizes involved. The patterns
  // are grouped by lane width and emitted in both directions for each pair.
  let Predicates = [IsBE,HasNEON] in {
    // 64 bit conversions
    foreach VT64 = [ f64, v1i64 ] in {
      // 64-bit lanes <-> 32-bit lanes
      foreach VT32 = [ v2f32, v2i32 ] in {
        def : Pat<(VT64 (bitconvert (VT32 DPR:$src))), (VREV64d32 DPR:$src)>;
        def : Pat<(VT32 (bitconvert (VT64 DPR:$src))), (VREV64d32 DPR:$src)>;
      }
      // 64-bit lanes <-> 16-bit lanes
      foreach VT16 = [ v4f16, v4bf16, v4i16 ] in {
        def : Pat<(VT64 (bitconvert (VT16 DPR:$src))), (VREV64d16 DPR:$src)>;
        def : Pat<(VT16 (bitconvert (VT64 DPR:$src))), (VREV64d16 DPR:$src)>;
      }
      // 64-bit lanes <-> 8-bit lanes
      def : Pat<(VT64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
      def : Pat<(v8i8 (bitconvert (VT64 DPR:$src))), (VREV64d8 DPR:$src)>;
    }
    foreach VT32 = [ v2f32, v2i32 ] in {
      // 32-bit lanes <-> 16-bit lanes
      foreach VT16 = [ v4f16, v4bf16, v4i16 ] in {
        def : Pat<(VT32 (bitconvert (VT16 DPR:$src))), (VREV32d16 DPR:$src)>;
        def : Pat<(VT16 (bitconvert (VT32 DPR:$src))), (VREV32d16 DPR:$src)>;
      }
      // 32-bit lanes <-> 8-bit lanes
      def : Pat<(VT32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
      def : Pat<(v8i8 (bitconvert (VT32 DPR:$src))), (VREV32d8 DPR:$src)>;
    }
    // 16-bit lanes <-> 8-bit lanes
    foreach VT16 = [ v4f16, v4bf16, v4i16 ] in {
      def : Pat<(VT16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
      def : Pat<(v8i8 (bitconvert (VT16 DPR:$src))), (VREV16d8 DPR:$src)>;
    }

    // 128 bit conversions
    foreach QVT64 = [ v2f64, v2i64 ] in {
      // 64-bit lanes <-> 32-bit lanes
      foreach QVT32 = [ v4f32, v4i32 ] in {
        def : Pat<(QVT64 (bitconvert (QVT32 QPR:$src))), (VREV64q32 QPR:$src)>;
        def : Pat<(QVT32 (bitconvert (QVT64 QPR:$src))), (VREV64q32 QPR:$src)>;
      }
      // 64-bit lanes <-> 16-bit lanes
      foreach QVT16 = [ v8f16, v8bf16, v8i16 ] in {
        def : Pat<(QVT64 (bitconvert (QVT16 QPR:$src))), (VREV64q16 QPR:$src)>;
        def : Pat<(QVT16 (bitconvert (QVT64 QPR:$src))), (VREV64q16 QPR:$src)>;
      }
      // 64-bit lanes <-> 8-bit lanes
      def : Pat<(QVT64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
      def : Pat<(v16i8 (bitconvert (QVT64 QPR:$src))), (VREV64q8 QPR:$src)>;
    }
    foreach QVT32 = [ v4f32, v4i32 ] in {
      // 32-bit lanes <-> 16-bit lanes
      foreach QVT16 = [ v8f16, v8bf16, v8i16 ] in {
        def : Pat<(QVT32 (bitconvert (QVT16 QPR:$src))), (VREV32q16 QPR:$src)>;
        def : Pat<(QVT16 (bitconvert (QVT32 QPR:$src))), (VREV32q16 QPR:$src)>;
      }
      // 32-bit lanes <-> 8-bit lanes
      def : Pat<(QVT32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
      def : Pat<(v16i8 (bitconvert (QVT32 QPR:$src))), (VREV32q8 QPR:$src)>;
    }
    // 16-bit lanes <-> 8-bit lanes
    foreach QVT16 = [ v8f16, v8bf16, v8i16 ] in {
      def : Pat<(QVT16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
      def : Pat<(v16i8 (bitconvert (QVT16 QPR:$src))), (VREV16q8 QPR:$src)>;
    }
  }
  let Predicates = [HasNEON] in {
    // These patterns match the specific SDNode 'ARMVectorRegCastImpl' instead
    // of the more general 'ARMVectorRegCast', because the latter would also
    // match some bitconverts. With the general node, a cast whose input and
    // output types are identical has its bitconvert elided, leaving a
    // nonsense pattern that matches nothing.
    //
    // A register cast is a no-op at the instruction level: the source
    // register is simply reused with the destination type.
    defvar QRegVTs = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ];
    defvar DRegVTs = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ];

    // 128-bit types held in Q registers.
    foreach DstVT = QRegVTs in
      foreach SrcVT = QRegVTs in
        def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT QPR:$src))),
                  (DstVT QPR:$src)>;

    // 64-bit types held in D registers.
    foreach DstVT = DRegVTs in
      foreach SrcVT = DRegVTs in
        def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT DPR:$src))),
                  (DstVT DPR:$src)>;
  }
  6912. // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
  // Each pattern pairs a VLD1/VST1 of element size N (8 or 16) with a VREV64
  // of the same element size. Loads apply the VREV64 to the loaded value;
  // stores apply it to the value before handing it to VST1.
  6913. let Predicates = [IsBE,HasNEON] in {
  // byte_alignedload/store: 8-bit element access plus VREV64q8.
  6914. def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
  6915. (VREV64q8 (VLD1q8 addrmode6:$addr))>;
  6916. def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
  6917. (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
  // hword_alignedload/store: 16-bit element access plus VREV64q16.
  6918. def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
  6919. (VREV64q16 (VLD1q16 addrmode6:$addr))>;
  6920. def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
  6921. (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
  6922. }
  // Extracting an i32 lane of a v2i32 and bitcasting it to f32 folds into a
  // plain subregister read of the D register: SSubReg_f32_reg turns the lane
  // immediate into the subregister index handed to EXTRACT_SUBREG.
  let Predicates = [HasNEON] in
  def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
            (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  // Vector lengthening move with load, matching extending loads.
  //
  // Lengthen_Single handles the standard one-step case: a 64-bit VLD1
  // followed by a single VMOVL. Three patterns are produced:
  //   _Any - extload,  widened with the unsigned VMOVLu
  //   _Z   - zextload, widened with the unsigned VMOVLu
  //   _S   - sextload, widened with the signed VMOVLs
  // Example:
  //   Lengthen_Single<"8", "i16", "8"> yields for _Any
  //     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
  //         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
  multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar Load64   = !cast<Instruction>("VLD1d" # SrcTy);
    defvar WidenU   = !cast<Instruction>("VMOVLuv" # DestLanes # DestTy);
    defvar WidenS   = !cast<Instruction>("VMOVLsv" # DestLanes # DestTy);

    let AddedComplexity = 10 in {
      def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadvi" # SrcTy)
                                  addrmode6:$addr)),
                     (WidenU (Load64 addrmode6:$addr))>,
                 Requires<[HasNEON]>;

      def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadvi" # SrcTy)
                                  addrmode6:$addr)),
                     (WidenU (Load64 addrmode6:$addr))>,
                 Requires<[HasNEON]>;

      def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadvi" # SrcTy)
                                  addrmode6:$addr)),
                     (WidenS (Load64 addrmode6:$addr))>,
                 Requires<[HasNEON]>;
    }
  }
  // extload, zextload and sextload for a lengthening load that only needs
  // half of the widened lanes. One 32-bit lane is loaded with VLD1LNd32 into
  // lane 0 of an undefined D register, widened with VMOVL, and the low D
  // subregister (dsub_0) of the result is kept. Example:
  //   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> yields for _Any
  //     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
  //         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                                 (f64 (IMPLICIT_DEF)),
  //                                                 (i32 0))),
  //                         dsub_0)>;
  multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                                 string InsnLanes, string InsnTy> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar WidenU   = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar WidenS   = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenU (VLD1LNd32 addrmode6oneL32:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenU (VLD1LNd32 addrmode6oneL32:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenS (VLD1LNd32 addrmode6oneL32:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)>,
               Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfSingle. The extra "RevLanes"
  // parameter picks the VREV32d<RevLanes> instruction that is applied to the
  // VLD1LNd32 result before widening, converting the loaded data into proper
  // vector format in big endian mode.
  multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string InsnLanes,
                                            string InsnTy, string RevLanes> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar WidenU   = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar WidenS   = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
    defvar Rev32    = !cast<Instruction>("VREV32d" # RevLanes);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenU (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                (f64 (IMPLICIT_DEF)),
                                                (i32 0)))),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenU (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                (f64 (IMPLICIT_DEF)),
                                                (i32 0)))),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                      (WidenS (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                (f64 (IMPLICIT_DEF)),
                                                (i32 0)))),
                      dsub_0)>,
               Requires<[HasNEON]>;
  }
  // extload, zextload and sextload for a lengthening load followed by a
  // second lengthening move, quadrupling the initial element width.
  // A 32-bit lane is loaded with VLD1LNd32, widened once (Insn1), the low D
  // subregister of that result is widened again (Insn2), and the full
  // Q-register result of the second widening is the pattern's value.
  // _Any and _Z use the unsigned VMOVLu for both steps; _S uses VMOVLs for
  // both.
  //
  //   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> yields for _Any
  //     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
  //         (VMOVLuv4i32
  //            (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                                    (f64 (IMPLICIT_DEF)),
  //                                                    (i32 0))),
  //                            dsub_0))>;
  multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                             string Insn1Lanes, string Insn1Ty,
                             string Insn2Lanes, string Insn2Ty> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar FirstU   = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar FirstS   = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SecondU  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SecondS  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondU
                      (EXTRACT_SUBREG
                         (FirstU (VLD1LNd32 addrmode6oneL32:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0))),
                         dsub_0))>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondU
                      (EXTRACT_SUBREG
                         (FirstU (VLD1LNd32 addrmode6oneL32:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0))),
                         dsub_0))>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondS
                      (EXTRACT_SUBREG
                         (FirstS (VLD1LNd32 addrmode6oneL32:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0))),
                         dsub_0))>,
               Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_Double. The extra "RevLanes" parameter
  // picks the VREV32d<RevLanes> instruction that is applied to the VLD1LNd32
  // result before the first widening, converting the loaded data into proper
  // vector format in big endian mode.
  multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                        string SrcTy, string Insn1Lanes,
                                        string Insn1Ty, string Insn2Lanes,
                                        string Insn2Ty, string RevLanes> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar FirstU   = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar FirstS   = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SecondU  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SecondS  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    defvar Rev32    = !cast<Instruction>("VREV32d" # RevLanes);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondU
                      (EXTRACT_SUBREG
                         (FirstU (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                   (f64 (IMPLICIT_DEF)),
                                                   (i32 0)))),
                         dsub_0))>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondU
                      (EXTRACT_SUBREG
                         (FirstU (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                   (f64 (IMPLICIT_DEF)),
                                                   (i32 0)))),
                         dsub_0))>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6oneL32:$addr)),
                   (SecondS
                      (EXTRACT_SUBREG
                         (FirstS (Rev32 (VLD1LNd32 addrmode6oneL32:$addr,
                                                   (f64 (IMPLICIT_DEF)),
                                                   (i32 0)))),
                         dsub_0))>,
               Requires<[HasNEON]>;
  }
  // extload, zextload and sextload for a lengthening load followed by a
  // second lengthening move, quadrupling the initial element width, where
  // only half of the final lanes are needed (a 64-bit result instead of a
  // 128-bit one). A 16-bit lane is loaded with VLD1LNd16, widened twice, and
  // the low D subregister (dsub_0) is extracted after each widening.
  //
  //   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> yields
  //   for _Any
  //     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
  //         (EXTRACT_SUBREG
  //            (VMOVLuv4i32
  //               (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
  //                                                       (f64 (IMPLICIT_DEF)),
  //                                                       (i32 0))),
  //                               dsub_0)),
  //            dsub_0)>;
  multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                                 string Insn1Lanes, string Insn1Ty,
                                 string Insn2Lanes, string Insn2Ty> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar FirstU   = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar FirstS   = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SecondU  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SecondS  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondU
                         (EXTRACT_SUBREG
                            (FirstU (VLD1LNd16 addrmode6:$addr,
                                               (f64 (IMPLICIT_DEF)), (i32 0))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondU
                         (EXTRACT_SUBREG
                            (FirstU (VLD1LNd16 addrmode6:$addr,
                                               (f64 (IMPLICIT_DEF)), (i32 0))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondS
                         (EXTRACT_SUBREG
                            (FirstS (VLD1LNd16 addrmode6:$addr,
                                               (f64 (IMPLICIT_DEF)), (i32 0))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfDouble. A VREV16d8 is applied to
  // the VLD1LNd16 result before the first widening, converting the loaded
  // data into proper vector format in big endian mode.
  multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string Insn1Lanes,
                                            string Insn1Ty, string Insn2Lanes,
                                            string Insn2Ty> {
    defvar ResultVT = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar FirstU   = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar FirstS   = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SecondU  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SecondS  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    def _Any : Pat<(ResultVT (!cast<PatFrag>("extloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondU
                         (EXTRACT_SUBREG
                            (FirstU (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                         (f64 (IMPLICIT_DEF)),
                                                         (i32 0)))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _Z   : Pat<(ResultVT (!cast<PatFrag>("zextloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondU
                         (EXTRACT_SUBREG
                            (FirstU (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                         (f64 (IMPLICIT_DEF)),
                                                         (i32 0)))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;

    def _S   : Pat<(ResultVT (!cast<PatFrag>("sextloadv" # SrcTy)
                                addrmode6:$addr)),
                   (EXTRACT_SUBREG
                      (SecondS
                         (EXTRACT_SUBREG
                            (FirstS (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                         (f64 (IMPLICIT_DEF)),
                                                         (i32 0)))),
                            dsub_0)),
                      dsub_0)>,
               Requires<[HasNEON]>;
  }
  // Instantiations of the Lengthen_* multiclasses.
  // Lengthen_Single takes (DestLanes, DestTy, SrcTy). These three defms sit
  // outside any `let Predicates` block, so the only predicate visible here is
  // the Requires<[HasNEON]> written inside the multiclass.
  7150. defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
  7151. defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
  7152. defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
  // Little-endian variants: the VLD1LN result is widened directly.
  // Argument order is (DestLanes, DestTy, SrcTy, then the lanes/type of each
  // VMOVL step).
  7153. let Predicates = [HasNEON,IsLE] in {
  7154. defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  7155. defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
  7156. // Double lengthening - v4i8 -> v4i16 -> v4i32
  7157. defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  7158. // v2i8 -> v2i16 -> v2i32
  7159. defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  7160. // v2i16 -> v2i32 -> v2i64
  7161. defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
  7162. }
  // Big-endian variants: same conversions, using the *_Big_Endian
  // multiclasses. For HalfSingle/Double the trailing extra argument is
  // RevLanes, which selects VREV32d8 or VREV32d16; HalfDouble_Big_Endian
  // takes no such argument because it always uses VREV16d8.
  7163. let Predicates = [HasNEON,IsBE] in {
  7164. defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  7165. defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
  7166. // Double lengthening - v4i8 -> v4i16 -> v4i32
  7167. defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  7168. // v2i8 -> v2i16 -> v2i32
  7169. defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  7170. // v2i16 -> v2i32 -> v2i64
  7171. defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
  7172. }
  // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 (little endian).
  // A 16-bit lane is loaded with VLD1LNd16 and widened three times, taking
  // the low D subregister between steps. extload and zextload both use the
  // unsigned VMOVLu chain; sextload uses the signed VMOVLs chain.
  let Predicates = [HasNEON,IsLE] in {
    foreach UnsignedLoad = [ extloadvi8, zextloadvi8 ] in
      def : Pat<(v2i64 (UnsignedLoad addrmode6:$addr)),
                (VMOVLuv2i64
                   (EXTRACT_SUBREG
                      (VMOVLuv4i32
                         (EXTRACT_SUBREG
                            (VMOVLuv8i16
                               (VLD1LNd16 addrmode6:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0))),
                            dsub_0)),
                      dsub_0))>;

    def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
              (VMOVLsv2i64
                 (EXTRACT_SUBREG
                    (VMOVLsv4i32
                       (EXTRACT_SUBREG
                          (VMOVLsv8i16
                             (VLD1LNd16 addrmode6:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                          dsub_0)),
                    dsub_0))>;
  }
  // Big-endian triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64.
  // Same widening chain as the little-endian patterns, with an additional
  // VREV16d8 applied to the VLD1LNd16 result to convert the loaded data into
  // proper vector format in big endian mode.
  let Predicates = [HasNEON,IsBE] in {
    foreach UnsignedLoad = [ extloadvi8, zextloadvi8 ] in
      def : Pat<(v2i64 (UnsignedLoad addrmode6:$addr)),
                (VMOVLuv2i64
                   (EXTRACT_SUBREG
                      (VMOVLuv4i32
                         (EXTRACT_SUBREG
                            (VMOVLuv8i16
                               (VREV16d8
                                  (VLD1LNd16 addrmode6:$addr,
                                             (f64 (IMPLICIT_DEF)), (i32 0)))),
                            dsub_0)),
                      dsub_0))>;

    def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
              (VMOVLsv2i64
                 (EXTRACT_SUBREG
                    (VMOVLsv4i32
                       (EXTRACT_SUBREG
                          (VMOVLsv8i16
                             (VREV16d8
                                (VLD1LNd16 addrmode6:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0)))),
                          dsub_0)),
                    dsub_0))>;
  }
  // Concatenating two D registers into a 128-bit vector needs no instruction:
  // the result is a REG_SEQUENCE placing $Dn in dsub_0 and $Dm in dsub_1 of a
  // Q register. The same expansion is used for every listed result type.
  let Predicates = [HasNEON] in {
    foreach ResVT = [ v2i64, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16 ] in
      def : Pat<(ResVT (concat_vectors DPR:$Dn, DPR:$Dm)),
                (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  }
  7224. //===----------------------------------------------------------------------===//
  7225. // Assembler aliases
  7226. //
  // fmdhr / fmdlr are accepted as aliases for VSETLNi32 with a fixed lane
  // index: fmdhr writes lane 1 of $Dd and fmdlr writes lane 0.
  7227. def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
  7228. (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
  7229. def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
  7230. (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
  7231. // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
  // Three-operand forms; each mnemonic gets a D-register and a Q-register
  // alias.
  7232. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
  7233. (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7234. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
  7235. (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7236. defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
  7237. (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7238. defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
  7239. (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7240. defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
  7241. (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7242. defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
  7243. (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7244. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
  7245. (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7246. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
  7247. (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7248. // ... two-operand aliases
  // In the two-operand forms $Vdn is passed as both the destination and the
  // first source operand of the underlying instruction.
  // NOTE(review): only vand/veor/vorr have a two-operand alias in this group;
  // there is none for vbic here - confirm whether that is intentional or
  // handled elsewhere.
  7249. defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
  7250. (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7251. defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
  7252. (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7253. defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
  7254. (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7255. defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
  7256. (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7257. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
  7258. (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7259. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
  7260. (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7261. // ... immediates
  // "vand.i16/.i32 Vd, #imm" is mapped onto the VBIC-immediate instructions
  // using the nImmSplatNotI16 / nImmSplatNotI32 operand classes.
  // NOTE(review): presumably those operand classes accept the bitwise
  // complement of the written immediate, making VBIC equivalent to the
  // requested VAND - confirm against the operand definitions.
  7262. def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
  7263. (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  7264. def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
  7265. (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
  7266. def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
  7267. (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  7268. def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
  7269. (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
  7270. // VLD1 single-lane pseudo-instructions. These need special handling for
  7271. // the lane index that an InstAlias can't handle, so we use these instead.
  // Three addressing forms are defined for each element size (.8/.16/.32):
  //   *Asm_N              "$list, $addr"       - no writeback
  //   *WB_fixed_Asm_N     "$list, $addr!"      - writeback marked with '!'
  //   *WB_register_Asm_N  "$list, $addr, $Rm"  - extra rGPR operand $Rm
  // The list operand class (Byte/HWord/Word indexed) and the address operand
  // class (alignNone/align16/align32) both follow the element size.
  7272. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
  7273. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7274. pred:$p)>;
  7275. def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
  7276. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7277. pred:$p)>;
  7278. def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
  7279. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7280. pred:$p)>;
  // Writeback form written with a trailing '!' on the address.
  7281. def VLD1LNdWB_fixed_Asm_8 :
  7282. NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
  7283. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7284. pred:$p)>;
  7285. def VLD1LNdWB_fixed_Asm_16 :
  7286. NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
  7287. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7288. pred:$p)>;
  7289. def VLD1LNdWB_fixed_Asm_32 :
  7290. NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
  7291. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7292. pred:$p)>;
  // Form taking an additional register operand $Rm after the address.
  7293. def VLD1LNdWB_register_Asm_8 :
  7294. NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
  7295. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7296. rGPR:$Rm, pred:$p)>;
  7297. def VLD1LNdWB_register_Asm_16 :
  7298. NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
  7299. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7300. rGPR:$Rm, pred:$p)>;
  7301. def VLD1LNdWB_register_Asm_32 :
  7302. NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
  7303. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7304. rGPR:$Rm, pred:$p)>;
  7305. // VST1 single-lane pseudo-instructions. These need special handling for
  7306. // the lane index that an InstAlias can't handle, so we use these instead.
  7307. def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
  7308. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7309. pred:$p)>;
  7310. def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
  7311. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7312. pred:$p)>;
  7313. def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
  7314. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7315. pred:$p)>;
  // VST1 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VST1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".8", "$list, $addr!",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".16", "$list, $addr!",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
  def VST1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".32", "$list, $addr!",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  // VST1 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VST1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".8", "$list, $addr, $Rm",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".16", "$list, $addr, $Rm",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst1${p}", ".32", "$list, $addr, $Rm",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD2 single-lane asm pseudo-instructions. An InstAlias cannot handle the
  // lane index these forms carry, so dedicated pseudos are defined instead.
  // The "d" and "q" name variants differ in the list operand class
  // (VecListTwoD*Indexed vs. VecListTwoQ*Indexed).
  def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".8", "$list, $addr",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
  def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  // VLD2 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VLD2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
  def VLD2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  // VLD2 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  // VST2 single-lane asm pseudo-instructions. An InstAlias cannot handle the
  // lane index these forms carry, so dedicated pseudos are defined instead.
  // The "d" and "q" name variants differ in the list operand class
  // (VecListTwoD*Indexed vs. VecListTwoQ*Indexed).
  def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".8", "$list, $addr",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
  def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  // VST2 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
  def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  // VST2 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD3 all-lanes asm pseudo-instructions. These need special handling for
  // the lane index that an InstAlias can't handle, so pseudos are used instead.
  // NOTE(review): the list operands here are *AllLanes classes, so the
  // "lane index" wording may have been carried over from the single-lane
  // sections -- confirm.
  def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  // VLD3 all-lanes asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
  // VLD3 all-lanes asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD3 single-lane asm pseudo-instructions. An InstAlias cannot handle the
  // lane index these forms carry, so dedicated pseudos are defined instead.
  // Every variant in this group uses addrmode6alignNone for the address.
  def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  // VLD3 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  // VLD3 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD3 multiple-structure asm pseudo-instructions. The normal instructions
  // don't yet model these vector operands, so pseudos stand in for them.
  // FIXME: Remove these when the register classes and instructions are updated.
  def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  // VLD3 multiple-structure asm pseudo-instructions, WB_fixed spelling: the
  // address operand is written with a trailing "!" ("$list, $addr!").
  def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  // VLD3 multiple-structure asm pseudo-instructions, WB_register spelling: an
  // extra rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  // VST3 single-lane asm pseudo-instructions. An InstAlias cannot handle the
  // lane index these forms carry, so dedicated pseudos are defined instead.
  // Every variant in this group uses addrmode6alignNone for the address.
  def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  // VST3 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
  // VST3 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
  // VST3 multiple-structure asm pseudo-instructions. The normal instructions
  // don't yet model these vector operands, so pseudos stand in for them.
  // FIXME: Remove these when the register classes and instructions are updated.
  def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  // VST3 multiple-structure asm pseudo-instructions, WB_fixed spelling: the
  // address operand is written with a trailing "!" ("$list, $addr!").
  def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  // VST3 multiple-structure asm pseudo-instructions, WB_register spelling: an
  // extra rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD4 all-lanes asm pseudo-instructions. These need special handling for
  // the lane index that an InstAlias can't handle, so pseudos are used instead.
  // NOTE(review): the list operands here are *AllLanes classes, so the
  // "lane index" wording may have been carried over from the single-lane
  // sections -- confirm.
  // The address operand class varies with the data-type suffix:
  // .8 -> dupalign32, .16 -> dupalign64, .32 -> dupalign64or128.
  def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
  def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
  // VLD4 all-lanes asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
  // VLD4 all-lanes asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD4 single-lane asm pseudo-instructions. An InstAlias cannot handle the
  // lane index these forms carry, so dedicated pseudos are defined instead.
  // The address operand class varies with the data-type suffix:
  // .8 -> align32, .16 -> align64, .32 -> align64or128.
  def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
  def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
  // VLD4 single-lane asm pseudo-instructions, WB_fixed spelling: the address
  // operand is written with a trailing "!" ("$list, $addr!").
  def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
  def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
  def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
  // VLD4 single-lane asm pseudo-instructions, WB_register spelling: an extra
  // rGPR:$Rm operand follows the address ("$list, $addr, $Rm").
  def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
  def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;
  // VLD4 multiple-structure asm pseudo-instructions. The normal instructions
  // don't yet model these vector operands, so pseudos stand in for them.
  // FIXME: Remove these when the register classes and instructions are updated.
  def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<
      "vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
  // VLD4 multiple-structure pseudos, "$list, $addr!" syntax (the "fixed"
  // writeback variants, going by the def names).
  def VLD4dWB_fixed_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VLD4dWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VLD4dWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VLD4qWB_fixed_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VLD4qWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VLD4qWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  // VLD4 multiple-structure pseudos, "$list, $addr, $Rm" syntax (the
  // "register" writeback variants, going by the def names). Each takes an
  // extra rGPR:$Rm operand.
  def VLD4dWB_register_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VLD4dWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VLD4dWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VLD4qWB_register_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VLD4qWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VLD4qWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  // VST4 single-lane pseudo-instructions.
  //
  // An InstAlias cannot express the lane index carried by the vector list, so
  // these pseudos are matched instead.
  //
  // Forms without writeback: "$list, $addr". Note there is no ".8" form for
  // the Q-spaced lists.
  def VST4LNdAsm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                                (ins VecListFourDByteIndexed:$list,
                                     addrmode6align32:$addr,
                                     pred:$p)>;
  def VST4LNdAsm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                                (ins VecListFourDHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     pred:$p)>;
  def VST4LNdAsm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                                (ins VecListFourDWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     pred:$p)>;
  def VST4LNqAsm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                                (ins VecListFourQHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     pred:$p)>;
  def VST4LNqAsm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                                (ins VecListFourQWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     pred:$p)>;
  // VST4 single-lane pseudos, "$list, $addr!" syntax (the "fixed" writeback
  // variants, going by the def names).
  def VST4LNdWB_fixed_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                                (ins VecListFourDByteIndexed:$list,
                                     addrmode6align32:$addr,
                                     pred:$p)>;
  def VST4LNdWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                                (ins VecListFourDHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     pred:$p)>;
  def VST4LNdWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                                (ins VecListFourDWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     pred:$p)>;
  def VST4LNqWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                                (ins VecListFourQHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     pred:$p)>;
  def VST4LNqWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                                (ins VecListFourQWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     pred:$p)>;
  // VST4 single-lane pseudos, "$list, $addr, $Rm" syntax (the "register"
  // writeback variants, going by the def names). Each takes an extra
  // rGPR:$Rm operand.
  def VST4LNdWB_register_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                                (ins VecListFourDByteIndexed:$list,
                                     addrmode6align32:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4LNdWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourDHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4LNdWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourDWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4LNqWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourQHWordIndexed:$list,
                                     addrmode6align64:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4LNqWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourQWordIndexed:$list,
                                     addrmode6align64or128:$addr,
                                     rGPR:$Rm, pred:$p)>;
  // VST4 multiple-structure pseudo-instructions.
  //
  // The real instructions do not yet model these vector-list operands, so the
  // assembler matches these pseudos instead.
  // FIXME: Remove these when the register classes and instructions are updated.
  //
  // Forms without writeback: "$list, $addr".
  def VST4dAsm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4dAsm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4dAsm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qAsm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qAsm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qAsm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  // VST4 multiple-structure pseudos, "$list, $addr!" syntax (the "fixed"
  // writeback variants, going by the def names).
  def VST4dWB_fixed_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4dWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4dWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qWB_fixed_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qWB_fixed_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  def VST4qWB_fixed_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     pred:$p)>;
  // VST4 multiple-structure pseudos, "$list, $addr, $Rm" syntax (the
  // "register" writeback variants, going by the def names). Each takes an
  // extra rGPR:$Rm operand.
  def VST4dWB_register_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4dWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4dWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourD:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4qWB_register_Asm_8 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4qWB_register_Asm_16 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  def VST4qWB_register_Asm_32 :
      NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                                (ins VecListFourQ:$list,
                                     addrmode6align64or128or256:$addr,
                                     rGPR:$Rm, pred:$p)>;
  // VMOV and VMVN (register) accept an optional datatype suffix.
  // A register-to-register vmov is emitted as VORR with both sources equal.
  defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                            (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                            (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                            (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                            (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
  // VCLE (register) is an assembler alias for VCGE w/ the operands reversed:
  // "vcle Vd, Vn, Vm" is emitted as the matching VCGE with $Vm and $Vn swapped.
  // D-register versions.
  def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                      (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                      (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                      (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                      (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                      (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                      (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                      (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // The half-precision form additionally requires HasFullFP16.
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                      (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // Q-register versions.
  def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                      (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                      (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                      (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                      (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                      (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                      (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                      (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // The half-precision form additionally requires HasFullFP16.
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                      (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // VCLT (register) is an assembler alias for VCGT w/ the operands reversed:
  // "vclt Vd, Vn, Vm" is emitted as the matching VCGT with $Vm and $Vn swapped.
  // D-register versions.
  def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                      (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                      (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                      (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                      (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                      (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                      (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                      (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // The half-precision form additionally requires HasFullFP16.
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                      (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // Q-register versions.
  def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                      (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                      (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                      (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                      (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                      (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                      (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                      (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // The half-precision form additionally requires HasFullFP16.
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                      (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // VSWP accepts, but does not require, a type suffix (D and Q forms).
  defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                            (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                            (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
  // VBIF, VBIT, and VBSL accept, but do not require, a type suffix.
  // D-register versions.
  defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                            (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                            (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                            (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  // Q-register versions.
  defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                            (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                            (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                            (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  // "vmov.i32 Vd, #imm" with an immediate accepted by nImmVMOVI32Neg is
  // emitted as VMVN, and "vmvn.i32 Vd, #imm" with such an immediate is emitted
  // as VMOV.
  // NOTE(review): presumably nImmVMOVI32Neg matches immediates whose bitwise
  // inverse is encodable -- confirm against the operand definition.
  def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                      (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                      (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                      (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                      (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  // 'gas' compatibility: accept a trailing 'q' on quad-word mnemonics.
  // Strictly these should be limited to the Q-register variants, but the
  // register classes already make the match unambiguous, so a plain
  // MnemonicAlias keeps things simple.
  def : NEONMnemonicAlias<"vbicq", "vbic">;
  def : NEONMnemonicAlias<"vandq", "vand">;
  def : NEONMnemonicAlias<"veorq", "veor">;
  def : NEONMnemonicAlias<"vorrq", "vorr">;
  def : NEONMnemonicAlias<"vmovq", "vmov">;
  def : NEONMnemonicAlias<"vmvnq", "vmvn">;
  // Floating-point vmovq is spelled out explicitly so that the FPImm variants
  // get handled early; the parser gets confused otherwise.
  def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
  def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
  def : NEONMnemonicAlias<"vaddq", "vadd">;
  def : NEONMnemonicAlias<"vsubq", "vsub">;
  def : NEONMnemonicAlias<"vminq", "vmin">;
  def : NEONMnemonicAlias<"vmaxq", "vmax">;
  def : NEONMnemonicAlias<"vmulq", "vmul">;
  def : NEONMnemonicAlias<"vabsq", "vabs">;
  def : NEONMnemonicAlias<"vshlq", "vshl">;
  def : NEONMnemonicAlias<"vshrq", "vshr">;
  def : NEONMnemonicAlias<"vcvtq", "vcvt">;
  def : NEONMnemonicAlias<"vcleq", "vcle">;
  def : NEONMnemonicAlias<"vceqq", "vceq">;
  def : NEONMnemonicAlias<"vzipq", "vzip">;
  def : NEONMnemonicAlias<"vswpq", "vswp">;
  def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
  def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
  // Some floating-point immediates cannot be encoded by vmov.f32 even though
  // their bit pattern is encodable as a .i32 immediate. Accept those as
  // "vmov.f32" and emit the .i32 VMOV instead.
  def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                      (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                      (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  // ARMv8.6a BFloat16 instructions.
  let Predicates = [HasBF16, HasNEON] in {

  // Shared base for the bf16 VDOT encodings: an N3Vnp with the 0b1101 field
  // fixed and an empty opcode/datatype string (subclasses set AsmString
  // themselves). Everything derived from it decodes in the "VFPV8" namespace.
  class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
                 dag oops, dag iops, list<dag> pattern>
      : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
              N3RegFrm, IIC_VDOTPROD, "", "", pattern> {
    let DecoderNamespace = "VFPV8";
  }

  // Vector-by-vector bf16 dot product, selected from int_arm_neon_bfdot.
  // $Vd is the accumulator and is tied to the result register.
  class BF16VDOTS<bit Q, RegisterClass RegTy, string opc,
                  ValueType AccumTy, ValueType InputTy>
      : BF16VDOT<0b11000, 0b00, Q,
                 (outs RegTy:$dst),
                 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
                 [(set (AccumTy RegTy:$dst),
                       (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                           (InputTy RegTy:$Vn),
                                           (InputTy RegTy:$Vm)))]> {
    // DecoderNamespace ("VFPV8") is inherited from BF16VDOT.
    let Constraints = "$dst = $Vd";
    let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
  }

  // Vector-by-lane bf16 dot product. The instruction itself carries no
  // pattern; the accompanying Pat matches int_arm_neon_bfdot whose last
  // operand is an ARMvduplane (bitcast to InputTy) and passes RHS as the
  // $Vm operand.
  multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc,
                       ValueType AccumTy, ValueType InputTy, dag RHS> {
    def "" : BF16VDOT<0b11100, 0b00, Q,
                      (outs RegTy:$dst),
                      (ins RegTy:$Vd, RegTy:$Vn,
                           DPR_VFP2:$Vm, VectorIndex32:$lane),
                      []> {
      // The lane selector is encoded in instruction bit 5.
      bit lane;
      let Inst{5} = lane;
      // DecoderNamespace ("VFPV8") is inherited from BF16VDOT.
      let Constraints = "$dst = $Vd";
      let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm$lane";
    }

    def : Pat<
      (AccumTy (int_arm_neon_bfdot
                  (AccumTy RegTy:$Vd),
                  (InputTy RegTy:$Vn),
                  (InputTy (bitconvert
                             (AccumTy (ARMvduplane (AccumTy RegTy:$Vm),
                                                   VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                VectorIndex32:$lane)>;
  }

  def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
  def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

  defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16,
                                   (v2f32 DPR_VFP2:$Vm)>;
  defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16,
                                   (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

  // bf16 matrix multiply-accumulate, selected from int_arm_neon_bfmmla.
  // Note the selection pattern is written in terms of QPR / v4f32 / v8bf16
  // regardless of RegTy; the only instantiation below uses QPR.
  class BF16MM<bit Q, RegisterClass RegTy, string opc>
      : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
              (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
              N3RegFrm, IIC_VDOTPROD, "", "",
              [(set (v4f32 QPR:$dst),
                    (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                         (v8bf16 QPR:$Vn),
                                         (v8bf16 QPR:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
    let DecoderNamespace = "VFPV8";
  }

  def VMMLA : BF16MM<1, QPR, "vmmla">;

  // "vfma<suffix>.bf16" on Q registers, vector-by-vector form. OpNode is the
  // intrinsic to select from; $Vd is tied to the result.
  class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
      : N3VCP8<0b00, 0b11, T, 1,
               (outs QPR:$dst),
               (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
               NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
               [(set (v4f32 QPR:$dst),
                     (OpNode (v4f32 QPR:$Vd),
                             (v8bf16 QPR:$Vn),
                             (v8bf16 QPR:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
  def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

  // "vfma<suffix>.bf16" on Q registers, vector-by-lane form. The instruction
  // has no pattern of its own; the Pat below matches OpNode applied to an
  // ARMvduplane of a Q register and rewrites it onto the D sub-register and
  // lane given by DSubReg_i16_reg / SubReg_i16_lane.
  multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
    def "" : N3VLaneCP8<0, 0b11, T, 1,
                        (outs QPR:$dst),
                        (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                        IIC_VMACD, "vfma" # suffix, "bf16",
                        "$Vd, $Vn, $Vm$idx", "", []> {
      // The 2-bit index is split across instruction bits 5 (high) and 3 (low).
      bits<2> idx;
      let Inst{5} = idx{1};
      let Inst{3} = idx{0};
      let Constraints = "$dst = $Vd";
      let DecoderNamespace = "VFPV8";
    }

    def : Pat<
      (v4f32 (OpNode (v4f32 QPR:$Vd),
                     (v8bf16 QPR:$Vn),
                     (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                          VectorIndex16:$lane)))),
      (!cast<Instruction>(NAME)
          QPR:$Vd,
          QPR:$Vn,
          (EXTRACT_SUBREG QPR:$Vm, (DSubReg_i16_reg VectorIndex16:$lane)),
          (SubReg_i16_lane VectorIndex16:$lane))>;
  }

  defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
  defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

  // "vcvt.bf16.f32 $Vd, $Vm": Q-register source, D-register destination.
  // Defined without a selection pattern.
  def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                      (outs DPR:$Vd), (ins QPR:$Vm),
                      NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;

  }
  // End of BFloat16 instructions