ARMInstrNEON.td 443 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262
  1. //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file describes the ARM NEON instruction set.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //===----------------------------------------------------------------------===//
  13. // NEON-specific Operands.
  14. //===----------------------------------------------------------------------===//
  // Plain i32 operand printed by printVMOVModImmOperand. No assembler match
  // class is attached here.
  def nModImm : Operand<i32> {
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Splat-immediate operands (i8 / i16 / i32 flavours). Each operand is an i32
  // printed by printVMOVModImmOperand and matched in the assembler through its
  // own AsmOperandClass.
  def nImmSplatI8AsmOperand : AsmOperandClass {
    let Name = "NEONi8splat";
  }
  def nImmSplatI8 : Operand<i32> {
    let ParserMatchClass = nImmSplatI8AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }

  def nImmSplatI16AsmOperand : AsmOperandClass {
    let Name = "NEONi16splat";
  }
  def nImmSplatI16 : Operand<i32> {
    let ParserMatchClass = nImmSplatI16AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }

  def nImmSplatI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32splat";
  }
  def nImmSplatI32 : Operand<i32> {
    let ParserMatchClass = nImmSplatI32AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // "Not" splat-immediate operands (i16 / i32 flavours). Only the assembler
  // match class is set on these operands; no PrintMethod is assigned here.
  def nImmSplatNotI16AsmOperand : AsmOperandClass {
    let Name = "NEONi16splatNot";
  }
  def nImmSplatNotI16 : Operand<i32> {
    let ParserMatchClass = nImmSplatNotI16AsmOperand;
  }

  def nImmSplatNotI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32splatNot";
  }
  def nImmSplatNotI32 : Operand<i32> {
    let ParserMatchClass = nImmSplatNotI32AsmOperand;
  }
  // i32 VMOV immediate operand: matched via the NEONi32vmov operand class and
  // printed by printVMOVModImmOperand.
  def nImmVMOVI32AsmOperand : AsmOperandClass {
    let Name = "NEONi32vmov";
  }
  def nImmVMOVI32 : Operand<i32> {
    let ParserMatchClass = nImmVMOVI32AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembler operand class for a VMOV immediate of type `From` replicated
  // into type `To`. The class name, predicate and render method names are all
  // built from the bit sizes of the two value types.
  class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
      : AsmOperandClass {
    let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
    let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
    let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  }
  // Assembler operand class for the "inv" counterpart of the replicated
  // immediate: same shape as the vmov variant, but using the
  // isNEONinvReplicate predicate and addNEONinvi<N>ReplicateOperands renderer.
  class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
      : AsmOperandClass {
    let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
    let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
    let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  }
  // i32 operand wrapping nImmVMOVIAsmOperandReplicate<From, To>; printed by
  // printVMOVModImmOperand.
  class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
    let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // i32 operand wrapping nImmVINVIAsmOperandReplicate<From, To>; printed by
  // printVMOVModImmOperand.
  class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
    let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // "Neg" flavour of the i32 VMOV immediate operand: matched via the
  // NEONi32vmovNeg operand class and printed by printVMOVModImmOperand.
  def nImmVMOVI32NegAsmOperand : AsmOperandClass {
    let Name = "NEONi32vmovNeg";
  }
  def nImmVMOVI32Neg : Operand<i32> {
    let ParserMatchClass = nImmVMOVI32NegAsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // f32 VMOV immediate operand. Reuses the FPImmOperand match class (defined
  // outside this section) and is printed by printFPImmOperand.
  def nImmVMOVF32 : Operand<i32> {
    let ParserMatchClass = FPImmOperand;
    let PrintMethod = "printFPImmOperand";
  }
  // i64 splat-immediate operand: matched via the NEONi64splat operand class
  // and printed by printVMOVModImmOperand.
  def nImmSplatI64AsmOperand : AsmOperandClass {
    let Name = "NEONi64splat";
  }
  def nImmSplatI64 : Operand<i32> {
    let ParserMatchClass = nImmSplatI64AsmOperand;
    let PrintMethod = "printVMOVModImmOperand";
  }
  // Assembler operand classes for vector lane indices, one per index width.
  def VectorIndex8Operand : AsmOperandClass {
    let Name = "VectorIndex8";
  }
  def VectorIndex16Operand : AsmOperandClass {
    let Name = "VectorIndex16";
  }
  def VectorIndex32Operand : AsmOperandClass {
    let Name = "VectorIndex32";
  }
  def VectorIndex64Operand : AsmOperandClass {
    let Name = "VectorIndex64";
  }
  // Lane-index immediate accepted only when, viewed as an unsigned 64-bit
  // value, it is below 8. Printed by printVectorIndex.
  def VectorIndex8
      : Operand<i32>,
        ImmLeaf<i32, [{ return ((uint64_t)Imm) < 8; }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex8Operand;
  }
  // Lane-index immediate accepted only when, viewed as an unsigned 64-bit
  // value, it is below 4. Printed by printVectorIndex.
  def VectorIndex16
      : Operand<i32>,
        ImmLeaf<i32, [{ return ((uint64_t)Imm) < 4; }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex16Operand;
  }
  // Lane-index immediate accepted only when, viewed as an unsigned 64-bit
  // value, it is below 2. Printed by printVectorIndex.
  def VectorIndex32
      : Operand<i32>,
        ImmLeaf<i32, [{ return ((uint64_t)Imm) < 2; }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex32Operand;
  }
  // Lane-index immediate accepted only when, viewed as an unsigned 64-bit
  // value, it is below 1 (i.e. only index 0). Printed by printVectorIndex.
  def VectorIndex64
      : Operand<i32>,
        ImmLeaf<i32, [{ return ((uint64_t)Imm) < 1; }]> {
    let MIOperandInfo = (ops i32imm);
    let PrintMethod = "printVectorIndex";
    let ParserMatchClass = VectorIndex64Operand;
  }
  // Vector list holding a single D register. Parsed by parseVectorList,
  // rendered by addVecListOperands and printed by printVectorListOne.
  def VecListOneDAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListOneD";
  }
  def VecListOneD
      : RegisterOperand<DPR, "printVectorListOne"> {
    let ParserMatchClass = VecListOneDAsmOperand;
  }
  // Vector list of two consecutive D registers, modelled with the DPair
  // register class and printed by printVectorListTwo.
  def VecListDPairAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListDPair";
  }
  def VecListDPair
      : RegisterOperand<DPair, "printVectorListTwo"> {
    let ParserMatchClass = VecListDPairAsmOperand;
  }
  // Vector list of three consecutive D registers. The operand is declared on
  // the DPR register class and printed by printVectorListThree.
  def VecListThreeDAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeD";
  }
  def VecListThreeD
      : RegisterOperand<DPR, "printVectorListThree"> {
    let ParserMatchClass = VecListThreeDAsmOperand;
  }
  // Vector list of four consecutive D registers. The operand is declared on
  // the DPR register class and printed by printVectorListFour.
  def VecListFourDAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourD";
  }
  def VecListFourD
      : RegisterOperand<DPR, "printVectorListFour"> {
    let ParserMatchClass = VecListFourDAsmOperand;
  }
  // Vector list of two D registers with a stride of 2 (i.e. drawn from two
  // sequential Q registers). Printed by printVectorListTwoSpaced.
  // NOTE(review): this operand is declared on DPair, whereas the all-lanes
  // spaced variant uses DPairSpc -- confirm that the difference is intended.
  def VecListDPairSpacedAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListDPairSpaced";
  }
  def VecListDPairSpaced
      : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
    let ParserMatchClass = VecListDPairSpacedAsmOperand;
  }
  // Vector list of three D registers with a stride of 2 (spanning three Q
  // registers). Printed by printVectorListThreeSpaced.
  def VecListThreeQAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeQ";
  }
  def VecListThreeQ
      : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
    let ParserMatchClass = VecListThreeQAsmOperand;
  }
  // Register list of four D registers spaced by 2 (four Q registers).
  // Parsed by parseVectorList, rendered by addVecListOperands and printed by
  // printVectorListFourSpaced.
  def VecListFourQAsmOperand : AsmOperandClass {
    let Name = "VecListFourQ";
    let ParserMethod = "parseVectorList";
    let RenderMethod = "addVecListOperands";
  }
  def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
    let ParserMatchClass = VecListFourQAsmOperand;
  }
  // Single D register list written with "all lanes" subscripting. Printed by
  // printVectorListOneAllLanes.
  def VecListOneDAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListOneDAllLanes";
  }
  def VecListOneDAllLanes
      : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
    let ParserMatchClass = VecListOneDAllLanesAsmOperand;
  }
  // Two-D-register list written with "all lanes" subscripting. Uses the DPair
  // register class and is printed by printVectorListTwoAllLanes.
  def VecListDPairAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListDPairAllLanes";
  }
  def VecListDPairAllLanes
      : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
    let ParserMatchClass = VecListDPairAllLanesAsmOperand;
  }
  // Two D registers with a stride of 2 (two sequential Q registers), written
  // with "all lanes" subscripting. Uses the DPairSpc register class and is
  // printed by printVectorListTwoSpacedAllLanes.
  def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListDPairSpacedAllLanes";
  }
  def VecListDPairSpacedAllLanes
      : RegisterOperand<DPairSpc, "printVectorListTwoSpacedAllLanes"> {
    let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
  }
  // Three-D-register list written with "all lanes" subscripting. Printed by
  // printVectorListThreeAllLanes.
  def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeDAllLanes";
  }
  def VecListThreeDAllLanes
      : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
    let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
  }
  // Three D registers with a stride of 2 (three sequential Q registers),
  // written with "all lanes" subscripting. Printed by
  // printVectorListThreeSpacedAllLanes.
  def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeQAllLanes";
  }
  def VecListThreeQAllLanes
      : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
    let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
  }
  // Four-D-register list written with "all lanes" subscripting. Printed by
  // printVectorListFourAllLanes.
  def VecListFourDAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourDAllLanes";
  }
  def VecListFourDAllLanes
      : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
    let ParserMatchClass = VecListFourDAllLanesAsmOperand;
  }
  // Four D registers with a stride of 2 (four sequential Q registers),
  // written with "all lanes" subscripting. Printed by
  // printVectorListFourSpacedAllLanes.
  def VecListFourQAllLanesAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourQAllLanes";
  }
  def VecListFourQAllLanes
      : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
    let ParserMatchClass = VecListFourQAllLanesAsmOperand;
  }
  // Lane-indexed lists of one D register. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Byte lane subscripting.
  def VecListOneDByteIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListOneDByteIndexed";
  }
  def VecListOneDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  }

  // Half-word lane subscripting.
  def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListOneDHWordIndexed";
  }
  def VecListOneDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListOneDWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListOneDWordIndexed";
  }
  def VecListOneDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  }
  // Lane-indexed lists of two D registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Byte lane subscripting.
  def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListTwoDByteIndexed";
  }
  def VecListTwoDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  }

  // Half-word lane subscripting.
  def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListTwoDHWordIndexed";
  }
  def VecListTwoDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListTwoDWordIndexed";
  }
  def VecListTwoDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  }
  // Lane-indexed lists of two Q registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Half-word lane subscripting.
  def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListTwoQHWordIndexed";
  }
  def VecListTwoQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListTwoQWordIndexed";
  }
  def VecListTwoQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  }
  // Lane-indexed lists of three D registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Byte lane subscripting.
  def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeDByteIndexed";
  }
  def VecListThreeDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  }

  // Half-word lane subscripting.
  def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeDHWordIndexed";
  }
  def VecListThreeDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeDWordIndexed";
  }
  def VecListThreeDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  }
  // Lane-indexed lists of three Q registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Half-word lane subscripting.
  def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeQHWordIndexed";
  }
  def VecListThreeQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListThreeQWordIndexed";
  }
  def VecListThreeQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  }
  // Lane-indexed lists of four D registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Byte lane subscripting.
  def VecListFourDByteIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourDByteIndexed";
  }
  def VecListFourDByteIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  }

  // Half-word lane subscripting.
  def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourDHWordIndexed";
  }
  def VecListFourDHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListFourDWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourDWordIndexed";
  }
  def VecListFourDWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  }
  // Lane-indexed lists of four Q registers. Every operand below is an i32
  // operand made of a DPR register ($Vd) plus an i32 lane index ($idx); the
  // match classes are parsed by parseVectorList and rendered by
  // addVecListIndexedOperands.

  // Half-word lane subscripting.
  def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourQHWordIndexed";
  }
  def VecListFourQHWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  }

  // Word lane subscripting.
  def VecListFourQWordIndexAsmOperand : AsmOperandClass {
    let RenderMethod = "addVecListIndexedOperands";
    let ParserMethod = "parseVectorList";
    let Name = "VecListFourQWordIndexed";
  }
  def VecListFourQWordIndexed : Operand<i32> {
    let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
    let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  }
  // Load / store fragments that match only when the memory node reports an
  // alignment of at least 8.
  def dword_alignedload
      : PatFrag<(ops node:$ptr),
                (load node:$ptr),
                [{ return cast<LoadSDNode>(N)->getAlign() >= 8; }]>;
  def dword_alignedstore
      : PatFrag<(ops node:$val, node:$ptr),
                (store node:$val, node:$ptr),
                [{ return cast<StoreSDNode>(N)->getAlign() >= 8; }]>;
  // Load / store fragments that match only when the memory node reports an
  // alignment of exactly 4 (larger alignments do not match here).
  def word_alignedload
      : PatFrag<(ops node:$ptr),
                (load node:$ptr),
                [{ return cast<LoadSDNode>(N)->getAlign() == 4; }]>;
  def word_alignedstore
      : PatFrag<(ops node:$val, node:$ptr),
                (store node:$val, node:$ptr),
                [{ return cast<StoreSDNode>(N)->getAlign() == 4; }]>;
  // Load / store fragments that match only when the memory node reports an
  // alignment of exactly 2.
  def hword_alignedload
      : PatFrag<(ops node:$ptr),
                (load node:$ptr),
                [{ return cast<LoadSDNode>(N)->getAlign() == 2; }]>;
  def hword_alignedstore
      : PatFrag<(ops node:$val, node:$ptr),
                (store node:$val, node:$ptr),
                [{ return cast<StoreSDNode>(N)->getAlign() == 2; }]>;
  // Load / store fragments that match only when the memory node reports an
  // alignment of exactly 1.
  def byte_alignedload
      : PatFrag<(ops node:$ptr),
                (load node:$ptr),
                [{ return cast<LoadSDNode>(N)->getAlign() == 1; }]>;
  def byte_alignedstore
      : PatFrag<(ops node:$val, node:$ptr),
                (store node:$val, node:$ptr),
                [{ return cast<StoreSDNode>(N)->getAlign() == 1; }]>;
  // Load / store fragments that match only when the memory node reports an
  // alignment below 4.
  def non_word_alignedload
      : PatFrag<(ops node:$ptr),
                (load node:$ptr),
                [{ return cast<LoadSDNode>(N)->getAlign() < 4; }]>;
  def non_word_alignedstore
      : PatFrag<(ops node:$val, node:$ptr),
                (store node:$val, node:$ptr),
                [{ return cast<StoreSDNode>(N)->getAlign() < 4; }]>;
  458. //===----------------------------------------------------------------------===//
  459. // NEON-specific DAG Nodes.
  460. //===----------------------------------------------------------------------===//
  // VTST node: one integer result and two operands that must share a type.
  def SDTARMVTST
      : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
  def NEONvtst
      : SDNode<"ARMISD::VTST", SDTARMVTST>;
  // Type profiles for vector shifts by an immediate.
  //  * "SHX"   - long and narrow operations, where the source and destination
  //              vectors have different types.
  //  * "SHINS" - shift-and-insert operations.
  // SDTARMVSHIMM, used by several nodes below, is defined outside this section.
  def SDTARMVSHXIMM
      : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
  def SDTARMVSHINSIMM
      : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                             SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

  def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM",     SDTARMVSHXIMM>;

  def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM",    SDTARMVSHIMM>;
  def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM",    SDTARMVSHIMM>;
  def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM",    SDTARMVSHXIMM>;

  def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM",    SDTARMVSHIMM>;
  def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM",    SDTARMVSHIMM>;
  def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM",   SDTARMVSHIMM>;
  def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM",   SDTARMVSHXIMM>;
  def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM",   SDTARMVSHXIMM>;
  def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM",  SDTARMVSHXIMM>;

  def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM",  SDTARMVSHXIMM>;
  def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM",  SDTARMVSHXIMM>;
  def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

  def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM",      SDTARMVSHINSIMM>;
  def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM",      SDTARMVSHINSIMM>;
  // VBSP node: a vector result and three operands, all of the result's type.
  def NEONvbsp
      : SDNode<"ARMISD::VBSP",
               SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>>;
  // VEXT node: a vector result, two operands of the result's type, and a
  // trailing i32 operand.
  def SDTARMVEXT
      : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                             SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
  def NEONvext
      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
  // Two-result shuffle nodes (zip / unzip / transpose): two vector results and
  // two operands, all constrained to the same type.
  def SDTARMVSHUF2
      : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                             SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;

  def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
  def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
  def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
  // Table-lookup nodes. The result and every operand are v8i8; VTBL1 takes
  // two operands and VTBL2 takes three.
  def SDTARMVTBL1
      : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                             SDTCisVT<2, v8i8>]>;
  def SDTARMVTBL2
      : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                             SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;

  def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
  def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
  505. //===----------------------------------------------------------------------===//
  506. // NEON load / store instructions
  507. //===----------------------------------------------------------------------===//
  // Pseudo instruction that loads a Q register as a D-register pair using
  // VLDM; it is expanded to VLDMD after register allocation. The selection
  // pattern matches a v2f64 word_alignedload from $Rn.
  def VLDMQIA
      : PseudoVFPLdStM<(outs DPair:$dst),
                       (ins GPR:$Rn),
                       IIC_fpLoad_m,
                       "",
                       [(set DPair:$dst,
                             (v2f64 (word_alignedload GPR:$Rn)))]>;
  // Pseudo instruction that stores a Q register as a D-register pair using
  // VSTM; it is expanded to VSTMD after register allocation. The selection
  // pattern matches a v2f64 word_alignedstore to $Rn.
  def VSTMQIA
      : PseudoVFPLdStM<(outs),
                       (ins DPair:$src, GPR:$Rn),
                       IIC_fpStore_m,
                       "",
                       [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
  // VLD* pseudo-instruction classes with multi-register operands; these are
  // expanded to real instructions after register allocation.
  //
  // Q-register destination variants. The writeback forms add a GPR $wb result
  // tied to the address ("$addr.addr = $wb") and differ only in how the
  // post-increment is supplied: am6offset, none (fixed), or an rGPR register.
  class VLDQPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst),
                    (ins addrmode6:$addr),
                    itin, "">;
  class VLDQWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset),
                    itin, "$addr.addr = $wb">;
  class VLDQWBfixedPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr),
                    itin, "$addr.addr = $wb">;
  class VLDQWBregisterPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, rGPR:$offset),
                    itin, "$addr.addr = $wb">;
  // QQ-register destination variants of the VLD pseudo classes. The writeback
  // forms add a GPR $wb result tied to the address ("$addr.addr = $wb") and
  // differ only in how the post-increment is supplied: am6offset, none
  // (fixed), or an rGPR register.
  class VLDQQPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst),
                    (ins addrmode6:$addr),
                    itin, "">;
  class VLDQQWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset),
                    itin, "$addr.addr = $wb">;
  class VLDQQWBfixedPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr),
                    itin, "$addr.addr = $wb">;
  class VLDQQWBregisterPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, rGPR:$offset),
                    itin, "$addr.addr = $wb">;
  // QQQQ-register variants of the VLD pseudo classes. Both take the
  // destination as an extra $src input tied to $dst ("$src = $dst"); the
  // writeback form additionally ties $wb to the address.
  class VLDQQQQPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQQQPR:$dst),
                    (ins addrmode6:$addr, QQQQPR:$src),
                    itin, "$src = $dst">;
  class VLDQQQQWBPseudo<InstrItinClass itin>
      : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                    (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                    itin, "$addr.addr = $wb, $src = $dst">;
  557. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // VLD1 : Vector Load (multiple single elements)
  //
  // One-D-register form without writeback. Rm is fixed to 0b1111, Inst{4} is
  // taken from bit 4 of the $Rn operand encoding, and decoding goes through
  // DecodeVLDST1Instruction.
  class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs VecListOneD:$Vd), (ins AddrMode:$Rn),
              IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1111;
  }
  // D-register-pair form of VLD1 without writeback. Rm is fixed to 0b1111,
  // Inst{5-4} is taken from bits 5-4 of the $Rn operand encoding, and decoding
  // goes through DecodeVLDST1Instruction.
  class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd), (ins AddrMode:$Rn),
              IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1111;
  }
  // Non-writeback VLD1 instructions, one per element size. The top two bits
  // of op7_4 distinguish the 8 / 16 / 32 / 64 variants; '?' bits are left
  // unset here.

  // One D register, addrmode6align64 addressing.
  def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
  def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
  def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
  def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

  // D register pair, addrmode6align64or128 addressing.
  def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
  def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  // VLD1 of one D register with address register writeback. Two forms are
  // produced per instantiation:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101;
  //   _register - post-index taken from the rGPR operand $Rm.
  // Both tie $wb to the address ("$Rn.addr = $wb").
  multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
        : NLdSt<0, 0b10, 0b0111, op7_4,
                (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn),
                IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
          Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
        : NLdSt<0, 0b10, 0b0111, op7_4,
                (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm),
                IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
          Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // VLD1QWB: writeback VLD1 forms that load a VecListDPair.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }

    def _register
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VLD1 instructions; each defm yields a _fixed and a _register
  // record from VLD1DWB (one D register) or VLD1QWB (D pair).
  defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;

  defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
  defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with 3 registers
  //
  // VLD1D3: VLD1 writing a VecListThreeD list, no writeback.
  class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0110, op7_4,
            (outs VecListThreeD:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1x3,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;       // Fixed Rm value for the non-writeback form.
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // VLD1D3WB: writeback VLD1 forms that load a VecListThreeD.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // NOTE(review): both records use IIC_VLD1x2u, whereas the non-writeback
  // VLD1D3 uses IIC_VLD1x3 - confirm this itinerary class is intended.
  multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0110, op7_4,
              (outs VecListThreeD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }

    def _register
      : NLdSt<0, 0b10, 0b0110, op7_4,
              (outs VecListThreeD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Three-register VLD1 instructions: plain forms from VLD1D3, then the
  // writeback forms (_fixed / _register) from VLD1D3WB.
  def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
  def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
  def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
  def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

  defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
  // Pseudo-instructions for the three-register VLD1 forms. All of them use
  // itinerary IIC_VLD1x3 and scheduling class WriteVLD3.

  // QQ-register pseudos: plain, fixed-writeback and register-writeback.
  def VLD1d8TPseudo             : VLDQQPseudo<IIC_VLD1x3>,           Sched<[WriteVLD3]>;
  def VLD1d8TPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD1x3>,    Sched<[WriteVLD3]>;
  def VLD1d8TPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d16TPseudo            : VLDQQPseudo<IIC_VLD1x3>,           Sched<[WriteVLD3]>;
  def VLD1d16TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,    Sched<[WriteVLD3]>;
  def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d32TPseudo            : VLDQQPseudo<IIC_VLD1x3>,           Sched<[WriteVLD3]>;
  def VLD1d32TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,    Sched<[WriteVLD3]>;
  def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1d64TPseudo            : VLDQQPseudo<IIC_VLD1x3>,           Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,    Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

  // QQQQ-register pseudos ("High" / "Low" variants).
  def VLD1q8HighTPseudo      : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q8HighTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q8LowTPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q16HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q16LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q32HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q32LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q64HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q64LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  // ...with 4 registers
  //
  // VLD1D4: VLD1 writing a VecListFourD list, no writeback.
  class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0010, op7_4,
            (outs VecListFourD:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1x4,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;           // Fixed Rm value for the non-writeback form.
    let Inst{5-4} = Rn{5-4};   // Bits 5-4 come from the encoded $Rn operand.
  }
  // VLD1D4WB: writeback VLD1 forms that load a VecListFourD.
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // NOTE(review): both records use IIC_VLD1x2u, whereas the non-writeback
  // VLD1D4 uses IIC_VLD1x4 - confirm this itinerary class is intended.
  multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0010, op7_4,
              (outs VecListFourD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }

    def _register
      : NLdSt<0, 0b10, 0b0010, op7_4,
              (outs VecListFourD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Four-register VLD1 instructions: plain forms from VLD1D4, then the
  // writeback forms (_fixed / _register) from VLD1D4WB.
  def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

  defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
  // Pseudo-instructions for the four-register VLD1 forms. All of them use
  // itinerary IIC_VLD1x4 and scheduling class WriteVLD4.

  // QQ-register pseudos: plain, fixed-writeback and register-writeback.
  def VLD1d8QPseudo             : VLDQQPseudo<IIC_VLD1x4>,           Sched<[WriteVLD4]>;
  def VLD1d8QPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD1x4>,    Sched<[WriteVLD4]>;
  def VLD1d8QPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d16QPseudo            : VLDQQPseudo<IIC_VLD1x4>,           Sched<[WriteVLD4]>;
  def VLD1d16QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,    Sched<[WriteVLD4]>;
  def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d32QPseudo            : VLDQQPseudo<IIC_VLD1x4>,           Sched<[WriteVLD4]>;
  def VLD1d32QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,    Sched<[WriteVLD4]>;
  def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1d64QPseudo            : VLDQQPseudo<IIC_VLD1x4>,           Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,    Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

  // QQQQ-register pseudos ("Low" / "High" variants).
  def VLD1q8LowQPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q8HighQPseudo      : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q8HighQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q16LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q16HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q32LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q32HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q64LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q64HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  // VLD2 : Vector Load (multiple 2-element structures)
  //
  // Non-writeback VLD2. The register-list operand type (VdTy), itinerary and
  // address operand are parameters; no Sched<> is attached here, the defs that
  // instantiate this class add their own.
  class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
             InstrItinClass itin, Operand AddrMode>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs VdTy:$Vd),
            (ins AddrMode:$Rn),
            itin,
            "vld2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Rm = 0b1111;           // Fixed Rm value for the non-writeback form.
    let Inst{5-4} = Rn{5-4};   // Bits 5-4 come from the encoded $Rn operand.
  }
  // VLD2 into a D-register pair (op11_8 = 0b1000).
  def VLD2d8
    : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  def VLD2d16
    : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  def VLD2d32
    : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;

  // VLD2 into four D registers (op11_8 = 0b0011).
  def VLD2q8
    : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;
  def VLD2q16
    : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;
  def VLD2q32
    : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;

  // QQ-register pseudos for the four-register forms.
  def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  // ...with address register writeback:
  //
  // VLD2WB yields two records per instantiation:
  //   _fixed    - "$Rn!" syntax, Rm hard-wired to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // Neither record carries a Sched<>; the defm sites add one.
  multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                    RegisterOperand VdTy, InstrItinClass itin,
                    Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              itin,
              "vld2", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }

    def _register
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              itin,
              "vld2", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VLD2 into a D-register pair (op11_8 = 0b1000).
  defm VLD2d8wb
    : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  defm VLD2d16wb
    : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  defm VLD2d32wb
    : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;

  // Writeback VLD2 into four D registers (op11_8 = 0b0011).
  defm VLD2q8wb
    : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
             addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;
  defm VLD2q16wb
    : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
             addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;
  defm VLD2q32wb
    : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
             addrmode6align64or128or256>,
      Sched<[WriteVLD4]>;

  // QQ-register writeback pseudos for the four-register forms.
  def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>,    Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>,    Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>,    Sched<[WriteVLD4]>;
  def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
  // ...with double-spaced registers
  // (op11_8 = 0b1001, register list operand VecListDPairSpaced).
  def VLD2b8
    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  def VLD2b16
    : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  def VLD2b32
    : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
           addrmode6align64or128>,
      Sched<[WriteVLD2]>;

  // Writeback counterparts (_fixed / _register records from VLD2WB).
  defm VLD2b8wb
    : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  defm VLD2b16wb
    : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  defm VLD2b32wb
    : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
             addrmode6align64or128>,
      Sched<[WriteVLD2]>;
  // VLD3 : Vector Load (multiple 3-element structures)
  //
  // Non-writeback VLD3 with three separate DPR results, printed as a
  // brace-enclosed list.
  class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn),
            IIC_VLD3,
            "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Rm = 0b1111;       // Fixed Rm value for the non-writeback form.
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // VLD3 instructions with op11_8 = 0b0100, followed by their QQ pseudos.
  def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
  def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
  def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

  def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  // ...with address register writeback:
  //
  // Writeback VLD3: adds the GPR result $wb (tied to the address register)
  // and an am6offset operand $Rm that is printed directly after $Rn.
  class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm),
            IIC_VLD3u,
            "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // Writeback VLD3 instructions with op11_8 = 0b0100 and their QQ pseudos.
  def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
  def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
  def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

  def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

  // ...with double-spaced registers:
  // (op11_8 = 0b0101)
  def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
  def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
  def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;

  def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
  def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
  def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

  def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

  // ...alternate versions to be allocated odd register numbers:
  def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

  def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  // VLD4 : Vector Load (multiple 4-element structures)
  //
  // Non-writeback VLD4 with four separate DPR results, printed as a
  // brace-enclosed list.
  class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn),
            IIC_VLD4,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Rm = 0b1111;           // Fixed Rm value for the non-writeback form.
    let Inst{5-4} = Rn{5-4};   // Bits 5-4 come from the encoded $Rn operand.
  }
  // VLD4 instructions with op11_8 = 0b0000, followed by their QQ pseudos.
  def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
  def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
  def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

  def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  // ...with address register writeback:
  //
  // Writeback VLD4: adds the GPR result $wb (tied to the address register)
  // and an am6offset operand $Rm that is printed directly after $Rn.
  class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm),
            IIC_VLD4u,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Inst{5-4} = Rn{5-4};   // Bits 5-4 come from the encoded $Rn operand.
  }
  // Writeback VLD4 instructions with op11_8 = 0b0000 and their QQ pseudos.
  def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
  def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
  def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

  def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

  // ...with double-spaced registers:
  // (op11_8 = 0b0001)
  def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
  def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
  def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;

  def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
  def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
  def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

  def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

  // ...alternate versions to be allocated odd register numbers:
  def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

  def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  915. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  // Classes for VLD*LN pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.

  // Lane-load pseudo on a QPR value: $src is tied to $dst, and the lane is
  // given by the immediate $lane.
  class VLDQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback lane-load pseudo on a QPR value: also defines GPR $wb (tied to
  // the address register) and takes an am6offset operand.
  class VLDQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQPR value: $src is tied to $dst.
  class VLDQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback lane-load pseudo on a QQPR value: also defines GPR $wb (tied to
  // the address register) and takes an am6offset operand.
  class VLDQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQQQPR value: $src is tied to $dst.
  class VLDQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback lane-load pseudo on a QQQQPR value: also defines GPR $wb (tied
  // to the address register) and takes an am6offset operand.
  class VLDQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  // VLD1LN : Vector Load (single element to one lane)
  //
  // Selection pattern: the value loaded by LoadOp from $Rn (as i32) is
  // inserted into lane $lane of the vector $src, which is tied to $Vd.
  // NOTE(review): unlike VLD1LN32, no Sched<> is attached to this class -
  // confirm that is intentional.
  class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd,
                    (vector_insert (Ty DPR:$src),
                                   (i32 (LoadOp addrmode6:$Rn)),
                                   imm:$lane))]> {
    let DecoderMethod = "DecodeVLD1LN";
    let Rm = 0b1111;   // Fixed Rm value for the non-writeback form.
  }
  // Same shape as VLD1LN, but the address operand (in both the operand list
  // and the selection pattern) is addrmode6oneL32, and Sched<[WriteVLD1]> is
  // attached.
  class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd,
                    (vector_insert (Ty DPR:$src),
                                   (i32 (LoadOp addrmode6oneL32:$Rn)),
                                   imm:$lane))]>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
    let Rm = 0b1111;   // Fixed Rm value for the non-writeback form.
  }
  // Q-register VLD1LN pseudo carrying a selection pattern: the value loaded
  // by LoadOp from $addr (as i32) is inserted into lane $lane of the QPR
  // vector $src.
  class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp>
    : VLDQLNPseudo<IIC_VLD1ln>, Sched<[WriteVLD1]> {
    let Pattern = [(set QPR:$dst,
                        (vector_insert (Ty QPR:$src),
                                       (i32 (LoadOp addrmode6:$addr)),
                                       imm:$lane))];
  }
  // D-register VLD1LN instructions. The lane index is placed in the upper
  // bits of Inst{7-4}; the 16- and 32-bit forms also copy Rn{5-4} into
  // Inst{5-4}.
  def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7-6} = lane{1-0};
  }
  def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7} = lane{0};
  }

  // Q-register pseudos with the matching element types and load fragments.
  def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
  def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
  def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
  let Predicates = [HasNEON] in {
    // Floating-point lane loads: select the VLD1LN instructions / pseudos
    // defined for the same element width.
    def : Pat<(vector_insert (v4f16 DPR:$src),
                             (f16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8f16 QPR:$src),
                             (f16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v4bf16 DPR:$src),
                             (bf16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8bf16 QPR:$src),
                             (bf16 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v2f32 DPR:$src),
                             (f32 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v4f32 QPR:$src),
                             (f32 (load addrmode6:$addr)),
                             imm:$lane),
              (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // A 64-bit subvector insert to the first 128-bit vector position
    // is a subregister copy that needs no instruction.
    def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
  }
  1021. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // ...with address register writeback:
  //
  // Writeback VLD1LN: adds the GPR result $wb (tied to the address register)
  // and an am6offset operand $Rm printed directly after $Rn. No selection
  // pattern is attached.
  class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src, nohash_imm:$lane),
              IIC_VLD1lnu,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$src = $Vd, $Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
  }
  // Writeback D-register VLD1LN instructions.
  def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
    let Inst{4} = Rn{4};
    let Inst{7-6} = lane{1-0};
  }
  def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
    // Inst{5} and Inst{4} are both driven by Rn{4}.
    let Inst{4} = Rn{4};
    let Inst{5} = Rn{4};
    let Inst{7} = lane{0};
  }

  // Q-register writeback pseudos.
  def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  // VLD2LN : Vector Load (single 2-element structure to one lane)
  //
  // Two DPR results, each tied to its own DPR input ($src1, $src2).
  class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2ln,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD2LN";
    let Rm = 0b1111;       // Fixed Rm value for the non-writeback form.
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // VLD2LN instructions; the lane index fills the upper bits of Inst{7-4}.
  def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

  // ...with double-spaced registers:
  def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  // ...with address register writeback:
  //
  // Writeback VLD2LN: adds the GPR result $wb (tied to the address register)
  // and an am6offset operand $Rm printed directly after $Rn.
  // NOTE(review): no Sched<> is attached here, although the non-writeback
  // VLD2LN class has Sched<[WriteVLD1]> - confirm that is intentional.
  class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2lnu,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD2LN";
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // Writeback VLD2LN instructions and their pseudos.
  def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

  // "q" variants: same op11_8 as the 16/32-bit "d" forms, with a 1 where the
  // "d" forms have a 0 in op7_4.
  def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  // VLD3LN : Vector Load (single 3-element structure to one lane)
  //
  // Three DPR results, each tied to its own DPR input ($src1..$src3).
  class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VLD3ln,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
    let Rm = 0b1111;   // Fixed Rm value for the non-writeback form.
  }
  // VLD3LN instructions; the lane index fills the upper bits of Inst{7-4}.
  def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers:
  def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  //
  // Writeback VLD3LN: adds the GPR result $wb (tied to the address register)
  // and an am6offset operand $Rm printed directly after $Rn.
  class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VLD3lnu,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
              []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
  }
  // Writeback VLD3LN instructions and their pseudos.
  def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

  // "q" variants: same op11_8 as the 16/32-bit "d" forms, with a 1 where the
  // "d" forms have a 0 in op7_4.
  def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  // VLD4LN : Vector Load (single 4-element structure to one lane)
  //
  // Four DPR results, each tied to its own DPR input ($src1..$src4).
  class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VLD4ln,
              "vld4", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD4LN";
    let Rm = 0b1111;       // Fixed Rm value for the non-writeback form.
    let Inst{4} = Rn{4};   // Bit 4 comes from the encoded $Rn operand.
  }
  // VLD4LN instructions.  Lane bits go into the top of Inst{7-4}; the 32-bit
  // forms additionally take Inst{5} from Rn{5}.
  def VLD4LNd8
    : VLD4LN<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; }
  def VLD4LNd16
    : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNd8Pseudo
    : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNd16Pseudo
    : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNd32Pseudo
    : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers:
  def VLD4LNq16
    : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNq16Pseudo
    : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  def VLD4LNq32Pseudo
    : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // VLD4LNWB - four-register single-lane "vld4" that additionally defines
  // $wb, which is tied to $Rn.addr.  Every $srcN input is tied to the
  // corresponding destination register.
  //
  // The class carries Sched<[WriteVLD2]> so that the real writeback
  // instructions have the same scheduling class as the non-writeback VLD4LN
  // class, the VLD3LNWB class and the VLD4LN*Pseudo_UPD pseudos; previously it
  // was the only class in this family without one.
  class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VLD4lnu, "vld4", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
              []>,
      Sched<[WriteVLD2]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD4LN";
  }
  // Writeback VLD4LN instructions; bit assignments mirror the non-writeback
  // definitions (lane in the top bits, Rn{5} in Inst{5} for 32-bit elements).
  def VLD4LNd8_UPD
    : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; }
  def VLD4LNd16_UPD
    : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNd8Pseudo_UPD
    : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNd16Pseudo_UPD
    : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNd32Pseudo_UPD
    : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

  def VLD4LNq16_UPD
    : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VLD4LNq16Pseudo_UPD
    : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  def VLD4LNq32Pseudo_UPD
    : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1241. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  // VLD1DUP : Vector Load (single element to all lanes)
  // The selection pattern matches an ARMvdup of an i32 produced by LoadOp on
  // the AddrMode operand and yields a value of type Ty in a
  // VecListOneDAllLanes operand.
  class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
                Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
            [(set VecListOneDAllLanes:$Vd,
                  (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1111;
  }
  // Single-D-register VLD1DUP instructions, one per element size.
  def VLD1DUPd8
    : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, addrmode6dupalignNone>;
  def VLD1DUPd16
    : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, addrmode6dupalign16>;
  def VLD1DUPd32
    : VLD1DUP<{1,0,0,?}, "32", v2i32, load, addrmode6dupalign32>;

  // With NEON available, a v2f32 ARMvdup of an f32 load selects VLD1DUPd32.
  let Predicates = [HasNEON] in
  def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
            (VLD1DUPd32 addrmode6:$addr)>;
  // VLD1QDUP - "vld1" all-lanes load into a D-register pair
  // (VecListDPairAllLanes).  The selection pattern matches an ARMvdup of an
  // i32 produced by LoadOp on the AddrMode operand, yielding type Ty.
  //
  // Sched<[WriteVLD2]> is attached to match the single-register VLD1DUP
  // class; previously this class had no scheduling class at all.
  class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
                 Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
            [(set VecListDPairAllLanes:$Vd,
                  (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
      Sched<[WriteVLD2]> {
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  // D-register-pair VLD1DUP instructions, one per element size.
  def VLD1DUPq8
    : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, addrmode6dupalignNone>;
  def VLD1DUPq16
    : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, addrmode6dupalign16>;
  def VLD1DUPq32
    : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, addrmode6dupalign32>;

  // With NEON available, a v4f32 ARMvdup of an f32 load selects VLD1DUPq32.
  let Predicates = [HasNEON] in
  def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
            (VLD1DUPq32 addrmode6:$addr)>;
  1286. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  // ...with address register writeback:
  // Each instantiation produces two records: _fixed ("$Rn!" syntax, Rm pinned
  // to 0b1101) and _register (Rm supplied as an rGPR operand).  Both define
  // $wb, tied to $Rn.addr, and have no selection pattern.
  multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<1, 0b10, 0b1100, op7_4,
              (outs VecListOneDAllLanes:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
      : NLdSt<1, 0b10, 0b1100, op7_4,
              (outs VecListOneDAllLanes:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []> {
      let DecoderMethod = "DecodeVLD1DupInstruction";
      let Inst{4} = Rn{4};
    }
  }
  // VLD1QDUPWB - writeback forms of the D-register-pair "vld1" all-lanes
  // load.  Each instantiation produces two records: _fixed ("$Rn!" syntax, Rm
  // pinned to 0b1101) and _register (Rm supplied as an rGPR operand).  Both
  // define $wb, tied to $Rn.addr, and have no selection pattern.
  //
  // Both records now carry Sched<[WriteVLD1]>; previously only _fixed did,
  // leaving _register without a scheduling class.
  multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<1, 0b10, 0b1100, op7_4,
              (outs VecListDPairAllLanes:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
      let DecoderMethod = "DecodeVLD1DupInstruction";
    }
    def _register
      : NLdSt<1, 0b10, 0b1100, op7_4,
              (outs VecListDPairAllLanes:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []>,
        Sched<[WriteVLD1]> {
      let Inst{4} = Rn{4};
      let DecoderMethod = "DecodeVLD1DupInstruction";
    }
  }
  // Writeback VLD1DUP instantiations (each defm yields _fixed and _register).
  // Single D register:
  defm VLD1DUPd8wb
    : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
  defm VLD1DUPd16wb
    : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
  defm VLD1DUPd32wb
    : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
  // D register pair:
  defm VLD1DUPq8wb
    : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
  defm VLD1DUPq16wb
    : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
  defm VLD1DUPq32wb
    : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
  // VLD2DUP : Vector Load (single 2-element structure to all lanes)
  // VdTy selects the register-list operand class; there is no selection
  // pattern.  Rm is pinned to 0b1111 and Inst{4} comes from Rn{4}.
  class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                Operand AddrMode>
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1111;
  }
  // VLD2DUP instructions on an adjacent D-register pair.
  def VLD2DUPd8
    : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, addrmode6dupalign16>;
  def VLD2DUPd16
    : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, addrmode6dupalign32>;
  def VLD2DUPd32
    : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, addrmode6dupalign64>;

  // HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, otherwise
  // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
  // ...with double-spaced registers
  def VLD2DUPd8x2
    : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
              addrmode6dupalign16>;
  def VLD2DUPd16x2
    : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
              addrmode6dupalign32>;
  def VLD2DUPd32x2
    : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
              addrmode6dupalign64>;

  // Even/odd pseudos for each element size.
  def VLD2DUPq8EvenPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq8OddPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16EvenPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16OddPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32EvenPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32OddPseudo
    : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // Each instantiation produces two records: _fixed ("$Rn!" syntax, Rm pinned
  // to 0b1101) and _register (Rm supplied as an rGPR operand).  Both define
  // $wb, tied to $Rn.addr.
  multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                       Operand AddrMode> {
    def _fixed
      : NLdSt<1, 0b10, 0b1101, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLD2DupInstruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
      : NLdSt<1, 0b10, 0b1101, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLD2DupInstruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback VLD2DUP instantiations on an adjacent D-register pair.
  defm VLD2DUPd8wb
    : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, addrmode6dupalign16>;
  defm VLD2DUPd16wb
    : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, addrmode6dupalign32>;
  defm VLD2DUPd32wb
    : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, addrmode6dupalign64>;

  // Writeback VLD2DUP instantiations on a spaced D-register pair.
  defm VLD2DUPd8x2wb
    : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                addrmode6dupalign16>;
  defm VLD2DUPd16x2wb
    : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                addrmode6dupalign32>;
  defm VLD2DUPd32x2wb
    : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                addrmode6dupalign64>;

  // Writeback pseudos for the odd halves, fixed-increment form...
  def VLD2DUPq8OddPseudoWB_fixed
    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16OddPseudoWB_fixed
    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32OddPseudoWB_fixed
    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  // ...and register-increment form.
  def VLD2DUPq8OddPseudoWB_register
    : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq16OddPseudoWB_register
    : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  def VLD2DUPq32OddPseudoWB_register
    : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  // VLD3DUP : Vector Load (single 3-element structure to all lanes)
  // Three D-register results, no selection pattern.  Rm is pinned to 0b1111
  // and Inst{4} is forced to 0.
  class VLD3DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6dup:$Rn),
            IIC_VLD3dup, "vld3", Dt,
            "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3DupInstruction";
    let Inst{4} = 0;
    let Rm = 0b1111;
  }
  // VLD3DUP instructions and their pseudos, one per element size.
  def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
  def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
  def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

  def VLD3DUPd8Pseudo
    : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPd16Pseudo
    : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPd32Pseudo
    : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers (not used for codegen):
  def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
  def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
  def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

  def VLD3DUPq8EvenPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq8OddPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq16EvenPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq16OddPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq32EvenPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  def VLD3DUPq32OddPseudo
    : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // Adds the $wb result (tied to $Rn.addr) and an am6offset $Rm operand to
  // the three-register all-lanes "vld3".  Inst{4} is forced to 0.
  class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins AddrMode:$Rn, am6offset:$Rm),
            IIC_VLD3dupu, "vld3", Dt,
            "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3DupInstruction";
    let Inst{4} = 0;
  }
  // Writeback VLD3DUP instructions; all use the addrmode6dupalign64 operand.
  def VLD3DUPd8_UPD
    : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
  def VLD3DUPd16_UPD
    : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
  def VLD3DUPd32_UPD
    : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
  def VLD3DUPq8_UPD
    : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
  def VLD3DUPq16_UPD
    : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
  def VLD3DUPq32_UPD
    : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

  // Matching writeback pseudos.
  def VLD3DUPd8Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPd16Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPd32Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPq8OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPq16OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  def VLD3DUPq32OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  // VLD4DUP : Vector Load (single 4-element structure to all lanes)
  // Four D-register results, no selection pattern.  Rm is pinned to 0b1111
  // and Inst{4} comes from Rn{4}.
  //
  // Sched<[WriteVLD2]> is attached so the real instructions share the
  // scheduling class used by VLD3DUP, VLD4DUPWB and the VLD4DUP pseudos;
  // previously this class had none.
  class VLD4DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6dup:$Rn),
            IIC_VLD4dup, "vld4", Dt,
            "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []>,
      Sched<[WriteVLD2]> {
    let Rm = 0b1111;
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD4DupInstruction";
  }
  // VLD4DUP instructions and their pseudos.  The 32-bit forms leave Inst{6}
  // open in op7_4 and fill it from Rn{5}.
  def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
  def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
  def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> {
    let Inst{6} = Rn{5};
  }

  def VLD4DUPd8Pseudo
    : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPd16Pseudo
    : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPd32Pseudo
    : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

  // ...with double-spaced registers (not used for codegen):
  def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
  def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
  def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> {
    let Inst{6} = Rn{5};
  }

  def VLD4DUPq8EvenPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq8OddPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq16EvenPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq16OddPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq32EvenPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  def VLD4DUPq32OddPseudo
    : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  // ...with address register writeback:
  // Adds the $wb result (tied to $Rn.addr) and an am6offset $Rm operand to
  // the four-register all-lanes "vld4".  Inst{4} comes from Rn{4}.
  class VLD4DUPWB<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6dup:$Rn, am6offset:$Rm),
            IIC_VLD4dupu, "vld4", Dt,
            "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD4DupInstruction";
    let Inst{4} = Rn{4};
  }
  // Writeback VLD4DUP instructions.  The 32-bit forms leave Inst{6} open in
  // op7_4 and fill it from Rn{5}.
  def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
  def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
  def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> {
    let Inst{6} = Rn{5};
  }
  def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
  def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
  def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> {
    let Inst{6} = Rn{5};
  }

  // Matching writeback pseudos.
  def VLD4DUPd8Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPd16Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPd32Pseudo_UPD
    : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPq8OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPq16OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  def VLD4DUPq32OddPseudo_UPD
    : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1496. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  1497. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  // Classes for VST* pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.

  // QPR source, no writeback.
  class VSTQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QPR:$src),
                  itin, "">;

  // QPR source, writeback with an am6offset operand.
  class VSTQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                  itin, "$addr.addr = $wb">;

  // QPR source, writeback without an offset operand.
  class VSTQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, QPR:$src),
                  itin, "$addr.addr = $wb">;

  // QPR source, writeback with an rGPR offset operand.
  class VSTQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset, QPR:$src),
                  itin, "$addr.addr = $wb">;
  // QQPR source, no writeback.
  class VSTQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQPR:$src),
                  itin, "">;

  // QQPR source, writeback with an am6offset operand.
  class VSTQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                  itin, "$addr.addr = $wb">;

  // QQPR source, writeback without an offset operand.
  class VSTQQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, QQPR:$src),
                  itin, "$addr.addr = $wb">;

  // QQPR source, writeback with an rGPR offset operand.
  class VSTQQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset, QQPR:$src),
                  itin, "$addr.addr = $wb">;
  // QQQQPR source, no writeback.
  class VSTQQQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQQQPR:$src),
                  itin, "">;

  // QQQQPR source, writeback with an am6offset operand.
  class VSTQQQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                  itin, "$addr.addr = $wb">;
  // VST1 : Vector Store (multiple single elements)
  // One-D-register form: Rm is pinned to 0b1111 and Inst{4} comes from Rn{4}.
  class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListOneD:$Vd),
            IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1111;
  }
  // D-register-pair "vst1": Rm is pinned to 0b1111 and Inst{5-4} come from
  // Rn{5-4}.
  class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListDPair:$Vd),
            IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1111;
  }
  // One-register VST1 instructions, one per element size.
  def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
  def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
  def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
  def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

  // Two-register VST1 instructions, one per element size.
  def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
  def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with address register writeback:
  // Each instantiation produces two records: _fixed ("$Rn!" syntax, Rm pinned
  // to 0b1101) and _register (Rm supplied as an rGPR operand).  Both define
  // $wb, tied to $Rn.addr.
  // NOTE(review): the itinerary is the load class IIC_VLD1u although these
  // are stores; a store counterpart may be intended -- confirm against the
  // itinerary class definitions.
  multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b00, 0b0111, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, VecListOneD:$Vd),
              IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVST1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
      : NLdSt<0, 0b00, 0b0111, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
              IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
        Sched<[WriteVST1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Writeback forms of the D-register-pair "vst1".  Each instantiation
  // produces _fixed ("$Rn!" syntax, Rm pinned to 0b1101) and _register (Rm
  // supplied as an rGPR operand); both define $wb, tied to $Rn.addr.
  // NOTE(review): the itinerary is the load class IIC_VLD1x2u although these
  // are stores; a store counterpart may be intended -- confirm against the
  // itinerary class definitions.
  multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b00, 0b1010, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, VecListDPair:$Vd),
              IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
      : NLdSt<0, 0b00, 0b1010, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
              IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []>,
        Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VST1 instantiations (each defm yields _fixed and _register).
  // One register:
  defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
  defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
  defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
  // Two registers:
  defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
  defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  // ...with 3 registers
  // Three-D-register "vst1": Rm is pinned to 0b1111 and Inst{4} comes from
  // Rn{4}.
  class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListThreeD:$Vd),
            IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1111;
  }
  // Writeback forms of the three-D-register "vst1".  Each instantiation
  // produces _fixed ("$Rn!" syntax, Rm pinned to 0b1101) and _register (Rm
  // supplied as an rGPR operand); both define $wb, tied to $Rn.addr.
  //
  // The itinerary is the store class IIC_VST1x3u, the same one used by the
  // VST1d*TPseudoWB_* pseudos.  It was previously the load class IIC_VLD1x3u.
  multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b00, 0b0110, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, VecListThreeD:$Vd),
              IIC_VST1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVST3]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
    def _register
      : NLdSt<0, 0b00, 0b0110, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
              IIC_VST1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []>,
        Sched<[WriteVST3]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
  }
  // Three-register VST1 instructions, one per element size.
  def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
  def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
  def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
  def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

  // ...and their writeback forms.
  defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

  // Pseudos on a QQPR source: plain, fixed writeback, register writeback.
  def VST1d8TPseudo
    : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d8TPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d8TPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d16TPseudo
    : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d16TPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d16TPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d32TPseudo
    : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d32TPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d32TPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d64TPseudo
    : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1d64TPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  def VST1d64TPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

  // Pseudos on a QQQQPR source: High (plain and _UPD) and Low (_UPD only).
  def VST1q8HighTPseudo
    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q16HighTPseudo
    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q32HighTPseudo
    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q64HighTPseudo
    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q8HighTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q16HighTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q32HighTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q64HighTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q8LowTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q16LowTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q32LowTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  def VST1q64LowTPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  // ...with 4 registers
  // Four-D-register "vst1": Rm is pinned to 0b1111 and Inst{5-4} come from
  // Rn{5-4}.
  class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1111;
  }
  // Writeback forms of the four-D-register "vst1".  Each instantiation
  // produces _fixed ("$Rn!" syntax, Rm pinned to 0b1101) and _register (Rm
  // supplied as an rGPR operand); both define $wb, tied to $Rn.addr.
  //
  // The itinerary is the store class IIC_VST1x4u, the same one used by the
  // VST1d*QPseudoWB_* pseudos.  It was previously the load class IIC_VLD1x4u.
  multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b00, 0b0010, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, VecListFourD:$Vd),
              IIC_VST1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVST4]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
    def _register
      : NLdSt<0, 0b00, 0b0010, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
              IIC_VST1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb",
              []>,
        Sched<[WriteVST4]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST1Instruction";
    }
  }
  // Four-register VST1 instructions, one per element size.
  def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // ...and their writeback forms.
  defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // Pseudos on a QQPR source: plain, fixed writeback, register writeback.
  def VST1d8QPseudo
    : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d8QPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d8QPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d16QPseudo
    : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d16QPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d16QPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d32QPseudo
    : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d32QPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d32QPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d64QPseudo
    : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1d64QPseudoWB_fixed
    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  def VST1d64QPseudoWB_register
    : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

  // Pseudos on a QQQQPR source: High (plain and _UPD) and Low (_UPD only).
  def VST1q8HighQPseudo
    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q16HighQPseudo
    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q32HighQPseudo
    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q64HighQPseudo
    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q8HighQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q16HighQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q32HighQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q64HighQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q8LowQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q16LowQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q32LowQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  def VST1q64LowQPseudo_UPD
    : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  // VST2 : Vector Store (multiple 2-element structures)
  // The opcode bits, register-list operand class and itinerary are all
  // parameters.  Rm is pinned to 0b1111 and Inst{5-4} come from Rn{5-4}.
  // No Sched is attached here; the defs that use this class add their own.
  class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
             InstrItinClass itin, Operand AddrMode>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins AddrMode:$Rn, VdTy:$Vd),
            itin, "vst2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1111;
  }
  // VST2 on a D-register pair (op11_8 = 0b1000).
  def VST2d8
    : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
           addrmode6align64or128>,
      Sched<[WriteVST2]>;
  def VST2d16
    : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
           addrmode6align64or128>,
      Sched<[WriteVST2]>;
  def VST2d32
    : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
           addrmode6align64or128>,
      Sched<[WriteVST2]>;

  // VST2 on four D registers (op11_8 = 0b0011).
  def VST2q8
    : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVST4]>;
  def VST2q16
    : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVST4]>;
  def VST2q32
    : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
           addrmode6align64or128or256>,
      Sched<[WriteVST4]>;

  // Pseudos for the four-register forms.
  def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  // ...with address register writeback:
  // Each instantiation produces two records: _fixed ("$Rn!" syntax, Rm pinned
  // to 0b1101) and _register (Rm supplied as an rGPR operand).  Both define
  // $wb, tied to $Rn.addr.
  // NOTE(review): the itinerary is the load class IIC_VLD1u although these
  // are "vst2" stores; a store counterpart may be intended -- confirm against
  // the itinerary class definitions.
  multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                     RegisterOperand VdTy, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b00, op11_8, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, VdTy:$Vd),
              IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    }
    def _register
      : NLdSt<0, 0b00, op11_8, op7_4,
              (outs GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd),
              IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
        Sched<[WriteVST2]> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // VST2QWB: writeback forms of the four-D-register "vst2" store
  // (op11_8 fixed at 0b0011, VecListFourD register list).
  //   _fixed    - prints "$Vd, $Rn!" and hard-wires Rm to 0b1101.
  //   _register - takes an extra rGPR:$Rm operand and prints "$Vd, $Rn, $Rm".
  // Both return the updated address in GPR:$wb, tied to $Rn.addr.
  //
  // The itinerary is IIC_VST2x2u, the same class the VST2q*PseudoWB_fixed /
  // _register pseudo-instructions use, rather than a VLD* (load) itinerary.
  multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                       "vst2", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
    def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                          IIC_VST2x2u,
                          "vst2", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
  }
  // Writeback instantiations, D-register-pair lists (each defm yields a
  // _fixed and a _register record).
  defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
                           addrmode6align64or128>;
  defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
  defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

  // Writeback instantiations, four-D-register lists.
  defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
  defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

  // Writeback pseudo-instructions: fixed-increment variants...
  def VST2q8PseudoWB_fixed  : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;
  def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;
  def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;
  // ...and register-increment variants.
  def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  // ...with double-spaced registers
  // Same shape as the VST2d* stores but with op11_8 = 0b1001 and the
  // VecListDPairSpaced register list. The non-writeback records carry
  // Sched<[WriteVST2]> like VST2d8/16/32, so every "vst2" D-pair store has a
  // scheduling class (the writeback forms get theirs from VST2DWB).
  def VST2b8  : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>,
                Sched<[WriteVST2]>;
  def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>,
                Sched<[WriteVST2]>;
  def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>,
                Sched<[WriteVST2]>;

  // Writeback forms (each defm yields a _fixed and a _register record).
  defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                           addrmode6align64or128>;
  // VST3 : Vector Store (multiple 3-element structures)
  // Non-writeback form: three separate DPR operands ($Vd, $src2, $src3) are
  // printed as a brace-enclosed list followed by the address.
  class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
            IIC_VST3, "vst3", Dt,
            "\\{$Vd, $src2, $src3\\}, $Rn", "", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Rm            = 0b1111;
    // Only bit 4 of the instruction is taken from the $Rn operand value here.
    let Inst{4}       = Rn{4};
  }
  // Real instructions, op11_8 = 0b0100.
  def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
  def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
  def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

  // Pseudo-instruction counterparts built on VSTQQPseudo.
  def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn.
  class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3),
            IIC_VST3u, "vst3", Dt,
            "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Inst{4}       = Rn{4};
  }
  // Writeback real instructions, op11_8 = 0b0100.
  def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
  def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
  def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

  // Writeback pseudo-instruction counterparts built on VSTQQWBPseudo.
  def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // ...with double-spaced registers:
  // Same classes as the VST3d* records, with op11_8 = 0b0101.
  def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
  def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
  def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;

  def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
  def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
  def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

  def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

  // ...alternate versions to be allocated odd register numbers:
  def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

  def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // VST4 : Vector Store (multiple 4-element structures)
  // Non-writeback form: four separate DPR operands ($Vd, $src2, $src3, $src4)
  // are printed as a brace-enclosed list followed by the address.
  class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4, "vst4", Dt,
            "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
            "", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Rm            = 0b1111;
    // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand value.
    let Inst{5-4}     = Rn{5-4};
  }
  // Real instructions, op11_8 = 0b0000.
  def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
  def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
  def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

  // Pseudo-instruction counterparts built on VSTQQPseudo.
  def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn.
  class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4u, "vst4", Dt,
            "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Inst{5-4}     = Rn{5-4};
  }
  // Writeback real instructions, op11_8 = 0b0000.
  def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
  def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
  def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

  // Writeback pseudo-instruction counterparts built on VSTQQWBPseudo.
  def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  // ...with double-spaced registers:
  // Same classes as the VST4d* records, with op11_8 = 0b0001.
  def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
  def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
  def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;

  def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
  def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
  def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

  def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

  // ...alternate versions to be allocated odd register numbers:
  def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

  def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  1907. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Classes for VST*LN pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.
  //
  // One pair of classes per source register class (QPR, QQPR, QQQQPR): a plain
  // form with no results, and a writeback (WB) form that adds a GPR:$wb result
  // tied to $addr.addr plus an am6offset:$offset input.

  // Source in a QPR register.
  class VSTQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // Source in a QQPR register.
  class VSTQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // Source in a QQQQPR register.
  class VSTQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;
  // VST1LN : Vector Store (single element from one lane)
  // The selection pattern matches StoreOp applied to the lane that ExtractOp
  // pulls out of a Ty-typed DPR:$Vd, stored at AddrMode:$Rn.
  class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
              IIC_VST1ln, "vst1", Dt,
              "\\{$Vd[$lane]\\}, $Rn", "",
              [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                        AddrMode:$Rn)]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
    let Rm            = 0b1111;
  }
  // Q-register pseudo for VST1LN: reuses VSTQLNPseudo's operands and attaches
  // the lane-store selection pattern for a Ty-typed QPR:$src.
  class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNPseudo<IIC_VST1ln>,
      Sched<[WriteVST1]> {
    let Pattern =
        [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)];
  }
  // D-register lane stores. Each record places the lane index in the upper
  // bits of Inst{7-4}; the 16- and 32-bit forms also copy bits from the $Rn
  // operand value into the low bits of that field.
  def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                        ARMvgetlaneu, addrmode6> {
    let Inst{7-5} = lane{2-0};
  }
  def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                         ARMvgetlaneu, addrmode6> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }
  def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                         addrmode6oneL32> {
    let Inst{7}   = lane{0};
    let Inst{5-4} = Rn{5-4};
  }

  // Q-register pseudo-instructions.
  def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
  def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
  def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
  // Floating-point lane stores select the same instructions as the integer
  // forms of matching element width (f32 -> 32-bit, f16 -> 16-bit).
  let Predicates = [HasNEON] in {
    // 32-bit elements.
    def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // 16-bit elements.
    def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
  }
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input; the
  // selection pattern sets $wb from the post-indexed StoreOp.
  class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs GPR:$wb),
              (ins AdrMode:$Rn, am6offset:$Rm,
                   DPR:$Vd, nohash_imm:$lane),
              IIC_VST1lnu, "vst1", Dt,
              "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb",
              [(set GPR:$wb,
                    (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                             AdrMode:$Rn, am6offset:$Rm))]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
  }
  // Q-register writeback pseudo for VST1LN: reuses VSTQLNWBPseudo's operands
  // and attaches the post-indexed lane-store pattern for a Ty-typed QPR:$src.
  class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNWBPseudo<IIC_VST1lnu>,
      Sched<[WriteVST1]> {
    let Pattern =
        [(set GPR:$wb,
              (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                       addrmode6:$addr, am6offset:$offset))];
  }
  // D-register lane stores with writeback; the bit assignments mirror the
  // non-writeback VST1LNd8/16/32 records.
  def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                              ARMvgetlaneu, addrmode6> {
    let Inst{7-5} = lane{2-0};
  }
  def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16,
                               post_truncsti16, ARMvgetlaneu, addrmode6> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }
  def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                               extractelt, addrmode6oneL32> {
    let Inst{7}   = lane{0};
    let Inst{5-4} = Rn{5-4};
  }

  // Q-register writeback pseudo-instructions.
  def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8,
                                            ARMvgetlaneu>;
  def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,
                                            ARMvgetlaneu>;
  def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
  2009. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  // VST2LN : Vector Store (single 2-element structure from one lane)
  // Non-writeback form: two DPR operands, each printed with the same [$lane]
  // index. No selection pattern is attached.
  class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
              IIC_VST2ln, "vst2", Dt,
              "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST2LN";
    let Rm            = 0b1111;
    let Inst{4}       = Rn{4};
  }
  // D-register forms; the lane index occupies the top bits of Inst{7-4}.
  def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // Pseudo-instruction counterparts taking a single QPR source.
  def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  // ...with double-spaced registers:
  // These differ from the d16/d32 records by a 1 in op7_4 where those have 0.
  // The Inst{4} assignment repeats the one already made by the VST2LN class.
  def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
    let Inst{4}   = Rn{4};
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
    let Inst{4} = Rn{4};
    let Inst{7} = lane{0};
  }

  // Pseudo-instruction counterparts taking a QQPR source.
  def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn. No selection pattern is attached.
  //
  // Sched<[WriteVST1]> matches the non-writeback VST2LN class and the
  // VST2LN*Pseudo_UPD records, so the real writeback instructions have a
  // scheduling class too.
  class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
              "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST2LN";
  }
  // D-register writeback forms.
  def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

  // Writeback forms of the VST2LNq16/q32 encodings (op7_4 carries a 1 where
  // the d16/d32 records have 0).
  def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  // VST3LN : Vector Store (single 3-element structure from one lane)
  // Non-writeback form: three DPR operands, each printed with the same [$lane]
  // index. No selection pattern is attached, and no bits of Inst are taken
  // from $Rn in this class.
  class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VST3ln, "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
    let Rm            = 0b1111;
  }
  // D-register forms; the lane index occupies the top bits of Inst{7-4}.
  def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Pseudo-instruction counterparts taking a QQPR source.
  def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with double-spaced registers:
  // These differ from the d16/d32 records by a 1 in op7_4 where those have 0.
  def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }
  // Pseudo-instruction counterparts taking a QQQQPR source. They carry
  // Sched<[WriteVST2]> like the VST3LNd*Pseudo records and the VST3LN class.
  def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> matches the non-writeback VST3LN class and the
  // VST3LN*Pseudo_UPD records, so the real writeback instructions have a
  // scheduling class too.
  class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VST3lnu, "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
  }
  // D-register writeback forms.
  def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
  def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
  def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

  // Writeback forms of the VST3LNq16/q32 encodings (op7_4 carries a 1 where
  // the d16/d32 records have 0).
  def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>,
                            Sched<[WriteVST2]>;
  def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>,
                            Sched<[WriteVST2]>;
  // VST4LN : Vector Store (single 4-element structure from one lane)
  // Non-writeback form: four DPR operands, each printed with the same [$lane]
  // index. No selection pattern is attached.
  class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VST4ln, "vst4", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST4LN";
    let Rm            = 0b1111;
    let Inst{4}       = Rn{4};
  }
  // D-register forms; the lane index occupies the top bits of Inst{7-4}.
  // The 32-bit form additionally takes Inst{5} from the $Rn operand value.
  def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Pseudo-instruction counterparts taking a QQPR source.
  def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  // ...with double-spaced registers:
  // These differ from the d16/d32 records by a 1 in op7_4 where those have 0.
  def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Pseudo-instruction counterparts taking a QQQQPR source.
  def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // Adds a GPR:$wb result tied to $Rn.addr and an am6offset:$Rm input that is
  // printed directly after $Rn. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> matches the non-writeback VST4LN class and the
  // VST4LN*Pseudo_UPD records, so the real writeback instructions have a
  // scheduling class too.
  class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
              IIC_VST4lnu, "vst4", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST4LN";
  }
  // D-register writeback forms.
  def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
  def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
  def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

  // Writeback forms of the VST4LNq16/q32 encodings (op7_4 carries a 1 where
  // the d16/d32 records have 0).
  def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>,
                            Sched<[WriteVST2]>;
  def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>,
                            Sched<[WriteVST2]>;
  2201. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Use vld1/vst1 for unaligned f64 load / store
  // Little-endian: halfword-aligned accesses select the 16-bit element forms
  // and byte-aligned accesses select the 8-bit element forms.
  let Predicates = [IsLE, HasNEON] in {
    def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
              (VLD1d16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d16 addrmode6:$addr, DPR:$value)>;

    def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
              (VLD1d8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d8 addrmode6:$addr, DPR:$value)>;
  }
  // Big-endian: every non-word-aligned access selects the 64-bit element form.
  let Predicates = [IsBE, HasNEON] in {
    def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
              (VLD1d64 addrmode6:$addr)>;
    def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d64 addrmode6:$addr, DPR:$value)>;
  }
  // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
  // load / store if it's legal.
  // Doubleword-aligned accesses select the 64-bit element forms regardless of
  // endianness.
  let Predicates = [HasNEON] in {
    def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
              (VLD1q64 addrmode6:$addr)>;
    def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q64 addrmode6:$addr, QPR:$value)>;
  }
  // Little-endian only: word-, halfword- and byte-aligned accesses select the
  // 32-, 16- and 8-bit element forms respectively.
  let Predicates = [IsLE, HasNEON] in {
    def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
              (VLD1q32 addrmode6:$addr)>;
    def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q32 addrmode6:$addr, QPR:$value)>;

    def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
              (VLD1q16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q16 addrmode6:$addr, QPR:$value)>;

    def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
              (VLD1q8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q8 addrmode6:$addr, QPR:$value)>;
  }
  2241. //===----------------------------------------------------------------------===//
  2242. // Instruction Classes
  2243. //===----------------------------------------------------------------------===//
  // Basic 2-register operations: double- and quad-register.
  // Both classes select OpNode applied to an OpTy-typed $Vm, producing a
  // ResTy-typed $Vd; they differ in register class (DPR vs. QPR), itinerary
  // (IIC_VUNAD vs. IIC_VUNAQ) and the 0/1 argument passed to N2V before op4.
  class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

  class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
  // Basic 2-register intrinsics, both double- and quad-register.
  // Unlike N2VD/N2VQ, the itinerary is a template argument and the operator
  // is an SDPatternOperator.
  class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Same as above, but not predicated.
  // Built on N2Vnp, which takes a shorter encoding-argument list and no
  // explicit operand/constraint strings.
  class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
            (outs DPR:$Vd),
            (ins DPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
            (outs QPR:$Vd),
            (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Similar to N2VQIntnp with some more encoding bits exposed (crypto).
  // The template parameters are declared in the order (op6, op7) but are
  // forwarded to N2Vnp as (op7, op6).
  class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                   bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd),
            (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Same as N2VQIntXnp but with Vd as a src register.
  // The extra QPR:$src input is the first operand of IntOp and is constrained
  // to the same register as the $Vd result.
  class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                    bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd),
            (ins QPR:$src, QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
    let Constraints = "$src = $Vd";
  }
  // Narrow 2-register operations.
  // QPR source of type TyQ, DPR result of type TyD.
  class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyD, ValueType TyQ, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

  // Narrow 2-register intrinsics.
  // Same shape as N2VN with an SDPatternOperator in place of the SDNode.
  class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
  // Long 2-register operations (currently only used for VMOVL).
  // DPR source of type TyD, QPR result of type TyQ.
  class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

  // Long 2-register intrinsics.
  // Same shape as N2VL with an SDPatternOperator in place of the SDNode.
  class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
  // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
  // Both printed registers are results; each is tied to its own input
  // ($src1 = $Vd, $src2 = $Vm). No selection pattern is attached.
  class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr,
                    string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
          (outs DPR:$Vd, DPR:$Vm),
          (ins DPR:$src1, DPR:$src2),
          IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;

  // The quad-register form takes its itinerary as a template argument.
  class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                    InstrItinClass itin, string OpcodeStr, string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
          (outs QPR:$Vd, QPR:$Vm),
          (ins QPR:$src1, QPR:$src2),
          itin, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;
  // Basic 3-register operations: double- and quad-register.
  // Double-register form: $Vd = OpNode($Vn, $Vm), with both sources typed as
  // OpTy and the result as ResTy. Commutable is forwarded to isCommutable.
  class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Same as N3VD but no data type.
  // Built on N3VX, which takes no Dt string.
  class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              SDNode OpNode, bit Commutable>
    : N3VX<op24, op23, op21_20, op11_8, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vn, DPR:$Vm),
           N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  2370. class N3VDSL<bits<2> op21_20, bits<4> op11_8,
  2371. InstrItinClass itin, string OpcodeStr, string Dt,
  2372. ValueType Ty, SDNode ShOp>
  2373. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2374. (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2375. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2376. [(set (Ty DPR:$Vd),
  2377. (Ty (ShOp (Ty DPR:$Vn),
  2378. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  2379. // All of these have a two-operand InstAlias.
  2380. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2381. let isCommutable = 0;
  2382. }
  2383. class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
  2384. string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  2385. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2386. (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2387. NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
  2388. [(set (Ty DPR:$Vd),
  2389. (Ty (ShOp (Ty DPR:$Vn),
  2390. (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  2391. // All of these have a two-operand InstAlias.
  2392. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2393. let isCommutable = 0;
  2394. }
  2395. class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2396. InstrItinClass itin, string OpcodeStr, string Dt,
  2397. ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  2398. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2399. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2400. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2401. [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2402. // All of these have a two-operand InstAlias.
  2403. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2404. let isCommutable = Commutable;
  2405. }
  2406. class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2407. InstrItinClass itin, string OpcodeStr,
  2408. ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  2409. : N3VX<op24, op23, op21_20, op11_8, 1, op4,
  2410. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2411. OpcodeStr, "$Vd, $Vn, $Vm", "",
  2412. [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  2413. // All of these have a two-operand InstAlias.
  2414. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2415. let isCommutable = Commutable;
  2416. }
  2417. class N3VQSL<bits<2> op21_20, bits<4> op11_8,
  2418. InstrItinClass itin, string OpcodeStr, string Dt,
  2419. ValueType ResTy, ValueType OpTy, SDNode ShOp>
  2420. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2421. (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2422. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2423. [(set (ResTy QPR:$Vd),
  2424. (ResTy (ShOp (ResTy QPR:$Vn),
  2425. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2426. imm:$lane)))))]> {
  2427. // All of these have a two-operand InstAlias.
  2428. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2429. let isCommutable = 0;
  2430. }
  2431. class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
  2432. ValueType ResTy, ValueType OpTy, SDNode ShOp>
  2433. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2434. (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2435. NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
  2436. [(set (ResTy QPR:$Vd),
  2437. (ResTy (ShOp (ResTy QPR:$Vn),
  2438. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2439. imm:$lane)))))]> {
  2440. // All of these have a two-operand InstAlias.
  2441. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2442. let isCommutable = 0;
  2443. }
  2444. // Basic 3-register intrinsics, both double- and quad-register.
  2445. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2446. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2447. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  2448. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2449. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
  2450. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2451. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2452. // All of these have a two-operand InstAlias.
  2453. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2454. let isCommutable = Commutable;
  2455. }
  2456. class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2457. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2458. string Dt, ValueType ResTy, ValueType OpTy,
  2459. SDPatternOperator IntOp, bit Commutable>
  2460. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2461. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
  2462. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2463. let isCommutable = Commutable;
  2464. }
  2465. class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2466. string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  2467. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2468. (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2469. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2470. [(set (Ty DPR:$Vd),
  2471. (Ty (IntOp (Ty DPR:$Vn),
  2472. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
  2473. imm:$lane)))))]> {
  2474. let isCommutable = 0;
  2475. }
  2476. class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2477. string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  2478. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2479. (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2480. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2481. [(set (Ty DPR:$Vd),
  2482. (Ty (IntOp (Ty DPR:$Vn),
  2483. (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  2484. let isCommutable = 0;
  2485. }
  2486. class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2487. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2488. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2489. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2490. (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
  2491. OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
  2492. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  2493. let TwoOperandAliasConstraint = "$Vm = $Vd";
  2494. let isCommutable = 0;
  2495. }
  2496. class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2497. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2498. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  2499. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2500. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
  2501. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2502. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2503. // All of these have a two-operand InstAlias.
  2504. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2505. let isCommutable = Commutable;
  2506. }
  2507. class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2508. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2509. string Dt, ValueType ResTy, ValueType OpTy,
  2510. SDPatternOperator IntOp, bit Commutable>
  2511. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2512. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
  2513. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2514. let isCommutable = Commutable;
  2515. }
  2516. // Same as N3VQIntnp but with Vd as a src register.
  2517. class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2518. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2519. string Dt, ValueType ResTy, ValueType OpTy,
  2520. SDPatternOperator IntOp>
  2521. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2522. (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
  2523. f, itin, OpcodeStr, Dt,
  2524. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
  2525. (OpTy QPR:$Vm))))]> {
  2526. let Constraints = "$src = $Vd";
  2527. let isCommutable = 0;
  2528. }
  2529. class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2530. string OpcodeStr, string Dt,
  2531. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2532. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2533. (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2534. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2535. [(set (ResTy QPR:$Vd),
  2536. (ResTy (IntOp (ResTy QPR:$Vn),
  2537. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2538. imm:$lane)))))]> {
  2539. let isCommutable = 0;
  2540. }
  2541. class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2542. string OpcodeStr, string Dt,
  2543. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2544. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2545. (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2546. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2547. [(set (ResTy QPR:$Vd),
  2548. (ResTy (IntOp (ResTy QPR:$Vn),
  2549. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2550. imm:$lane)))))]> {
  2551. let isCommutable = 0;
  2552. }
  2553. class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2554. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2555. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2556. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2557. (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
  2558. OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
  2559. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  2560. let TwoOperandAliasConstraint = "$Vm = $Vd";
  2561. let isCommutable = 0;
  2562. }
  2563. // Multiply-Add/Sub operations: double- and quad-register.
  2564. class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2565. InstrItinClass itin, string OpcodeStr, string Dt,
  2566. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  2567. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2568. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2569. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2570. [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
  2571. (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
  2572. class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2573. string OpcodeStr, string Dt,
  2574. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  2575. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2576. (outs DPR:$Vd),
  2577. (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2578. NVMulSLFrm, itin,
  2579. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2580. [(set (Ty DPR:$Vd),
  2581. (Ty (ShOp (Ty DPR:$src1),
  2582. (Ty (MulOp DPR:$Vn,
  2583. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
  2584. imm:$lane)))))))]>;
  2585. class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2586. string OpcodeStr, string Dt,
  2587. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  2588. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2589. (outs DPR:$Vd),
  2590. (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2591. NVMulSLFrm, itin,
  2592. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2593. [(set (Ty DPR:$Vd),
  2594. (Ty (ShOp (Ty DPR:$src1),
  2595. (Ty (MulOp DPR:$Vn,
  2596. (Ty (ARMvduplane (Ty DPR_8:$Vm),
  2597. imm:$lane)))))))]>;
  2598. class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2599. InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
  2600. SDPatternOperator MulOp, SDPatternOperator OpNode>
  2601. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2602. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2603. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2604. [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
  2605. (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
  2606. class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2607. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2608. SDPatternOperator MulOp, SDPatternOperator ShOp>
  2609. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2610. (outs QPR:$Vd),
  2611. (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2612. NVMulSLFrm, itin,
  2613. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2614. [(set (ResTy QPR:$Vd),
  2615. (ResTy (ShOp (ResTy QPR:$src1),
  2616. (ResTy (MulOp QPR:$Vn,
  2617. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2618. imm:$lane)))))))]>;
  2619. class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2620. string OpcodeStr, string Dt,
  2621. ValueType ResTy, ValueType OpTy,
  2622. SDPatternOperator MulOp, SDPatternOperator ShOp>
  2623. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2624. (outs QPR:$Vd),
  2625. (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2626. NVMulSLFrm, itin,
  2627. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2628. [(set (ResTy QPR:$Vd),
  2629. (ResTy (ShOp (ResTy QPR:$src1),
  2630. (ResTy (MulOp QPR:$Vn,
  2631. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2632. imm:$lane)))))))]>;
  2633. // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
  2634. class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2635. InstrItinClass itin, string OpcodeStr, string Dt,
  2636. ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  2637. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2638. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2639. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2640. [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
  2641. (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
  2642. class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2643. InstrItinClass itin, string OpcodeStr, string Dt,
  2644. ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  2645. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2646. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2647. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2648. [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
  2649. (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
  2650. // Neon 3-argument intrinsics, both double- and quad-register.
  2651. // The destination register is also used as the first source operand register.
  2652. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2653. InstrItinClass itin, string OpcodeStr, string Dt,
  2654. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2655. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2656. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2657. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2658. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
  2659. (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
  2660. class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2661. InstrItinClass itin, string OpcodeStr, string Dt,
  2662. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2663. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2664. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2665. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2666. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
  2667. (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
  2668. // Long Multiply-Add/Sub operations.
  2669. class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2670. InstrItinClass itin, string OpcodeStr, string Dt,
  2671. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2672. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2673. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2674. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2675. [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
  2676. (TyQ (MulOp (TyD DPR:$Vn),
  2677. (TyD DPR:$Vm)))))]>;
  2678. class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
  2679. InstrItinClass itin, string OpcodeStr, string Dt,
  2680. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2681. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
  2682. (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2683. NVMulSLFrm, itin,
  2684. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2685. [(set QPR:$Vd,
  2686. (OpNode (TyQ QPR:$src1),
  2687. (TyQ (MulOp (TyD DPR:$Vn),
  2688. (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
  2689. imm:$lane))))))]>;
  2690. class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2691. InstrItinClass itin, string OpcodeStr, string Dt,
  2692. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2693. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
  2694. (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2695. NVMulSLFrm, itin,
  2696. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2697. [(set QPR:$Vd,
  2698. (OpNode (TyQ QPR:$src1),
  2699. (TyQ (MulOp (TyD DPR:$Vn),
  2700. (TyD (ARMvduplane (TyD DPR_8:$Vm),
  2701. imm:$lane))))))]>;
  2702. // Long Intrinsic-Op vector operations with explicit extend (VABAL).
  2703. class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2704. InstrItinClass itin, string OpcodeStr, string Dt,
  2705. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
  2706. SDNode OpNode>
  2707. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2708. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2709. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2710. [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
  2711. (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
  2712. (TyD DPR:$Vm)))))))]>;
  2713. // Neon Long 3-argument intrinsic. The destination register is
  2714. // a quad-register and is also used as the first source operand register.
  2715. class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2716. InstrItinClass itin, string OpcodeStr, string Dt,
  2717. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  2718. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2719. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2720. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2721. [(set QPR:$Vd,
  2722. (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
  2723. class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2724. string OpcodeStr, string Dt,
  2725. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2726. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2727. (outs QPR:$Vd),
  2728. (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2729. NVMulSLFrm, itin,
  2730. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2731. [(set (ResTy QPR:$Vd),
  2732. (ResTy (IntOp (ResTy QPR:$src1),
  2733. (OpTy DPR:$Vn),
  2734. (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2735. imm:$lane)))))]>;
  2736. class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2737. InstrItinClass itin, string OpcodeStr, string Dt,
  2738. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2739. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2740. (outs QPR:$Vd),
  2741. (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2742. NVMulSLFrm, itin,
  2743. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2744. [(set (ResTy QPR:$Vd),
  2745. (ResTy (IntOp (ResTy QPR:$src1),
  2746. (OpTy DPR:$Vn),
  2747. (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
  2748. imm:$lane)))))]>;
  2749. // Narrowing 3-register intrinsics.
  2750. class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2751. string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
  2752. SDPatternOperator IntOp, bit Commutable>
  2753. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2754. (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
  2755. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2756. [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  2757. let isCommutable = Commutable;
  2758. }
  2759. // Long 3-register operations.
  2760. class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2761. InstrItinClass itin, string OpcodeStr, string Dt,
  2762. ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  2763. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2764. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2765. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2766. [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  2767. let isCommutable = Commutable;
  2768. }
  2769. class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
  2770. InstrItinClass itin, string OpcodeStr, string Dt,
  2771. ValueType TyQ, ValueType TyD, SDNode OpNode>
  2772. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2773. (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2774. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2775. [(set QPR:$Vd,
  2776. (TyQ (OpNode (TyD DPR:$Vn),
  2777. (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
  2778. class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2779. InstrItinClass itin, string OpcodeStr, string Dt,
  2780. ValueType TyQ, ValueType TyD, SDNode OpNode>
  2781. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2782. (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2783. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2784. [(set QPR:$Vd,
  2785. (TyQ (OpNode (TyD DPR:$Vn),
  2786. (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
  2787. // Long 3-register operations with explicitly extended operands.
  2788. class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2789. InstrItinClass itin, string OpcodeStr, string Dt,
  2790. ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
  2791. bit Commutable>
  2792. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2793. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2794. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2795. [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
  2796. (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  2797. let isCommutable = Commutable;
  2798. }
  2799. // Long 3-register intrinsics with explicit extend (VABDL).
  2800. class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2801. InstrItinClass itin, string OpcodeStr, string Dt,
  2802. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
  2803. bit Commutable>
  2804. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2805. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2806. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2807. [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
  2808. (TyD DPR:$Vm))))))]> {
  2809. let isCommutable = Commutable;
  2810. }
  2811. // Long 3-register intrinsics.
  2812. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2813. InstrItinClass itin, string OpcodeStr, string Dt,
  2814. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  2815. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2816. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2817. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2818. [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  2819. let isCommutable = Commutable;
  2820. }
  2821. // Same as above, but not predicated.
  2822. class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2823. bit op4, InstrItinClass itin, string OpcodeStr,
  2824. string Dt, ValueType ResTy, ValueType OpTy,
  2825. SDPatternOperator IntOp, bit Commutable>
  2826. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2827. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
  2828. [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2829. let isCommutable = Commutable;
  2830. }
  2831. class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2832. string OpcodeStr, string Dt,
  2833. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2834. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2835. (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2836. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2837. [(set (ResTy QPR:$Vd),
  2838. (ResTy (IntOp (OpTy DPR:$Vn),
  2839. (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2840. imm:$lane)))))]>;
  2841. class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2842. InstrItinClass itin, string OpcodeStr, string Dt,
  2843. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2844. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2845. (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2846. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2847. [(set (ResTy QPR:$Vd),
  2848. (ResTy (IntOp (OpTy DPR:$Vn),
  2849. (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
  2850. imm:$lane)))))]>;
  2851. // Wide 3-register operations.
  2852. class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2853. string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
  2854. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  2855. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2856. (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
  2857. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2858. [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
  2859. (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  2860. // All of these have a two-operand InstAlias.
  2861. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2862. let isCommutable = Commutable;
  2863. }
  2864. // Pairwise long 2-register intrinsics, both double- and quad-register.
  2865. class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2866. bits<2> op17_16, bits<5> op11_7, bit op4,
  2867. string OpcodeStr, string Dt,
  2868. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2869. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
  2870. (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
  2871. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
  2872. class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2873. bits<2> op17_16, bits<5> op11_7, bit op4,
  2874. string OpcodeStr, string Dt,
  2875. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2876. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
  2877. (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
  2878. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  2879. // Pairwise long 2-register accumulate intrinsics,
  2880. // both double- and quad-register.
  2881. // The destination register is also used as the first source operand register.
  2882. class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2883. bits<2> op17_16, bits<5> op11_7, bit op4,
  2884. string OpcodeStr, string Dt,
  2885. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2886. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
  2887. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
  2888. OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
  2889. [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
  2890. class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2891. bits<2> op17_16, bits<5> op11_7, bit op4,
  2892. string OpcodeStr, string Dt,
  2893. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2894. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
  2895. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
  2896. OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
  2897. [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
  2898. // Shift by immediate,
  2899. // both double- and quad-register.
  2900. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  2901. class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2902. Format f, InstrItinClass itin, Operand ImmTy,
  2903. string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  2904. : N2VImm<op24, op23, op11_8, op7, 0, op4,
  2905. (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
  2906. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2907. [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
  2908. class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2909. Format f, InstrItinClass itin, Operand ImmTy,
  2910. string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  2911. : N2VImm<op24, op23, op11_8, op7, 1, op4,
  2912. (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
  2913. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2914. [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
  2915. }
  2916. // Long shift by immediate.
  2917. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
  2918. string OpcodeStr, string Dt,
  2919. ValueType ResTy, ValueType OpTy, Operand ImmTy,
  2920. SDPatternOperator OpNode>
  2921. : N2VImm<op24, op23, op11_8, op7, op6, op4,
  2922. (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
  2923. IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2924. [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
  2925. // Narrow shift by immediate.
  2926. class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
  2927. InstrItinClass itin, string OpcodeStr, string Dt,
  2928. ValueType ResTy, ValueType OpTy, Operand ImmTy,
  2929. SDPatternOperator OpNode>
  2930. : N2VImm<op24, op23, op11_8, op7, op6, op4,
  2931. (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
  2932. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2933. [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
  2934. (i32 ImmTy:$SIMM))))]>;
  2935. // Shift right by immediate and accumulate,
  2936. // both double- and quad-register.
  2937. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  2938. class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2939. Operand ImmTy, string OpcodeStr, string Dt,
  2940. ValueType Ty, SDNode ShOp>
  2941. : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
  2942. (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
  2943. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2944. [(set DPR:$Vd, (Ty (add DPR:$src1,
  2945. (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
  2946. class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2947. Operand ImmTy, string OpcodeStr, string Dt,
  2948. ValueType Ty, SDNode ShOp>
  2949. : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
  2950. (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
  2951. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2952. [(set QPR:$Vd, (Ty (add QPR:$src1,
  2953. (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
  2954. }
  2955. // Shift by immediate and insert,
  2956. // both double- and quad-register.
  2957. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  2958. class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2959. Operand ImmTy, Format f, string OpcodeStr, string Dt,
  2960. ValueType Ty,SDNode ShOp>
  2961. : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
  2962. (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
  2963. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2964. [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
  2965. class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2966. Operand ImmTy, Format f, string OpcodeStr, string Dt,
  2967. ValueType Ty,SDNode ShOp>
  2968. : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
  2969. (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
  2970. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2971. [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
  2972. }
  2973. // Convert, with fractional bits immediate,
  2974. // both double- and quad-register.
  2975. class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2976. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2977. SDPatternOperator IntOp>
  2978. : N2VImm<op24, op23, op11_8, op7, 0, op4,
  2979. (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
  2980. IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2981. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
  2982. class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2983. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2984. SDPatternOperator IntOp>
  2985. : N2VImm<op24, op23, op11_8, op7, 1, op4,
  2986. (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
  2987. IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2988. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
  2989. //===----------------------------------------------------------------------===//
  2990. // Multiclasses
  2991. //===----------------------------------------------------------------------===//
  2992. // Abbreviations used in multiclass suffixes:
  2993. // Q = quarter int (8 bit) elements
  2994. // H = half int (16 bit) elements
  2995. // S = single int (32 bit) elements
  2996. // D = double int (64 bit) elements
  2997. // Neon 2-register vector operations and intrinsics.
  2998. // Neon 2-register comparisons.
  2999. // source operand element sizes of 8, 16 and 32 bits:
  3000. multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3001. bits<5> op11_7, bit op4, string opc, string Dt,
  3002. string asm, PatFrag fc> {
  3003. // 64-bit vector types.
  3004. def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
  3005. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3006. opc, !strconcat(Dt, "8"), asm, "",
  3007. [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  3008. def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
  3009. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3010. opc, !strconcat(Dt, "16"), asm, "",
  3011. [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  3012. def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
  3013. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3014. opc, !strconcat(Dt, "32"), asm, "",
  3015. [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  3016. def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
  3017. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3018. opc, "f32", asm, "",
  3019. [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
  3020. let Inst{10} = 1; // overwrite F = 1
  3021. }
  3022. def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
  3023. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3024. opc, "f16", asm, "",
  3025. [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
  3026. Requires<[HasNEON,HasFullFP16]> {
  3027. let Inst{10} = 1; // overwrite F = 1
  3028. }
  3029. // 128-bit vector types.
  3030. def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
  3031. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3032. opc, !strconcat(Dt, "8"), asm, "",
  3033. [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  3034. def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
  3035. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3036. opc, !strconcat(Dt, "16"), asm, "",
  3037. [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  3038. def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
  3039. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3040. opc, !strconcat(Dt, "32"), asm, "",
  3041. [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  3042. def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
  3043. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3044. opc, "f32", asm, "",
  3045. [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
  3046. let Inst{10} = 1; // overwrite F = 1
  3047. }
  3048. def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
  3049. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3050. opc, "f16", asm, "",
  3051. [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
  3052. Requires<[HasNEON,HasFullFP16]> {
  3053. let Inst{10} = 1; // overwrite F = 1
  3054. }
  3055. }
  3056. // Neon 3-register comparisons.
  3057. class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  3058. InstrItinClass itin, string OpcodeStr, string Dt,
  3059. ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  3060. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  3061. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  3062. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  3063. [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  3064. // All of these have a two-operand InstAlias.
  3065. let TwoOperandAliasConstraint = "$Vn = $Vd";
  3066. let isCommutable = Commutable;
  3067. }
  3068. class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  3069. InstrItinClass itin, string OpcodeStr, string Dt,
  3070. ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  3071. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  3072. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  3073. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  3074. [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  3075. // All of these have a two-operand InstAlias.
  3076. let TwoOperandAliasConstraint = "$Vn = $Vd";
  3077. let isCommutable = Commutable;
  3078. }
  3079. multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
  3080. InstrItinClass itinD16, InstrItinClass itinD32,
  3081. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3082. string OpcodeStr, string Dt,
  3083. PatFrag fc, bit Commutable = 0> {
  3084. // 64-bit vector types.
  3085. def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
  3086. OpcodeStr, !strconcat(Dt, "8"),
  3087. v8i8, v8i8, fc, Commutable>;
  3088. def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
  3089. OpcodeStr, !strconcat(Dt, "16"),
  3090. v4i16, v4i16, fc, Commutable>;
  3091. def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
  3092. OpcodeStr, !strconcat(Dt, "32"),
  3093. v2i32, v2i32, fc, Commutable>;
  3094. // 128-bit vector types.
  3095. def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
  3096. OpcodeStr, !strconcat(Dt, "8"),
  3097. v16i8, v16i8, fc, Commutable>;
  3098. def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
  3099. OpcodeStr, !strconcat(Dt, "16"),
  3100. v8i16, v8i16, fc, Commutable>;
  3101. def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
  3102. OpcodeStr, !strconcat(Dt, "32"),
  3103. v4i32, v4i32, fc, Commutable>;
  3104. }
  3105. // Neon 2-register vector intrinsics,
  3106. // element sizes of 8, 16 and 32 bits:
  3107. multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3108. bits<5> op11_7, bit op4,
  3109. InstrItinClass itinD, InstrItinClass itinQ,
  3110. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3111. // 64-bit vector types.
  3112. def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3113. itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  3114. def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3115. itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  3116. def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3117. itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
  3118. // 128-bit vector types.
  3119. def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3120. itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  3121. def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3122. itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  3123. def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3124. itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
  3125. }
  3126. // Neon Narrowing 2-register vector operations,
  3127. // source operand element sizes of 16, 32 and 64 bits:
  3128. multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3129. bits<5> op11_7, bit op6, bit op4,
  3130. InstrItinClass itin, string OpcodeStr, string Dt,
  3131. SDNode OpNode> {
  3132. def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
  3133. itin, OpcodeStr, !strconcat(Dt, "16"),
  3134. v8i8, v8i16, OpNode>;
  3135. def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
  3136. itin, OpcodeStr, !strconcat(Dt, "32"),
  3137. v4i16, v4i32, OpNode>;
  3138. def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
  3139. itin, OpcodeStr, !strconcat(Dt, "64"),
  3140. v2i32, v2i64, OpNode>;
  3141. }
  3142. // Neon Narrowing 2-register vector intrinsics,
  3143. // source operand element sizes of 16, 32 and 64 bits:
  3144. multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3145. bits<5> op11_7, bit op6, bit op4,
  3146. InstrItinClass itin, string OpcodeStr, string Dt,
  3147. SDPatternOperator IntOp> {
  3148. def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
  3149. itin, OpcodeStr, !strconcat(Dt, "16"),
  3150. v8i8, v8i16, IntOp>;
  3151. def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
  3152. itin, OpcodeStr, !strconcat(Dt, "32"),
  3153. v4i16, v4i32, IntOp>;
  3154. def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
  3155. itin, OpcodeStr, !strconcat(Dt, "64"),
  3156. v2i32, v2i64, IntOp>;
  3157. }
  3158. // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
  3160. multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
  3161. string OpcodeStr, string Dt, SDNode OpNode> {
  3162. def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3163. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  3164. def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3165. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  3166. def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3167. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
  3168. }
  3169. // Neon 3-register vector operations.
  3170. // First with only element sizes of 8, 16 and 32 bits:
  3171. multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3172. InstrItinClass itinD16, InstrItinClass itinD32,
  3173. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3174. string OpcodeStr, string Dt,
  3175. SDNode OpNode, bit Commutable = 0> {
  3176. // 64-bit vector types.
  3177. def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
  3178. OpcodeStr, !strconcat(Dt, "8"),
  3179. v8i8, v8i8, OpNode, Commutable>;
  3180. def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
  3181. OpcodeStr, !strconcat(Dt, "16"),
  3182. v4i16, v4i16, OpNode, Commutable>;
  3183. def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
  3184. OpcodeStr, !strconcat(Dt, "32"),
  3185. v2i32, v2i32, OpNode, Commutable>;
  3186. // 128-bit vector types.
  3187. def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
  3188. OpcodeStr, !strconcat(Dt, "8"),
  3189. v16i8, v16i8, OpNode, Commutable>;
  3190. def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
  3191. OpcodeStr, !strconcat(Dt, "16"),
  3192. v8i16, v8i16, OpNode, Commutable>;
  3193. def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
  3194. OpcodeStr, !strconcat(Dt, "32"),
  3195. v4i32, v4i32, OpNode, Commutable>;
  3196. }
  3197. multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  3198. def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  3199. def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  3200. def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  3201. def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
  3202. v4i32, v2i32, ShOp>;
  3203. }
  3204. // ....then also with element size 64 bits:
  3205. multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3206. InstrItinClass itinD, InstrItinClass itinQ,
  3207. string OpcodeStr, string Dt,
  3208. SDNode OpNode, bit Commutable = 0>
  3209. : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
  3210. OpcodeStr, Dt, OpNode, Commutable> {
  3211. def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
  3212. OpcodeStr, !strconcat(Dt, "64"),
  3213. v1i64, v1i64, OpNode, Commutable>;
  3214. def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
  3215. OpcodeStr, !strconcat(Dt, "64"),
  3216. v2i64, v2i64, OpNode, Commutable>;
  3217. }
  3218. // Neon 3-register vector intrinsics.
  3219. // First with only element sizes of 16 and 32 bits:
  3220. multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3221. InstrItinClass itinD16, InstrItinClass itinD32,
  3222. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3223. string OpcodeStr, string Dt,
  3224. SDPatternOperator IntOp, bit Commutable = 0> {
  3225. // 64-bit vector types.
  3226. def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
  3227. OpcodeStr, !strconcat(Dt, "16"),
  3228. v4i16, v4i16, IntOp, Commutable>;
  3229. def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
  3230. OpcodeStr, !strconcat(Dt, "32"),
  3231. v2i32, v2i32, IntOp, Commutable>;
  3232. // 128-bit vector types.
  3233. def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
  3234. OpcodeStr, !strconcat(Dt, "16"),
  3235. v8i16, v8i16, IntOp, Commutable>;
  3236. def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
  3237. OpcodeStr, !strconcat(Dt, "32"),
  3238. v4i32, v4i32, IntOp, Commutable>;
  3239. }
  3240. multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3241. InstrItinClass itinD16, InstrItinClass itinD32,
  3242. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3243. string OpcodeStr, string Dt,
  3244. SDPatternOperator IntOp> {
  3245. // 64-bit vector types.
  3246. def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
  3247. OpcodeStr, !strconcat(Dt, "16"),
  3248. v4i16, v4i16, IntOp>;
  3249. def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
  3250. OpcodeStr, !strconcat(Dt, "32"),
  3251. v2i32, v2i32, IntOp>;
  3252. // 128-bit vector types.
  3253. def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
  3254. OpcodeStr, !strconcat(Dt, "16"),
  3255. v8i16, v8i16, IntOp>;
  3256. def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
  3257. OpcodeStr, !strconcat(Dt, "32"),
  3258. v4i32, v4i32, IntOp>;
  3259. }
  3260. multiclass N3VIntSL_HS<bits<4> op11_8,
  3261. InstrItinClass itinD16, InstrItinClass itinD32,
  3262. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3263. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3264. def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
  3265. OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  3266. def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
  3267. OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  3268. def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
  3269. OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  3270. def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
  3271. OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
  3272. }
  3273. // ....then also with element size of 8 bits:
  3274. multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3275. InstrItinClass itinD16, InstrItinClass itinD32,
  3276. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3277. string OpcodeStr, string Dt,
  3278. SDPatternOperator IntOp, bit Commutable = 0>
  3279. : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3280. OpcodeStr, Dt, IntOp, Commutable> {
  3281. def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
  3282. OpcodeStr, !strconcat(Dt, "8"),
  3283. v8i8, v8i8, IntOp, Commutable>;
  3284. def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
  3285. OpcodeStr, !strconcat(Dt, "8"),
  3286. v16i8, v16i8, IntOp, Commutable>;
  3287. }
  3288. multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3289. InstrItinClass itinD16, InstrItinClass itinD32,
  3290. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3291. string OpcodeStr, string Dt,
  3292. SDPatternOperator IntOp>
  3293. : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3294. OpcodeStr, Dt, IntOp> {
  3295. def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
  3296. OpcodeStr, !strconcat(Dt, "8"),
  3297. v8i8, v8i8, IntOp>;
  3298. def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
  3299. OpcodeStr, !strconcat(Dt, "8"),
  3300. v16i8, v16i8, IntOp>;
  3301. }
  3302. // ....then also with element size of 64 bits:
  3303. multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3304. InstrItinClass itinD16, InstrItinClass itinD32,
  3305. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3306. string OpcodeStr, string Dt,
  3307. SDPatternOperator IntOp, bit Commutable = 0>
  3308. : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3309. OpcodeStr, Dt, IntOp, Commutable> {
  3310. def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
  3311. OpcodeStr, !strconcat(Dt, "64"),
  3312. v1i64, v1i64, IntOp, Commutable>;
  3313. def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
  3314. OpcodeStr, !strconcat(Dt, "64"),
  3315. v2i64, v2i64, IntOp, Commutable>;
  3316. }
  3317. multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3318. InstrItinClass itinD16, InstrItinClass itinD32,
  3319. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3320. string OpcodeStr, string Dt,
  3321. SDPatternOperator IntOp>
  3322. : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3323. OpcodeStr, Dt, IntOp> {
  3324. def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
  3325. OpcodeStr, !strconcat(Dt, "64"),
  3326. v1i64, v1i64, IntOp>;
  3327. def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
  3328. OpcodeStr, !strconcat(Dt, "64"),
  3329. v2i64, v2i64, IntOp>;
  3330. }
  3331. // Neon Narrowing 3-register vector intrinsics,
  3332. // source operand element sizes of 16, 32 and 64 bits:
  3333. multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3334. string OpcodeStr, string Dt,
  3335. SDPatternOperator IntOp, bit Commutable = 0> {
  3336. def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
  3337. OpcodeStr, !strconcat(Dt, "16"),
  3338. v8i8, v8i16, IntOp, Commutable>;
  3339. def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
  3340. OpcodeStr, !strconcat(Dt, "32"),
  3341. v4i16, v4i32, IntOp, Commutable>;
  3342. def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
  3343. OpcodeStr, !strconcat(Dt, "64"),
  3344. v2i32, v2i64, IntOp, Commutable>;
  3345. }
  3346. // Neon Long 3-register vector operations.
  3347. multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3348. InstrItinClass itin16, InstrItinClass itin32,
  3349. string OpcodeStr, string Dt,
  3350. SDNode OpNode, bit Commutable = 0> {
  3351. def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
  3352. OpcodeStr, !strconcat(Dt, "8"),
  3353. v8i16, v8i8, OpNode, Commutable>;
  3354. def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
  3355. OpcodeStr, !strconcat(Dt, "16"),
  3356. v4i32, v4i16, OpNode, Commutable>;
  3357. def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
  3358. OpcodeStr, !strconcat(Dt, "32"),
  3359. v2i64, v2i32, OpNode, Commutable>;
  3360. }
  3361. multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
  3362. InstrItinClass itin, string OpcodeStr, string Dt,
  3363. SDNode OpNode> {
  3364. def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
  3365. !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  3366. def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
  3367. !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
  3368. }
  3369. multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3370. InstrItinClass itin16, InstrItinClass itin32,
  3371. string OpcodeStr, string Dt,
  3372. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  3373. def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
  3374. OpcodeStr, !strconcat(Dt, "8"),
  3375. v8i16, v8i8, OpNode, ExtOp, Commutable>;
  3376. def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
  3377. OpcodeStr, !strconcat(Dt, "16"),
  3378. v4i32, v4i16, OpNode, ExtOp, Commutable>;
  3379. def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
  3380. OpcodeStr, !strconcat(Dt, "32"),
  3381. v2i64, v2i32, OpNode, ExtOp, Commutable>;
  3382. }
  3383. // Neon Long 3-register vector intrinsics.
  3384. // First with only element sizes of 16 and 32 bits:
  3385. multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3386. InstrItinClass itin16, InstrItinClass itin32,
  3387. string OpcodeStr, string Dt,
  3388. SDPatternOperator IntOp, bit Commutable = 0> {
  3389. def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
  3390. OpcodeStr, !strconcat(Dt, "16"),
  3391. v4i32, v4i16, IntOp, Commutable>;
  3392. def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
  3393. OpcodeStr, !strconcat(Dt, "32"),
  3394. v2i64, v2i32, IntOp, Commutable>;
  3395. }
  3396. multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
  3397. InstrItinClass itin, string OpcodeStr, string Dt,
  3398. SDPatternOperator IntOp> {
  3399. def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
  3400. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  3401. def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
  3402. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3403. }
  3404. // ....then also with element size of 8 bits:
  3405. multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3406. InstrItinClass itin16, InstrItinClass itin32,
  3407. string OpcodeStr, string Dt,
  3408. SDPatternOperator IntOp, bit Commutable = 0>
  3409. : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
  3410. IntOp, Commutable> {
  3411. def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
  3412. OpcodeStr, !strconcat(Dt, "8"),
  3413. v8i16, v8i8, IntOp, Commutable>;
  3414. }
  3415. // ....with explicit extend (VABDL).
  3416. multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3417. InstrItinClass itin, string OpcodeStr, string Dt,
  3418. SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  3419. def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
  3420. OpcodeStr, !strconcat(Dt, "8"),
  3421. v8i16, v8i8, IntOp, ExtOp, Commutable>;
  3422. def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
  3423. OpcodeStr, !strconcat(Dt, "16"),
  3424. v4i32, v4i16, IntOp, ExtOp, Commutable>;
  3425. def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
  3426. OpcodeStr, !strconcat(Dt, "32"),
  3427. v2i64, v2i32, IntOp, ExtOp, Commutable>;
  3428. }
  3429. // Neon Wide 3-register vector intrinsics,
  3430. // source operand element sizes of 8, 16 and 32 bits:
  3431. multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3432. string OpcodeStr, string Dt,
  3433. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  3434. def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
  3435. OpcodeStr, !strconcat(Dt, "8"),
  3436. v8i16, v8i8, OpNode, ExtOp, Commutable>;
  3437. def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
  3438. OpcodeStr, !strconcat(Dt, "16"),
  3439. v4i32, v4i16, OpNode, ExtOp, Commutable>;
  3440. def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
  3441. OpcodeStr, !strconcat(Dt, "32"),
  3442. v2i64, v2i32, OpNode, ExtOp, Commutable>;
  3443. }
  3444. // Neon Multiply-Op vector operations,
  3445. // element sizes of 8, 16 and 32 bits:
  3446. multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3447. InstrItinClass itinD16, InstrItinClass itinD32,
  3448. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3449. string OpcodeStr, string Dt, SDNode OpNode> {
  3450. // 64-bit vector types.
  3451. def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
  3452. OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  3453. def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
  3454. OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  3455. def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
  3456. OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
  3457. // 128-bit vector types.
  3458. def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
  3459. OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  3460. def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
  3461. OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  3462. def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
  3463. OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
  3464. }
  3465. multiclass N3VMulOpSL_HS<bits<4> op11_8,
  3466. InstrItinClass itinD16, InstrItinClass itinD32,
  3467. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3468. string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  3469. def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
  3470. OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  3471. def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
  3472. OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  3473. def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
  3474. OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
  3475. mul, ShOp>;
  3476. def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
  3477. OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
  3478. mul, ShOp>;
  3479. }
  3480. // Neon Intrinsic-Op vector operations,
  3481. // element sizes of 8, 16 and 32 bits:
  3482. multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3483. InstrItinClass itinD, InstrItinClass itinQ,
  3484. string OpcodeStr, string Dt, SDPatternOperator IntOp,
  3485. SDNode OpNode> {
  3486. // 64-bit vector types.
  3487. def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
  3488. OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  3489. def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
  3490. OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  3491. def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
  3492. OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
  3493. // 128-bit vector types.
  3494. def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
  3495. OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  3496. def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
  3497. OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  3498. def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
  3499. OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
  3500. }
  3501. // Neon 3-argument intrinsics,
  3502. // element sizes of 16 and 32 bits:
  3503. multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3504. InstrItinClass itinD16, InstrItinClass itinD32,
  3505. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3506. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3507. // 64-bit vector types.
  3508. def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
  3509. OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  3510. def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
  3511. OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
  3512. // 128-bit vector types.
  3513. def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
  3514. OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  3515. def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
  3516. OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
  3517. }
  3518. // element sizes of 8, 16 and 32 bits:
  3519. multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3520. InstrItinClass itinD16, InstrItinClass itinD32,
  3521. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3522. string OpcodeStr, string Dt, SDPatternOperator IntOp>
  3523. :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
  3524. itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  3525. // 64-bit vector types.
  3526. def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
  3527. OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  3528. // 128-bit vector types.
  3529. def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
  3530. OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  3531. }
  3532. // Neon Long Multiply-Op vector operations,
  3533. // element sizes of 8, 16 and 32 bits:
  3534. multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3535. InstrItinClass itin16, InstrItinClass itin32,
  3536. string OpcodeStr, string Dt, SDNode MulOp,
  3537. SDNode OpNode> {
  3538. def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
  3539. !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  3540. def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
  3541. !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  3542. def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
  3543. !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
  3544. }
  3545. multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
  3546. string Dt, SDNode MulOp, SDNode OpNode> {
  3547. def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
  3548. !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  3549. def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
  3550. !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
  3551. }
  3552. // Neon Long 3-argument intrinsics.
  3553. // First with only element sizes of 16 and 32 bits:
  3554. multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3555. InstrItinClass itin16, InstrItinClass itin32,
  3556. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3557. def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
  3558. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  3559. def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
  3560. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3561. }
  3562. multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
  3563. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3564. def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
  3565. OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  3566. def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
  3567. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3568. }
  3569. // ....then also with element size of 8 bits:
  3570. multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3571. InstrItinClass itin16, InstrItinClass itin32,
  3572. string OpcodeStr, string Dt, SDPatternOperator IntOp>
  3573. : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  3574. def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
  3575. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
  3576. }
  3577. // ....with explicit extend (VABAL).
  3578. multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3579. InstrItinClass itin, string OpcodeStr, string Dt,
  3580. SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  3581. def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
  3582. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
  3583. IntOp, ExtOp, OpNode>;
  3584. def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
  3585. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
  3586. IntOp, ExtOp, OpNode>;
  3587. def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
  3588. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
  3589. IntOp, ExtOp, OpNode>;
  3590. }
  3591. // Neon Pairwise long 2-register intrinsics,
  3592. // element sizes of 8, 16 and 32 bits:
  3593. multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3594. bits<5> op11_7, bit op4,
  3595. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3596. // 64-bit vector types.
  3597. def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3598. OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  3599. def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3600. OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  3601. def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3602. OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
  3603. // 128-bit vector types.
  3604. def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3605. OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  3606. def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3607. OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  3608. def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3609. OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
  3610. }
  3611. // Neon Pairwise long 2-register accumulate intrinsics,
  3612. // element sizes of 8, 16 and 32 bits:
  3613. multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3614. bits<5> op11_7, bit op4,
  3615. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3616. // 64-bit vector types.
  3617. def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3618. OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  3619. def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3620. OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  3621. def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3622. OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
  3623. // 128-bit vector types.
  3624. def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3625. OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  3626. def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3627. OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  3628. def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3629. OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
  3630. }
  3631. // Neon 2-register vector shift by immediate,
  3632. // with f of either N2RegVShLFrm or N2RegVShRFrm
  3633. // element sizes of 8, 16, 32 and 64 bits:
  3634. multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3635. InstrItinClass itin, string OpcodeStr, string Dt,
  3636. SDNode OpNode> {
  3637. // 64-bit vector types.
  3638. def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3639. OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
  3640. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3641. }
  3642. def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3643. OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
  3644. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3645. }
  3646. def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3647. OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
  3648. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3649. }
  3650. def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
  3651. OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
  3652. // imm6 = xxxxxx
  3653. // 128-bit vector types.
  3654. def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3655. OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
  3656. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3657. }
  3658. def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3659. OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
  3660. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3661. }
  3662. def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
  3663. OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
  3664. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3665. }
  3666. def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
  3667. OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
  3668. // imm6 = xxxxxx
  3669. }
  3670. multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3671. InstrItinClass itin, string OpcodeStr, string Dt,
  3672. SDNode OpNode> {
  3673. // 64-bit vector types.
  3674. def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
  3675. OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
  3676. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3677. }
  3678. def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
  3679. OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
  3680. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3681. }
  3682. def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
  3683. OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
  3684. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3685. }
  3686. def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
  3687. OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
  3688. // imm6 = xxxxxx
  3689. // 128-bit vector types.
  3690. def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
  3691. OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
  3692. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3693. }
  3694. def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
  3695. OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
  3696. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3697. }
  3698. def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
  3699. OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
  3700. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3701. }
  3702. def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
  3703. OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
  3704. // imm6 = xxxxxx
  3705. }
  3706. // Neon Shift-Accumulate vector operations,
  3707. // element sizes of 8, 16, 32 and 64 bits:
  3708. multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3709. string OpcodeStr, string Dt, SDNode ShOp> {
  3710. // 64-bit vector types.
  3711. def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
  3712. OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
  3713. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3714. }
  3715. def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
  3716. OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
  3717. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3718. }
  3719. def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
  3720. OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
  3721. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3722. }
  3723. def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
  3724. OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
  3725. // imm6 = xxxxxx
  3726. // 128-bit vector types.
  3727. def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
  3728. OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
  3729. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3730. }
  3731. def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
  3732. OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
  3733. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3734. }
  3735. def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
  3736. OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
  3737. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3738. }
  3739. def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
  3740. OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
  3741. // imm6 = xxxxxx
  3742. }
  3743. // Neon Shift-Insert vector operations,
  3744. // with f of either N2RegVShLFrm or N2RegVShRFrm
  3745. // element sizes of 8, 16, 32 and 64 bits:
  3746. multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3747. string OpcodeStr> {
  3748. // 64-bit vector types.
  3749. def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3750. N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
  3751. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3752. }
  3753. def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3754. N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
  3755. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3756. }
  3757. def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
  3758. N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
  3759. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3760. }
  3761. def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
  3762. N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
  3763. // imm6 = xxxxxx
  3764. // 128-bit vector types.
  3765. def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3766. N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
  3767. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3768. }
  3769. def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3770. N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
  3771. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3772. }
  3773. def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
  3774. N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
  3775. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3776. }
  3777. def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
  3778. N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
  3779. // imm6 = xxxxxx
  3780. }
  3781. multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3782. string OpcodeStr> {
  3783. // 64-bit vector types.
  3784. def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
  3785. N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
  3786. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3787. }
  3788. def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
  3789. N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
  3790. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3791. }
  3792. def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
  3793. N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
  3794. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3795. }
  3796. def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
  3797. N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
  3798. // imm6 = xxxxxx
  3799. // 128-bit vector types.
  3800. def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
  3801. N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
  3802. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3803. }
  3804. def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
  3805. N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
  3806. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3807. }
  3808. def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
  3809. N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
  3810. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3811. }
  3812. def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
  3813. N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
  3814. // imm6 = xxxxxx
  3815. }
  3816. // Neon Shift Long operations,
  3817. // element sizes of 8, 16, 32 bits:
  3818. multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
  3819. bit op4, string OpcodeStr, string Dt,
  3820. SDPatternOperator OpNode> {
  3821. def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3822. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
  3823. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3824. }
  3825. def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3826. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
  3827. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3828. }
  3829. def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
  3830. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
  3831. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3832. }
  3833. }
  3834. // Neon Shift Narrow operations,
  3835. // element sizes of 16, 32, 64 bits:
  3836. multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
  3837. bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
  3838. SDPatternOperator OpNode> {
  3839. def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3840. OpcodeStr, !strconcat(Dt, "16"),
  3841. v8i8, v8i16, shr_imm8, OpNode> {
  3842. let Inst{21-19} = 0b001; // imm6 = 001xxx
  3843. }
  3844. def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3845. OpcodeStr, !strconcat(Dt, "32"),
  3846. v4i16, v4i32, shr_imm16, OpNode> {
  3847. let Inst{21-20} = 0b01; // imm6 = 01xxxx
  3848. }
  3849. def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
  3850. OpcodeStr, !strconcat(Dt, "64"),
  3851. v2i32, v2i64, shr_imm32, OpNode> {
  3852. let Inst{21} = 0b1; // imm6 = 1xxxxx
  3853. }
  3854. }
  3855. //===----------------------------------------------------------------------===//
  3856. // Instruction Definitions.
  3857. //===----------------------------------------------------------------------===//
  3858. // Vector Add Operations.
  3859. // VADD : Vector Add (integer and floating-point)
  3860. defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
  3861. add, 1>;
  3862. def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
  3863. v2f32, v2f32, fadd, 1>;
  3864. def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
  3865. v4f32, v4f32, fadd, 1>;
  3866. def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
  3867. v4f16, v4f16, fadd, 1>,
  3868. Requires<[HasNEON,HasFullFP16]>;
  3869. def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
  3870. v8f16, v8f16, fadd, 1>,
  3871. Requires<[HasNEON,HasFullFP16]>;
  3872. // VADDL : Vector Add Long (Q = D + D)
  3873. defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
  3874. "vaddl", "s", add, sext, 1>;
  3875. defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
  3876. "vaddl", "u", add, zanyext, 1>;
  3877. // VADDW : Vector Add Wide (Q = Q + D)
  3878. defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
  3879. defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
  3880. // VHADD : Vector Halving Add
  3881. defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
  3882. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3883. "vhadd", "s", int_arm_neon_vhadds, 1>;
  3884. defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
  3885. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3886. "vhadd", "u", int_arm_neon_vhaddu, 1>;
  3887. // VRHADD : Vector Rounding Halving Add
  3888. defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
  3889. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3890. "vrhadd", "s", int_arm_neon_vrhadds, 1>;
  3891. defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
  3892. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3893. "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
  3894. // VQADD : Vector Saturating Add
  3895. defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
  3896. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3897. "vqadd", "s", saddsat, 1>;
  3898. defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
  3899. IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
  3900. "vqadd", "u", uaddsat, 1>;
  3901. // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
  3902. defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
  3903. // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
  3904. defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
  3905. int_arm_neon_vraddhn, 1>;
  3906. let Predicates = [HasNEON] in {
  3907. def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
  3908. (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
  3909. def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
  3910. (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
  3911. def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
  3912. (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
  3913. }
  3914. // Vector Multiply Operations.
  3915. // VMUL : Vector Multiply (integer, polynomial and floating-point)
  3916. defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
  3917. IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
  3918. def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
  3919. "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
  3920. def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
  3921. "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
  3922. def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
  3923. v2f32, v2f32, fmul, 1>;
  3924. def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
  3925. v4f32, v4f32, fmul, 1>;
  3926. def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
  3927. v4f16, v4f16, fmul, 1>,
  3928. Requires<[HasNEON,HasFullFP16]>;
  3929. def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
  3930. v8f16, v8f16, fmul, 1>,
  3931. Requires<[HasNEON,HasFullFP16]>;
  3932. defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
  3933. def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
  3934. def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
  3935. v2f32, fmul>;
  3936. def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
  3937. Requires<[HasNEON,HasFullFP16]>;
  3938. def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
  3939. v4f16, fmul>,
  3940. Requires<[HasNEON,HasFullFP16]>;
  3941. let Predicates = [HasNEON] in {
  3942. def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
  3943. (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
  3944. (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
  3945. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  3946. (DSubReg_i16_reg imm:$lane))),
  3947. (SubReg_i16_lane imm:$lane)))>;
  3948. def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
  3949. (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
  3950. (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
  3951. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  3952. (DSubReg_i32_reg imm:$lane))),
  3953. (SubReg_i32_lane imm:$lane)))>;
  3954. def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
  3955. (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
  3956. (v4f32 (VMULslfq (v4f32 QPR:$src1),
  3957. (v2f32 (EXTRACT_SUBREG QPR:$src2,
  3958. (DSubReg_i32_reg imm:$lane))),
  3959. (SubReg_i32_lane imm:$lane)))>;
  3960. def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
  3961. (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
  3962. (v8f16 (VMULslhq(v8f16 QPR:$src1),
  3963. (v4f16 (EXTRACT_SUBREG QPR:$src2,
  3964. (DSubReg_i16_reg imm:$lane))),
  3965. (SubReg_i16_lane imm:$lane)))>;
  3966. def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
  3967. (VMULslfd DPR:$Rn,
  3968. (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
  3969. (i32 0))>;
  3970. def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
  3971. (VMULslhd DPR:$Rn,
  3972. (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
  3973. (i32 0))>;
  3974. def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
  3975. (VMULslfq QPR:$Rn,
  3976. (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
  3977. (i32 0))>;
  3978. def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
  3979. (VMULslhq QPR:$Rn,
  3980. (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
  3981. (i32 0))>;
  3982. }
  3983. // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
  3984. defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
  3985. IIC_VMULi16Q, IIC_VMULi32Q,
  3986. "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
  3987. defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
  3988. IIC_VMULi16Q, IIC_VMULi32Q,
  3989. "vqdmulh", "s", int_arm_neon_vqdmulh>;
  3990. let Predicates = [HasNEON] in {
  3991. def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
  3992. (v8i16 (ARMvduplane (v8i16 QPR:$src2),
  3993. imm:$lane)))),
  3994. (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
  3995. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  3996. (DSubReg_i16_reg imm:$lane))),
  3997. (SubReg_i16_lane imm:$lane)))>;
  3998. def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
  3999. (v4i32 (ARMvduplane (v4i32 QPR:$src2),
  4000. imm:$lane)))),
  4001. (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
  4002. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  4003. (DSubReg_i32_reg imm:$lane))),
  4004. (SubReg_i32_lane imm:$lane)))>;
  4005. }
  4006. // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
  4007. defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
  4008. IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
  4009. "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
  4010. defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
  4011. IIC_VMULi16Q, IIC_VMULi32Q,
  4012. "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
  4013. let Predicates = [HasNEON] in {
  4014. def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
  4015. (v8i16 (ARMvduplane (v8i16 QPR:$src2),
  4016. imm:$lane)))),
  4017. (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
  4018. (v4i16 (EXTRACT_SUBREG QPR:$src2,
  4019. (DSubReg_i16_reg imm:$lane))),
  4020. (SubReg_i16_lane imm:$lane)))>;
  4021. def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
  4022. (v4i32 (ARMvduplane (v4i32 QPR:$src2),
  4023. imm:$lane)))),
  4024. (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
  4025. (v2i32 (EXTRACT_SUBREG QPR:$src2,
  4026. (DSubReg_i32_reg imm:$lane))),
  4027. (SubReg_i32_lane imm:$lane)))>;
  4028. }
  4029. // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
  4030. let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
  4031. DecoderNamespace = "NEONData" in {
  4032. defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
  4033. "vmull", "s", ARMvmulls, 1>;
  4034. defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
  4035. "vmull", "u", ARMvmullu, 1>;
  4036. def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
  4037. v8i16, v8i8, int_arm_neon_vmullp, 1>;
  4038. def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
  4039. "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
  4040. Requires<[HasV8, HasAES]>;
  4041. }
  4042. defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
  4043. defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;
  4044. // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
  4045. defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
  4046. "vqdmull", "s", int_arm_neon_vqdmull, 1>;
  4047. defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
  4048. "vqdmull", "s", int_arm_neon_vqdmull>;
  4049. // Vector Multiply-Accumulate and Multiply-Subtract Operations.
  4050. // VMLA : Vector Multiply Accumulate (integer and floating-point)
  4051. defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
  4052. IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  4053. def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
  4054. v2f32, fmul_su, fadd_mlx>,
  4055. Requires<[HasNEON, UseFPVMLx]>;
  4056. def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
  4057. v4f32, fmul_su, fadd_mlx>,
  4058. Requires<[HasNEON, UseFPVMLx]>;
  4059. def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
  4060. v4f16, fmul_su, fadd_mlx>,
  4061. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4062. def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
  4063. v8f16, fmul_su, fadd_mlx>,
  4064. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4065. defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
  4066. IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  4067. def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
  4068. v2f32, fmul_su, fadd_mlx>,
  4069. Requires<[HasNEON, UseFPVMLx]>;
  4070. def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
  4071. v4f32, v2f32, fmul_su, fadd_mlx>,
  4072. Requires<[HasNEON, UseFPVMLx]>;
  4073. def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
  4074. v4f16, fmul, fadd>,
  4075. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4076. def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
  4077. v8f16, v4f16, fmul, fadd>,
  4078. Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  4079. let Predicates = [HasNEON] in {
  4080. def : Pat<(v8i16 (add (v8i16 QPR:$src1),
  4081. (mul (v8i16 QPR:$src2),
  4082. (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
  4083. (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
  4084. (v4i16 (EXTRACT_SUBREG QPR:$src3,
  4085. (DSubReg_i16_reg imm:$lane))),
  4086. (SubReg_i16_lane imm:$lane)))>;
  4087. def : Pat<(v4i32 (add (v4i32 QPR:$src1),
  4088. (mul (v4i32 QPR:$src2),
  4089. (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
  4090. (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
  4091. (v2i32 (EXTRACT_SUBREG QPR:$src3,
  4092. (DSubReg_i32_reg imm:$lane))),
  4093. (SubReg_i32_lane imm:$lane)))>;
  4094. }
  4095. def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
  4096. (fmul_su (v4f32 QPR:$src2),
  4097. (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
  4098. (v4f32 (VMLAslfq (v4f32 QPR:$src1),
  4099. (v4f32 QPR:$src2),
  4100. (v2f32 (EXTRACT_SUBREG QPR:$src3,
  4101. (DSubReg_i32_reg imm:$lane))),
  4102. (SubReg_i32_lane imm:$lane)))>,
  4103. Requires<[HasNEON, UseFPVMLx]>;
  4104. // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
  4105. defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
  4106. "vmlal", "s", ARMvmulls, add>;
  4107. defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
  4108. "vmlal", "u", ARMvmullu, add>;
  4109. defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
  4110. defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
  4111. let Predicates = [HasNEON, HasV8_1a] in {
  4112. // v8.1a Neon Rounding Double Multiply-Op vector operations,
  4113. // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
  4114. // (Q += D * D)
  4115. defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
  4116. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
  4117. null_frag>;
  4118. def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
  4119. (v4i16 DPR:$Vm))),
  4120. (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4121. def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
  4122. (v2i32 DPR:$Vm))),
  4123. (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4124. def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
  4125. (v8i16 QPR:$Vm))),
  4126. (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4127. def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
  4128. (v4i32 QPR:$Vm))),
  4129. (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4130. defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
  4131. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
  4132. null_frag>;
  4133. def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1),
  4134. (v4i16 DPR:$Vn),
  4135. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4136. imm:$lane)))),
  4137. (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
  4138. imm:$lane))>;
  4139. def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1),
  4140. (v2i32 DPR:$Vn),
  4141. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4142. imm:$lane)))),
  4143. (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
  4144. imm:$lane))>;
  4145. def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1),
  4146. (v8i16 QPR:$src2),
  4147. (v8i16 (ARMvduplane (v8i16 QPR:$src3),
  4148. imm:$lane)))),
  4149. (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
  4150. (v8i16 QPR:$src2),
  4151. (v4i16 (EXTRACT_SUBREG
  4152. QPR:$src3,
  4153. (DSubReg_i16_reg imm:$lane))),
  4154. (SubReg_i16_lane imm:$lane)))>;
  4155. def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1),
  4156. (v4i32 QPR:$src2),
  4157. (v4i32 (ARMvduplane (v4i32 QPR:$src3),
  4158. imm:$lane)))),
  4159. (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
  4160. (v4i32 QPR:$src2),
  4161. (v2i32 (EXTRACT_SUBREG
  4162. QPR:$src3,
  4163. (DSubReg_i32_reg imm:$lane))),
  4164. (SubReg_i32_lane imm:$lane)))>;
  4165. // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
  4166. // (Q -= D * D)
  4167. defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
  4168. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
  4169. null_frag>;
  4170. def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
  4171. (v4i16 DPR:$Vm))),
  4172. (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4173. def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
  4174. (v2i32 DPR:$Vm))),
  4175. (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  4176. def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
  4177. (v8i16 QPR:$Vm))),
  4178. (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4179. def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
  4180. (v4i32 QPR:$Vm))),
  4181. (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  4182. defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
  4183. IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
  4184. null_frag>;
  4185. def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1),
  4186. (v4i16 DPR:$Vn),
  4187. (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
  4188. imm:$lane)))),
  4189. (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  4190. def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1),
  4191. (v2i32 DPR:$Vn),
  4192. (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
  4193. imm:$lane)))),
  4194. (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
  4195. imm:$lane))>;
  4196. def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1),
  4197. (v8i16 QPR:$src2),
  4198. (v8i16 (ARMvduplane (v8i16 QPR:$src3),
  4199. imm:$lane)))),
  4200. (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
  4201. (v8i16 QPR:$src2),
  4202. (v4i16 (EXTRACT_SUBREG
  4203. QPR:$src3,
  4204. (DSubReg_i16_reg imm:$lane))),
  4205. (SubReg_i16_lane imm:$lane)))>;
  4206. def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1),
  4207. (v4i32 QPR:$src2),
  4208. (v4i32 (ARMvduplane (v4i32 QPR:$src3),
  4209. imm:$lane)))),
  4210. (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
  4211. (v4i32 QPR:$src2),
  4212. (v2i32 (EXTRACT_SUBREG
  4213. QPR:$src3,
  4214. (DSubReg_i32_reg imm:$lane))),
  4215. (SubReg_i32_lane imm:$lane)))>;
  4216. }
  //   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long
  //              (Q += D * D)
  // Both instruction families are instantiated with null_frag as the operator;
  // the explicit patterns further down map a saddsat of an
  // int_arm_neon_vqdmull result onto them.
  defm VQDMLAL   : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                               "vqdmlal", "s", null_frag>;
  defm VQDMLALsl : N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

  let Predicates = [HasNEON] in {
    // Plain register forms.
    def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                              (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                           (v4i16 DPR:$Vm))))),
              (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
    def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                              (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                           (v2i32 DPR:$Vm))))),
              (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // By-lane forms: the second vqdmull operand is a duplicated lane of $Vm.
    def : Pat<(v4i32 (saddsat
                        (v4i32 QPR:$src1),
                        (v4i32 (int_arm_neon_vqdmull
                                  (v4i16 DPR:$Vn),
                                  (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                      imm:$lane)))))),
              (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
    def : Pat<(v2i64 (saddsat
                        (v2i64 QPR:$src1),
                        (v2i64 (int_arm_neon_vqdmull
                                  (v2i32 DPR:$Vn),
                                  (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                      imm:$lane)))))),
              (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  }
  //   VMLS     : Vector Multiply Subtract (integer and floating-point)

  // Integer register forms.
  defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;

  // f32 register forms (gated on UseFPVMLx).
  def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;
  def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                            v4f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;

  // f16 register forms (additionally require HasFullFP16).
  def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
                            v4f16, fmul, fsub>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
                            v8f16, fmul, fsub>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

  // Integer by-lane forms.
  defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                                IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;

  // f32 by-lane forms.
  def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                              v2f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;
  def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                              v4f32, v2f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;

  // f16 by-lane forms.
  def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
                                v4f16, fmul, fsub>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                                v8f16, v4f16, fmul, fsub>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

  // Q-register by-lane selection where the lane source is itself a Q register:
  // a D subregister is extracted from $src3 (index chosen by DSubReg_*_reg on
  // the lane) and the lane operand is the immediate run through SubReg_*_lane.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                          (mul (v8i16 QPR:$src2),
                               (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                   imm:$lane))))),
              (v8i16 (VMLSslv8i16
                        (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                        (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                               (DSubReg_i16_reg imm:$lane))),
                        (SubReg_i16_lane imm:$lane)))>;

    def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                          (mul (v4i32 QPR:$src2),
                               (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                   imm:$lane))))),
              (v4i32 (VMLSslv4i32
                        (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                        (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                               (DSubReg_i32_reg imm:$lane))),
                        (SubReg_i32_lane imm:$lane)))>;
  }

  // Same scheme for v4f32, using the fmul_su / fsub_mlx fragments and the
  // 32-bit subregister transforms.
  def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                             (fmul_su (v4f32 QPR:$src2),
                                      (v4f32 (ARMvduplane (v4f32 QPR:$src3),
                                                          imm:$lane))))),
            (v4f32 (VMLSslfq
                      (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                      (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                      (SubReg_i32_lane imm:$lane)))>,
        Requires<[HasNEON, UseFPVMLx]>;
  //   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
  // Signed and unsigned register forms, followed by the by-lane forms; the
  // signed variants use ARMvmulls and the unsigned ones ARMvmullu, all with
  // `sub` as the accumulate operator.
  defm VMLSLs   : N3VLMulOp_QHS<0, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                                "vmlsl", "s", ARMvmulls, sub>;
  defm VMLSLu   : N3VLMulOp_QHS<1, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                                "vmlsl", "u", ARMvmullu, sub>;

  defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
  defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
  //   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long
  //              (Q -= D * D)
  // Both instruction families are instantiated with null_frag as the operator;
  // the explicit patterns further down map an ssubsat of an
  // int_arm_neon_vqdmull result onto them.
  defm VQDMLSL   : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                               "vqdmlsl", "s", null_frag>;
  defm VQDMLSLsl : N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

  let Predicates = [HasNEON] in {
    // Plain register forms.
    def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                              (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                           (v4i16 DPR:$Vm))))),
              (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
    def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                              (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                           (v2i32 DPR:$Vm))))),
              (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // By-lane forms: the second vqdmull operand is a duplicated lane of $Vm.
    def : Pat<(v4i32 (ssubsat
                        (v4i32 QPR:$src1),
                        (v4i32 (int_arm_neon_vqdmull
                                  (v4i16 DPR:$Vn),
                                  (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                      imm:$lane)))))),
              (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
    def : Pat<(v2i64 (ssubsat
                        (v2i64 QPR:$src1),
                        (v2i64 (int_arm_neon_vqdmull
                                  (v2i32 DPR:$Vn),
                                  (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                      imm:$lane)))))),
              (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  }
  // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
  // The f32 variants require HasVFP4 and use the fmul_su / f{add,sub}_mlx
  // fragments; the f16 variants require HasFullFP16 and use plain
  // fmul / fadd / fsub. All of them are gated on UseFusedMAC.

  // Fused Vector Multiply-Accumulate (floating-point)
  def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                  Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                            v4f32, fmul_su, fadd_mlx>,
                  Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                            v4f16, fmul, fadd>,
                  Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                            v8f16, fmul, fadd>,
                  Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

  // Fused Vector Multiply Subtract (floating-point)
  def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                            v4f32, fmul_su, fsub_mlx>,
                  Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                            v4f16, fmul, fsub>,
                  Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                            v8f16, fmul, fsub>,
                  Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  // Match @llvm.fma.* intrinsics.
  // The fma node carries the addend last, whereas the instructions take the
  // accumulator ($src1) first, so the operands are reordered on output.

  // f16 (requires HasFullFP16)
  def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;

  // f32 (requires HasVFP4)
  def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;

  // f32 with a negated first multiplicand selects VFMS.
  def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
            (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
            (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  // ARMv8.2a dot product instructions.
  // We put them in the VFPV8 decoder namespace because the ARM and Thumb
  // encodings are the same and thus no further bit twiddling is necessary
  // in the disassembler.
  //
  // VDOT describes the three-register form: the accumulator input $Vd is tied
  // to the result $dst, and the selection pattern applies OpNode to the
  // accumulator (AccumTy) and the two InputTy sources.
  class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
             string AsmTy, ValueType AccumTy, ValueType InputTy,
             SDPatternOperator OpNode>
    : N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
            Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }

  // D-register (v2i32 accumulator, v8i8 inputs) variants.
  def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8,
                    int_arm_neon_udot>;
  def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8,
                    int_arm_neon_sdot>;

  // Q-register (v4i32 accumulator, v16i8 inputs) variants.
  def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8,
                    int_arm_neon_udot>;
  def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8,
                    int_arm_neon_sdot>;
  // Indexed dot product instructions:
  // DOTI emits one instruction record (named after the defm itself) plus one
  // selection pattern. The instruction has no built-in pattern; the single-bit
  // lane operand is encoded in Inst{5} and the assembly string is overridden so
  // the lane prints directly after $Vm. RHS is the dag used for the $Vm operand
  // on the output side of the pattern.
  multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                  ValueType AccumType, ValueType InputType,
                  SDPatternOperator OpNode, dag RHS> {
    def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                   (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                   N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
      let Constraints = "$dst = $Vd";
      let Predicates = [HasDotProd];
      let DecoderNamespace = "VFPV8";
    }

    // The indexed operand appears in the DAG as a lane duplicate performed in
    // the accumulator type and then bitconverted to the input type.
    def : Pat<
      (AccumType (OpNode (AccumType Ty:$Vd),
                         (InputType Ty:$Vn),
                         (InputType (bitconvert (AccumType
                                      (ARMvduplane (AccumType Ty:$Vm),
                                                   VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
  }

  // D-register variants pass $Vm straight through as a DPR_VFP2 register.
  defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                      int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
  defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                      int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;

  // Q-register variants feed the dsub_0 subregister of $Vm to the instruction.
  defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                      int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                      int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  // v8.6A matrix multiplication extension
  // Everything in this block is predicated on HasMatMulInt8.
  let Predicates = [HasMatMulInt8] in {

    // Q-register matrix multiply-accumulate: v4i32 accumulator tied to the
    // result, two v16i8 sources, selected directly from OpNode.
    class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                    SDPatternOperator OpNode>
      : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
              Asm, AsmTy,
              [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                              (v16i8 QPR:$Vn),
                                              (v16i8 QPR:$Vm)))]> {
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    // Indexed mixed-sign dot product: one instruction record (named after the
    // defm) with the lane bit in Inst{5}, plus a pattern in which the indexed
    // operand is the third OpNode argument. RHS is the dag used for $Vm on the
    // output side.
    multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy,
                               RegisterClass RegTy, ValueType AccumTy,
                               ValueType InputTy, SDPatternOperator OpNode,
                               dag RHS> {
      def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                     (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm,
                          VectorIndex32:$lane),
                     N3RegFrm, NoItinerary, Asm, AsmTy, []> {
        bit lane;
        let Inst{5} = lane;
        let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
        let DecoderNamespace = "VFPV8";
        let Constraints = "$dst = $Vd";
      }

      def : Pat<
        (AccumTy (OpNode (AccumTy RegTy:$Vd),
                         (InputTy RegTy:$Vn),
                         (InputTy (bitconvert (AccumTy
                                    (ARMvduplane (AccumTy RegTy:$Vm),
                                                 VectorIndex32:$lane)))))),
        (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                  VectorIndex32:$lane)>;
    }

    // vsudot by lane: instantiates N3VMixedDotLane with null_frag for both the
    // operator and RHS, then adds its own pattern on int_arm_neon_usdot in
    // which the lane-duplicated operand is the second argument and $Vn the
    // third.
    multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy,
                         ValueType InputTy, dag RHS>
      : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy,
                        null_frag, null_frag> {
      def : Pat<
        (AccumTy (int_arm_neon_usdot
                    (AccumTy RegTy:$Vd),
                    (InputTy (bitconvert (AccumTy
                               (ARMvduplane (AccumTy RegTy:$Vm),
                                            VectorIndex32:$lane)))),
                    (InputTy RegTy:$Vn))),
        (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                  VectorIndex32:$lane)>;
    }

    // Matrix multiply-accumulate instructions.
    def VSMMLA  : N3VMatMul<0, 0, "vsmmla",  "s8", int_arm_neon_smmla>;
    def VUMMLA  : N3VMatMul<0, 1, "vummla",  "u8", int_arm_neon_ummla>;
    def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;

    // vusdot, register forms.
    def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,
                       int_arm_neon_usdot>;
    def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8,
                       int_arm_neon_usdot>;

    // vusdot, indexed forms.
    defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                    int_arm_neon_usdot,
                                    (v2i32 DPR_VFP2:$Vm)>;
    defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                    int_arm_neon_usdot,
                                    (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

    // vsudot, indexed forms.
    defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
    defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8,
                              (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  }
  // ARMv8.3 complex operations

  // Three-register complex form with a tied accumulator ($src1 = $Vd).
  // The first N3VCP8 argument is left unset ({?,?}); the 2-bit rotation
  // operand is written into Inst{24-23}.
  class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                              InstrItinClass itin, dag oops, dag iops,
                              string opc, string dt, list<dag> pattern>
    : N3VCP8<{?,?}, {op21,s}, q, op4, oops, iops, itin, opc, dt,
             "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    let Inst{24-23} = rot;
  }
  // Three-register complex form without a tied operand (empty constraint
  // string). Only the upper bit of the first N3VCP8 argument is left unset;
  // the 1-bit rotation operand is written into Inst{24}.
  class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                             InstrItinClass itin, dag oops, dag iops,
                             string opc, string dt, list<dag> pattern>
    : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, iops, itin, opc, dt,
             "$Vd, $Vn, $Vm, $rot", "", pattern> {
    bits<1> rot;
    let Inst{24} = rot;
  }
  // Indexed complex form with a tied accumulator ($src1 = $Vd) and a one-bit
  // lane operand: the rotation goes into Inst{21-20} and the lane into Inst{5}.
  class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q,
                                    InstrItinClass itin, dag oops, dag iops,
                                    string opc, string dt, list<dag> pattern>
    : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;

    let Inst{21-20} = rot;
    let Inst{5} = lane;
  }
  // Indexed complex form with a tied accumulator ($src1 = $Vd) where the lane
  // operand occupies no encoding bits: the rotation goes into Inst{21-20} and
  // Inst{5} takes bit 4 of Vm instead of a lane bit.
  class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q,
                                    InstrItinClass itin, dag oops, dag iops,
                                    string opc, string dt, list<dag> pattern>
    : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;

    let Inst{21-20} = rot;
    let Inst{5} = Vm{4};

    // This is needed because the lane operand does not have any bits in the
    // encoding (it only has one possible value), so we need to manually set it
    // to its default value.
    let DecoderMethod = "DecodeNEONComplexLane64Instruction";
  }
  // Instantiates the tied three-register complex form for v4f16, v8f16, v2f32
  // and v4f32. The f16 variants additionally require HasFullFP16. No selection
  // patterns are attached here (empty pattern lists).
  multiclass N3VCP8ComplexTied<bit op21, bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
                    OpcodeStr, "f32", []>;
    }
  }
  // Instantiates the untied three-register complex form for v4f16, v8f16,
  // v2f32 and v4f32, using the complexrotateopodd rotation operand. The f16
  // variants additionally require HasFullFP16. No selection patterns are
  // attached here (empty pattern lists).
  multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
                    (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
                    (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
                    OpcodeStr, "f32", []>;
    }
  }
  // These instructions index by pairs of lanes, so the VectorIndexes are twice
  // as wide as the data types.
  // The f16 variants use the Lane32 base class with a DPR_VFP2 $Vm and a
  // VectorIndex32 lane; the f32 variants use the Lane64 base class with a DPR
  // $Vm and a VectorIndex64 lane.
  multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                            (outs DPR:$Vd),
                            (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                 VectorIndex32:$lane, complexrotateop:$rot),
                            OpcodeStr, "f16", []>;
      def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                            (outs QPR:$Vd),
                            (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                                 VectorIndex32:$lane, complexrotateop:$rot),
                            OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                            (outs DPR:$Vd),
                            (ins DPR:$src1, DPR:$Vn, DPR:$Vm,
                                 VectorIndex64:$lane, complexrotateop:$rot),
                            OpcodeStr, "f32", []>;
      def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                            (outs QPR:$Vd),
                            (ins QPR:$src1, QPR:$Vn, DPR:$Vm,
                                 VectorIndex64:$lane, complexrotateop:$rot),
                            OpcodeStr, "f32", []>;
    }
  }
  // Complex instruction instantiations. VCMLA is instantiated twice: once for
  // the register forms and once for the indexed forms (whose record names get
  // the *_indexed suffixes).
  defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">;
  defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">;
  defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">;

  // vcadd intrinsic selection: the rot90 intrinsic supplies rotation operand 0
  // and the rot270 intrinsic supplies rotation operand 1.

  // f16 element types.
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn),
                                               (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn),
                                                (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn),
                                               (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn),
                                                (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
  }

  // f32 element types.
  let Predicates = [HasNEON,HasV8_3a] in {
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn),
                                               (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn),
                                                (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn),
                                               (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn),
                                                (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
  }
  // Vector Subtract Operations.

  //   VSUB     : Vector Subtract (integer and floating-point)
  // Integer forms come from one multiclass; the f32 and f16 forms are defined
  // individually, with the f16 ones additionally requiring HasFullFP16.
  defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                           "vsub", "i", sub, 0>;

  def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                       v2f32, v2f32, fsub, 0>;
  def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                       v4f32, v4f32, fsub, 0>;

  def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                       v4f16, v4f16, fsub, 0>,
                  Requires<[HasNEON,HasFullFP16]>;
  def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                       v8f16, v8f16, fsub, 0>,
                  Requires<[HasNEON,HasFullFP16]>;
  //   VSUBL    : Vector Subtract Long (Q = D - D)
  // Signed variant extends with sext, unsigned variant with zanyext.
  defm VSUBLs   : N3VLExt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                              "vsubl", "s", sub, sext, 0>;
  defm VSUBLu   : N3VLExt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                              "vsubl", "u", sub, zanyext, 0>;

  //   VSUBW    : Vector Subtract Wide (Q = Q - D)
  defm VSUBWs   : N3VW_QHS<0, 1, 0b0011, 0, "vsubw", "s", sub, sext, 0>;
  defm VSUBWu   : N3VW_QHS<1, 1, 0b0011, 0, "vsubw", "u", sub, zanyext, 0>;

  //   VHSUB    : Vector Halving Subtract
  // Selected from the int_arm_neon_vhsubs / int_arm_neon_vhsubu intrinsics.
  defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                             IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                             IIC_VSUBi4Q,
                             "vhsub", "s", int_arm_neon_vhsubs, 0>;
  defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                             IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                             IIC_VSUBi4Q,
                             "vhsub", "u", int_arm_neon_vhsubu, 0>;
  //   VQSUB    : Vector Saturating Subtract
  // Signed form is selected from ssubsat, unsigned form from usubsat.
  defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                              IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                              IIC_VSUBi4Q,
                              "vqsub", "s", ssubsat, 0>;
  defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                              IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                              IIC_VSUBi4Q,
                              "vqsub", "u", usubsat, 0>;
  //   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
  // Defined with null_frag; selection is done by the explicit patterns below.
  defm VSUBHN   : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i", null_frag, 0>;

  //   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half
  //              (D = Q - Q)
  defm VRSUBHN  : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
                              int_arm_neon_vrsubhn, 0>;

  // VSUBHN selection: a truncate of (sub >> half the element width), using the
  // unsigned immediate shift node.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm),
                                        8))),
              (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm),
                                         16))),
              (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm),
                                         32))),
              (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
  }
  // Vector Comparisons.

  //   VCEQ     : Vector Compare Equal
  // Integer, f32 and f16 register forms all use the ARMCCeq condition; the
  // floating-point forms produce an integer vector of matching element width.
  defm VCEQ     : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D,
                              IIC_VSUBi4Q, IIC_VSUBi4Q, "vceq", "i",
                              ARMCCeq, 1>;

  def  VCEQfd   : N3VD_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBIND, "vceq", "f32",
                           v2i32, v2f32, ARMCCeq, 1>;
  def  VCEQfq   : N3VQ_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vceq", "f32",
                           v4i32, v4f32, ARMCCeq, 1>;

  def  VCEQhd   : N3VD_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBIND, "vceq", "f16",
                           v4i16, v4f16, ARMCCeq, 1>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCEQhq   : N3VQ_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vceq", "f16",
                           v8i16, v8f16, ARMCCeq, 1>,
                  Requires<[HasNEON, HasFullFP16]>;

  // Compare-with-zero form ("$Vd, $Vm, #0").
  let TwoOperandAliasConstraint = "$Vm = $Vd" in
  defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                              "$Vd, $Vm, #0", ARMCCeq>;
  //   VCGE     : Vector Compare Greater Than or Equal
  // Signed integer and floating-point forms use ARMCCge; the unsigned integer
  // form uses ARMCChs.
  defm VCGEs    : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D,
                              IIC_VSUBi4Q, IIC_VSUBi4Q, "vcge", "s",
                              ARMCCge, 0>;
  defm VCGEu    : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D,
                              IIC_VSUBi4Q, IIC_VSUBi4Q, "vcge", "u",
                              ARMCChs, 0>;

  def  VCGEfd   : N3VD_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBIND, "vcge", "f32",
                           v2i32, v2f32, ARMCCge, 0>;
  def  VCGEfq   : N3VQ_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vcge", "f32",
                           v4i32, v4f32, ARMCCge, 0>;

  def  VCGEhd   : N3VD_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBIND, "vcge", "f16",
                           v4i16, v4f16, ARMCCge, 0>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCGEhq   : N3VQ_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vcge", "f16",
                           v8i16, v8f16, ARMCCge, 0>,
                  Requires<[HasNEON, HasFullFP16]>;

  // Compare-with-zero forms ("$Vd, $Vm, #0"): vcge and vcle.
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                              "$Vd, $Vm, #0", ARMCCge>;
    defm VCLEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                              "$Vd, $Vm, #0", ARMCCle>;
  }
  //   VCGT     : Vector Compare Greater Than
  // Signed integer and floating-point forms use ARMCCgt; the unsigned integer
  // form uses ARMCChi.
  defm VCGTs    : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D,
                              IIC_VSUBi4Q, IIC_VSUBi4Q, "vcgt", "s",
                              ARMCCgt, 0>;
  defm VCGTu    : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D,
                              IIC_VSUBi4Q, IIC_VSUBi4Q, "vcgt", "u",
                              ARMCChi, 0>;

  def  VCGTfd   : N3VD_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBIND, "vcgt", "f32",
                           v2i32, v2f32, ARMCCgt, 0>;
  def  VCGTfq   : N3VQ_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBINQ, "vcgt", "f32",
                           v4i32, v4f32, ARMCCgt, 0>;

  def  VCGThd   : N3VD_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBIND, "vcgt", "f16",
                           v4i16, v4f16, ARMCCgt, 0>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCGThq   : N3VQ_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBINQ, "vcgt", "f16",
                           v8i16, v8f16, ARMCCgt, 0>,
                  Requires<[HasNEON, HasFullFP16]>;

  // Compare-with-zero forms ("$Vd, $Vm, #0"): vcgt and vclt.
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                              "$Vd, $Vm, #0", ARMCCgt>;
    defm VCLTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                              "$Vd, $Vm, #0", ARMCClt>;
  }
  //   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
  // Selected from int_arm_neon_vacge; the f16 forms require HasFullFP16.
  def  VACGEfd  : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                          "vacge", "f32", v2i32, v2f32,
                          int_arm_neon_vacge, 0>;
  def  VACGEfq  : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                          "vacge", "f32", v4i32, v4f32,
                          int_arm_neon_vacge, 0>;
  def  VACGEhd  : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND,
                          "vacge", "f16", v4i16, v4f16,
                          int_arm_neon_vacge, 0>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                          "vacge", "f16", v8i16, v8f16,
                          int_arm_neon_vacge, 0>,
                  Requires<[HasNEON, HasFullFP16]>;

  //   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
  // Selected from int_arm_neon_vacgt; the f16 forms require HasFullFP16.
  def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                          "vacgt", "f32", v2i32, v2f32,
                          int_arm_neon_vacgt, 0>;
  def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                          "vacgt", "f32", v4i32, v4f32,
                          int_arm_neon_vacgt, 0>;
  def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND,
                          "vacgt", "f16", v4i16, v4f16,
                          int_arm_neon_vacgt, 0>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                          "vacgt", "f16", v8i16, v8f16,
                          int_arm_neon_vacgt, 0>,
                  Requires<[HasNEON, HasFullFP16]>;
  //   VTST     : Vector Test Bits
  defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D,
                          IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

  // Three-operand vaclt / vacle assembler aliases: they map onto VACGT / VACGE
  // with the $Vn and $Vm source operands swapped.
  def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                     (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                     (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                     (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                     (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

  // f16 versions of the same aliases.
  let Predicates = [HasNEON, HasFullFP16] in {
    def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                       (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                       (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
    def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                       (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                       (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  }
  // +fp16fml Floating Point Multiplication Variants
  // Everything in this block requires HasNEON and HasFP16FML and is placed in
  // the VFPV8 decoder namespace. None of the records carries a selection
  // pattern (empty pattern lists).
  let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

    // Three-register form built on N3VCP8, with 1 passed for the argument that
    // follows op2.
    class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
      : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm),
               NoItinerary, asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Three-register form built on N3VCP8Q0, with 0 passed for the argument
    // that follows op2.
    class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
      : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm),
                 NoItinerary, asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Vd, Vs, Vs[0-15], Idx[0-1]
    // The one-bit index goes into Inst{3}; the Vn and Vm register fields are
    // written into the encoding bit by bit.
    class VFMD<string opc, string type, bits<2> S>
      : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
                   (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
                   IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bit idx;
      let Inst{3} = idx;
      let Inst{19-16} = Vn{4-1};
      let Inst{7} = Vn{0};
      let Inst{5} = Vm{0};
      let Inst{2-0} = Vm{3-1};
    }

    // Vq, Vd, Vd[0-7], Idx[0-3]
    // The two-bit index is split across Inst{5} (high bit) and Inst{3}
    // (low bit).
    class VFMQ<string opc, string type, bits<2> S>
      : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
                   (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                   IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bits<2> idx;
      let Inst{5} = idx{1};
      let Inst{3} = idx{0};
    }

    //                                                     op1   op2   op3
    def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
    def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
    def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
    def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;

    // Indexed forms.
    def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
    def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
    def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
    def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
  } // HasNEON, HasFP16FML
  // Two-operand vaclt / vacle assembler aliases: "$Vd, $Vm" maps onto
  // VACGT / VACGE with $Vm as the first source and $Vd reused as the second.
  def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                     (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                     (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                     (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                     (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

  // f16 versions of the same aliases.
  let Predicates = [HasNEON, HasFullFP16] in {
    def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                       (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                       (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
    def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                       (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                       (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  }
  // Vector Bitwise Operations.

  // Bitwise NOT expressed as an xor with an all-ones vector, in a D-register
  // flavour (ARMimmAllOnesD) and a Q-register flavour (ARMimmAllOnesV).
  def vnotd : PatFrag<(ops node:$in),
                      (xor node:$in, ARMimmAllOnesD)>;
  def vnotq : PatFrag<(ops node:$in),
                      (xor node:$in, ARMimmAllOnesV)>;

  //   VAND     : Vector Bitwise AND
  def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                        v2i32, v2i32, and, 1>;
  def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                        v4i32, v4i32, and, 1>;

  //   VEOR     : Vector Bitwise Exclusive OR
  def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                        v2i32, v2i32, xor, 1>;
  def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                        v4i32, v4i32, xor, 1>;

  //   VORR     : Vector Bitwise OR
  def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                        v2i32, v2i32, or, 1>;
  def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                        v4i32, v4i32, or, 1>;
  // Adds selection patterns for the element types not covered by the
  // instruction definitions themselves: v8i8 / v4i16 / v1i64 map to the
  // Name#"d" instruction and v16i8 / v8i16 / v2i64 map to Name#"q".
  multiclass BitwisePatterns<string Name, SDPatternOperator OpNodeD,
                             SDPatternOperator OpNodeQ> {
    // 64-bit vectors.
    def : Pat<(v8i8  (OpNodeD DPR:$LHS, DPR:$RHS)),
              (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
    def : Pat<(v4i16 (OpNodeD DPR:$LHS, DPR:$RHS)),
              (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
    def : Pat<(v1i64 (OpNodeD DPR:$LHS, DPR:$RHS)),
              (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;

    // 128-bit vectors.
    def : Pat<(v16i8 (OpNodeQ QPR:$LHS, QPR:$RHS)),
              (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
    def : Pat<(v8i16 (OpNodeQ QPR:$LHS, QPR:$RHS)),
              (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
    def : Pat<(v2i64 (OpNodeQ QPR:$LHS, QPR:$RHS)),
              (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  }

  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VAND", and, and>;
    defm : BitwisePatterns<"VORR", or, or>;
    defm : BitwisePatterns<"VEOR", xor, xor>;
  }
  // VORR (immediate) : OR a splatted modified-immediate into a register.
  // The destination is tied to the $src input ("$src = $Vd").  The i16 forms
  // copy bit 9 of $SIMM into the encoding, the i32 forms copy bits 10-9.

  // 16-bit elements, D register.
  def VORRiv4i16
    : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
               IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
               [(set DPR:$Vd,
                     (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  // 32-bit elements, D register.
  def VORRiv2i32
    : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
               IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
               [(set DPR:$Vd,
                     (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  // 16-bit elements, Q register.
  def VORRiv8i16
    : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
               IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
               [(set QPR:$Vd,
                     (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  // 32-bit elements, Q register.
  def VORRiv4i32
    : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
               IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
               [(set QPR:$Vd,
                     (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VBIC : Vector Bitwise Bit Clear (AND NOT)
  // Selected for (and $Vn, (not $Vm)); the NOT is expressed with the
  // vnotd / vnotq fragments.
  let TwoOperandAliasConstraint = "$Vn = $Vd" in {
    def VBICd
      : N3VX<0, 0, 0b01, 0b0001, 0, 1,
             (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
             N3RegFrm, IIC_VBINiD, "vbic", "$Vd, $Vn, $Vm", "",
             [(set DPR:$Vd, (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;

    def VBICq
      : N3VX<0, 0, 0b01, 0b0001, 1, 1,
             (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
             N3RegFrm, IIC_VBINiQ, "vbic", "$Vd, $Vn, $Vm", "",
             [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;
  }

  // Same AND-NOT shape for the vector types handled by BitwisePatterns.
  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VBIC",
                           BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>,
                           BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>;
  }
  // VBIC (immediate) : bit-clear with a splatted modified-immediate.
  // The destination is tied to the $src input ("$src = $Vd").  The i16 forms
  // copy bit 9 of $SIMM into the encoding, the i32 forms copy bits 10-9.

  // 16-bit elements, D register.
  def VBICiv4i16
    : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
               (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
               IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
               [(set DPR:$Vd,
                     (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  // 32-bit elements, D register.
  def VBICiv2i32
    : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
               (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
               IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
               [(set DPR:$Vd,
                     (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  // 16-bit elements, Q register.
  def VBICiv8i16
    : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
               (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
               IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
               [(set QPR:$Vd,
                     (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  // 32-bit elements, Q register.
  def VBICiv4i32
    : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
               (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
               IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
               [(set QPR:$Vd,
                     (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VORN : Vector Bitwise OR NOT
  // Selected for (or $Vn, (not $Vm)); the NOT is expressed with the
  // vnotd / vnotq fragments.
  def VORNd
    : N3VX<0, 0, 0b11, 0b0001, 0, 1,
           (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
           N3RegFrm, IIC_VBINiD, "vorn", "$Vd, $Vn, $Vm", "",
           [(set DPR:$Vd, (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;

  def VORNq
    : N3VX<0, 0, 0b11, 0b0001, 1, 1,
           (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
           N3RegFrm, IIC_VBINiQ, "vorn", "$Vd, $Vn, $Vm", "",
           [(set QPR:$Vd, (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;

  // Same OR-NOT shape for the vector types handled by BitwisePatterns.
  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VORN",
                           BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>,
                           BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>;
  }
  // VMVN : Vector Bitwise NOT (Immediate)
  // All four forms are marked rematerializable.  The i16 forms copy bit 9 of
  // $SIMM into the encoding; the i32 forms copy bits 11-8.
  let isReMaterializable = 1 in {
    // 16-bit elements, D register.
    def VMVNv4i16
      : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                 IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "",
                 [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    // 16-bit elements, Q register.
    def VMVNv8i16
      : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                 IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "",
                 [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    // 32-bit elements, D register.
    def VMVNv2i32
      : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                 (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                 IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "",
                 [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }

    // 32-bit elements, Q register.
    def VMVNv4i32
      : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                 (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                 IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "",
                 [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }
  }
  // VMVN : Vector Bitwise NOT
  // NOTE(review): the Q-register form below uses the D-register itinerary
  // IIC_VSUBiD, whereas other d/q pairs in this section use separate D and Q
  // itineraries (e.g. IIC_VBINiD / IIC_VBINiQ) -- confirm this is intended.
  def VMVNd
    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
           (outs DPR:$Vd), (ins DPR:$Vm),
           IIC_VSUBiD, "vmvn", "$Vd, $Vm", "",
           [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;

  def VMVNq
    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
           (outs QPR:$Vd), (ins QPR:$Vm),
           IIC_VSUBiD, "vmvn", "$Vd, $Vm", "",
           [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;

  // The definitions above only match v2i32 / v4i32; select the same
  // instructions for the other integer vector types as well.
  let Predicates = [HasNEON] in {
    foreach VT = [v1i64, v4i16, v8i8] in
      def : Pat<(VT (vnotd DPR:$src)), (VMVNd DPR:$src)>;

    foreach VT = [v2i64, v8i16, v16i8] in
      def : Pat<(VT (vnotq QPR:$src)), (VMVNq QPR:$src)>;
  }
  // The TwoAddress pass will not go looking for equivalent operations
  // with different register constraints; it just inserts copies.
  // That is why the pseudo VBSP is implemented. It is expanded later into
  // VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
  def VBSPd
    : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  IIC_VBINiD, "",
                  [(set DPR:$Vd,
                        (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

  let Predicates = [HasNEON] in {
    // The vbsl intrinsic on every 64-bit vector type listed here selects the
    // pseudo, with the operands passed through in the same order.
    foreach VT = [v8i8, v4i16, v2i32, v2f32, v1i64] in
      def : Pat<(VT (int_arm_neon_vbsl (VT DPR:$src1), (VT DPR:$Vn),
                                       (VT DPR:$Vm))),
                (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // Open-coded select: (Vn & Vd) | (Vm & ~Vd), with Vd becoming the first
    // operand of the pseudo.
    foreach VT = [v8i8, v4i16, v2i32, v1i64] in
      def : Pat<(VT (or (and DPR:$Vn, DPR:$Vd),
                        (and DPR:$Vm, (vnotd DPR:$Vd)))),
                (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  }
  // Q-register bitwise-select pseudo; it has no assembly string and its
  // pattern matches the NEONvbsp node on v4i32.
  def VBSPq
    : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  IIC_VBINiQ, "",
                  [(set QPR:$Vd,
                        (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

  let Predicates = [HasNEON] in {
    // The vbsl intrinsic on every 128-bit vector type listed here selects the
    // pseudo, with the operands passed through in the same order.
    foreach VT = [v16i8, v8i16, v4i32, v4f32, v2i64] in
      def : Pat<(VT (int_arm_neon_vbsl (VT QPR:$src1), (VT QPR:$Vn),
                                       (VT QPR:$Vm))),
                (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

    // Open-coded select: (Vn & Vd) | (Vm & ~Vd), with Vd becoming the first
    // operand of the pseudo.
    foreach VT = [v16i8, v8i16, v4i32, v2i64] in
      def : Pat<(VT (or (and QPR:$Vn, QPR:$Vd),
                        (and QPR:$Vm, (vnotq QPR:$Vd)))),
                (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  }
  // VBSL, VBIF and VBIT share one shape: three inputs ($src1, $Vn, $Vm), the
  // destination tied to $src1, the assembly operands "$Vd, $Vn, $Vm" and an
  // empty selection pattern list.  They differ in mnemonic and in the third
  // template argument (0b01 / 0b11 / 0b10).

  // VBSL : Vector Bitwise Select
  def VBSLd
    : N3VX<1, 0, 0b01, 0b0001, 0, 1,
           (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
           N3RegFrm, IIC_VBINiD, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;
  def VBSLq
    : N3VX<1, 0, 0b01, 0b0001, 1, 1,
           (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
           N3RegFrm, IIC_VBINiQ, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;

  // VBIF : Vector Bitwise Insert if False
  // NOTE(review): the previous comment read
  //   like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
  // but those operand names do not appear in the definitions below.
  def VBIFd
    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
           (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
           N3RegFrm, IIC_VBINiD, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;
  def VBIFq
    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
           (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
           N3RegFrm, IIC_VBINiQ, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;

  // VBIT : Vector Bitwise Insert if True
  // NOTE(review): the previous comment read
  //   like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
  // but those operand names do not appear in the definitions below.
  def VBITd
    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
           (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
           N3RegFrm, IIC_VBINiD, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;
  def VBITq
    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
           (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
           N3RegFrm, IIC_VBINiQ, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
           []>;
  //===--------------------------------------------------------------------===//
  // Vector Absolute Differences.
  //===--------------------------------------------------------------------===//

  // VABD : Vector Absolute Difference
  // Integer forms (signed and unsigned).
  defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "s", int_arm_neon_vabds, 1>;
  defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "u", int_arm_neon_vabdu, 1>;

  // Single-precision forms; they match the same int_arm_neon_vabds intrinsic
  // as the signed integer forms, on floating-point vector types.
  def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
  def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

  // Half-precision forms, additionally gated on HasFullFP16.
  def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VABDL : Vector Absolute Difference Long (Q = | D - D |)
  // Both the signed and the unsigned form pass zext as the extension operator.
  defm VABDLs : N3VLIntExt_QHS<0, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
  defm VABDLu : N3VLIntExt_QHS<1, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

  // abs(zext(a) - zext(b)) selects the unsigned long form directly.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)),
                               (zext (v8i8 DPR:$opB))))),
              (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
    def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)),
                               (zext (v4i16 DPR:$opB))))),
              (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
  }
  // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
  // shift/xor pattern for ABS.

  // abd_shr - signed right shift, by $shift, of (zext $in1) - (zext $in2).
  def abd_shr
    : PatFrag<(ops node:$in1, node:$in2, node:$shift),
              (ARMvshrsImm (sub (zext node:$in1), (zext node:$in2)),
                           (i32 $shift))>;

  // Matches xor(s, add(d, s)) where d = zext(a) - zext(b) and s is d shifted
  // right (signed) by 63.
  let Predicates = [HasNEON] in {
    def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
                   (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                    (zext (v2i32 DPR:$opB))),
                               (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB),
                                        63)))),
              (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
  }
  // VABA : Vector Absolute Difference and Accumulate
  // The absolute-difference intrinsic is combined with an add node.
  defm VABAs : N3VIntOp_QHS<0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "s", int_arm_neon_vabds, add>;
  defm VABAu : N3VIntOp_QHS<1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "u", int_arm_neon_vabdu, add>;

  // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
  // Both forms pass zext as the extension operator and add as the accumulate
  // operator.
  defm VABALs : N3VLIntExtOp_QHS<0, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
  defm VABALu : N3VLIntExtOp_QHS<1, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
  //===--------------------------------------------------------------------===//
  // Vector Maximum and Minimum.
  //===--------------------------------------------------------------------===//

  // VMAX : Vector Maximum
  // Integer forms select from smax / umax.
  defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "s", smax, 1>;
  defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "u", umax, 1>;

  // Single-precision forms select from fmaximum.
  def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f32", v2f32, v2f32, fmaximum, 1>;
  def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f32", v4f32, v4f32, fmaximum, 1>;

  // Half-precision forms, additionally gated on HasFullFP16.
  def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f16", v4f16, v4f16, fmaximum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f16", v8f16, v8f16, fmaximum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VMAXNM
  // Selected from fmaxnum.  All forms require HasV8 and HasNEON; the f16
  // forms also require HasFullFP16.  They are placed in the "v8NEON" decoder
  // namespace and use the NEONThumb2V8PostEncoder post-encoder method.
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def NEON_VMAXNMNDf
      : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, N3RegFrm, NoItinerary,
                  "vmaxnm", "f32", v2f32, v2f32, fmaxnum, 1>,
        Requires<[HasV8, HasNEON]>;
    def NEON_VMAXNMNQf
      : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, N3RegFrm, NoItinerary,
                  "vmaxnm", "f32", v4f32, v4f32, fmaxnum, 1>,
        Requires<[HasV8, HasNEON]>;
    def NEON_VMAXNMNDh
      : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, N3RegFrm, NoItinerary,
                  "vmaxnm", "f16", v4f16, v4f16, fmaxnum, 1>,
        Requires<[HasV8, HasNEON, HasFullFP16]>;
    def NEON_VMAXNMNQh
      : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, N3RegFrm, NoItinerary,
                  "vmaxnm", "f16", v8f16, v8f16, fmaxnum, 1>,
        Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
  // VMIN : Vector Minimum
  // Integer forms select from smin / umin.
  defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "s", smin, 1>;
  defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "u", umin, 1>;

  // Single-precision forms select from fminimum.
  def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f32", v2f32, v2f32, fminimum, 1>;
  def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f32", v4f32, v4f32, fminimum, 1>;

  // Half-precision forms, additionally gated on HasFullFP16.
  def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f16", v4f16, v4f16, fminimum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f16", v8f16, v8f16, fminimum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VMINNM
  // Selected from fminnum.  All forms require HasV8 and HasNEON; the f16
  // forms also require HasFullFP16.  They are placed in the "v8NEON" decoder
  // namespace and use the NEONThumb2V8PostEncoder post-encoder method.
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def NEON_VMINNMNDf
      : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, N3RegFrm, NoItinerary,
                  "vminnm", "f32", v2f32, v2f32, fminnum, 1>,
        Requires<[HasV8, HasNEON]>;
    def NEON_VMINNMNQf
      : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, N3RegFrm, NoItinerary,
                  "vminnm", "f32", v4f32, v4f32, fminnum, 1>,
        Requires<[HasV8, HasNEON]>;
    def NEON_VMINNMNDh
      : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, N3RegFrm, NoItinerary,
                  "vminnm", "f16", v4f16, v4f16, fminnum, 1>,
        Requires<[HasV8, HasNEON, HasFullFP16]>;
    def NEON_VMINNMNQh
      : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, N3RegFrm, NoItinerary,
                  "vminnm", "f16", v8f16, v8f16, fminnum, 1>,
        Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
  //===--------------------------------------------------------------------===//
  // Vector Pairwise Operations.
  //===--------------------------------------------------------------------===//

  // VPADD : Vector Pairwise Add
  // Only D-register forms are defined here; all of them match the
  // int_arm_neon_vpadd intrinsic.
  def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
  def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
  def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;

  // Floating-point forms; the f16 form is additionally gated on HasFullFP16.
  def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                         "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
  def VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                         "vpadd", "f16", v4f16, v4f16, int_arm_neon_vpadd, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VPADDL : Vector Pairwise Add Long
  defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
                              "vpaddl", "s", int_arm_neon_vpaddls>;
  defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0,
                              "vpaddl", "u", int_arm_neon_vpaddlu>;

  // VPADAL : Vector Pairwise Add and Accumulate Long
  defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0,
                               "vpadal", "s", int_arm_neon_vpadals>;
  defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0,
                               "vpadal", "u", int_arm_neon_vpadalu>;
  // VPMAX : Vector Pairwise Maximum
  // Signed integer forms.
  def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
  def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
  def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;

  // Unsigned integer forms.
  def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
  def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
  def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;

  // Floating-point forms match int_arm_neon_vpmaxs on fp vector types; the
  // f16 form is additionally gated on HasFullFP16.
  def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
  def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmax", "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VPMIN : Vector Pairwise Minimum
  // Signed integer forms.
  def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
  def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
  def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;

  // Unsigned integer forms.
  def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
  def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
  def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;

  // Floating-point forms match int_arm_neon_vpmins on fp vector types; the
  // f16 form is additionally gated on HasFullFP16.
  def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
  def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmin", "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  //===--------------------------------------------------------------------===//
  // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
  //===--------------------------------------------------------------------===//

  // VRECPE : Vector Reciprocal Estimate
  // u32 forms.
  def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD,
                         "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>;
  def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ,
                         "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>;

  // f32 forms.
  def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAD,
                         "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>;
  def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAQ,
                         "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>;

  // f16 forms, additionally gated on HasFullFP16.
  def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAD,
                         "vrecpe", "f16", v4f16, v4f16, int_arm_neon_vrecpe>,
                 Requires<[HasNEON, HasFullFP16]>;
  def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAQ,
                         "vrecpe", "f16", v8f16, v8f16, int_arm_neon_vrecpe>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VRECPS : Vector Reciprocal Step
  // f32 forms.
  def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                         "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>;
  def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                         "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>;

  // f16 forms, additionally gated on HasFullFP16.
  def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                         "vrecps", "f16", v4f16, v4f16, int_arm_neon_vrecps, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
  def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                         "vrecps", "f16", v8f16, v8f16, int_arm_neon_vrecps, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VRSQRTE : Vector Reciprocal Square Root Estimate
  // u32 forms.
  def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAD,
                          "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>;
  def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAQ,
                          "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>;

  // f32 forms.
  def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD,
                          "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>;
  def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ,
                          "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>;

  // f16 forms, additionally gated on HasFullFP16.
  def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAD,
                          "vrsqrte", "f16", v4f16, v4f16, int_arm_neon_vrsqrte>,
                  Requires<[HasNEON, HasFullFP16]>;
  def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAQ,
                          "vrsqrte", "f16", v8f16, v8f16, int_arm_neon_vrsqrte>,
                  Requires<[HasNEON, HasFullFP16]>;
  // VRSQRTS : Vector Reciprocal Square Root Step
  // f32 forms.
  def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                          "vrsqrts", "f32", v2f32, v2f32,
                          int_arm_neon_vrsqrts, 1>;
  def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                          "vrsqrts", "f32", v4f32, v4f32,
                          int_arm_neon_vrsqrts, 1>;

  // f16 forms, additionally gated on HasFullFP16.
  def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                          "vrsqrts", "f16", v4f16, v4f16,
                          int_arm_neon_vrsqrts, 1>,
                  Requires<[HasNEON, HasFullFP16]>;
  def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                          "vrsqrts", "f16", v8f16, v8f16,
                          int_arm_neon_vrsqrts, 1>,
                  Requires<[HasNEON, HasFullFP16]>;
  //===--------------------------------------------------------------------===//
  // Vector Shifts.
  //===--------------------------------------------------------------------===//

  // VSHL : Vector Shift
  // Register-shift forms, matched from the vshifts / vshiftu intrinsics.
  defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "s", int_arm_neon_vshifts>;
  defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "u", int_arm_neon_vshiftu>;

  // The ARMvshls / ARMvshlu nodes select the same instructions, one pattern
  // per vector type.
  let Predicates = [HasNEON] in {
    // Signed, D registers.
    def : Pat<(v8i8  (ARMvshls (v8i8  DPR:$Dn), (v8i8  DPR:$Dm))),
              (VSHLsv8i8  DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
              (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
              (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
              (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;

    // Signed, Q registers.
    def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
              (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
              (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
              (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
              (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

    // Unsigned, D registers.
    def : Pat<(v8i8  (ARMvshlu (v8i8  DPR:$Dn), (v8i8  DPR:$Dm))),
              (VSHLuv8i8  DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
              (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
              (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
              (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;

    // Unsigned, Q registers.
    def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
              (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
              (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
              (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
              (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
  }
  // VSHL : Vector Shift Left (Immediate)
  defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD,
                           "vshl", "i", ARMvshlImm>;

  // VSHR : Vector Shift Right (Immediate)
  defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD,
                           "vshr", "s", ARMvshrsImm>;
  defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD,
                           "vshr", "u", ARMvshruImm>;

  // VSHLL : Vector Shift Left Long
  // The matched fragment is an immediate left shift of the sign-extended
  // (signed form) or zero-extended (unsigned form) source.
  defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
                           PatFrag<(ops node:$LHS, node:$RHS),
                                   (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
  defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
                           PatFrag<(ops node:$LHS, node:$RHS),
                                   (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
  // VSHLL : Vector Shift Left Long (with maximum shift count)
  // N2VLShMax derives from N2VLSh with null_frag as the pattern operator (the
  // selection patterns are written out separately below), sets instruction
  // bits 21-16 to op21_16 and uses the DecodeVSHLMaxInstruction decoder
  // method.
  class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                  bit op7, bit op6, bit op4, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Operand ImmTy>
    : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
             ResTy, OpTy, ImmTy, null_frag> {
    let Inst{21-16} = op21_16;
    let DecoderMethod = "DecodeVSHLMaxInstruction";
  }

  def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0,
                           "vshll", "i8", v8i16, v8i8, imm8>;
  def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0,
                           "vshll", "i16", v4i32, v4i16, imm16>;
  def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0,
                           "vshll", "i32", v2i64, v2i32, imm32>;

  // A left shift by 8 / 16 / 32 of an extended v8i8 / v4i16 / v2i32 source
  // selects the matching VSHLLi* instruction whether the extension is zext,
  // sext or anyext.
  let Predicates = [HasNEON] in {
    foreach Ext = [zext, sext, anyext] in {
      def : Pat<(v8i16 (ARMvshlImm (Ext (v8i8 DPR:$Rn)), (i32 8))),
                (VSHLLi8 DPR:$Rn, 8)>;
      def : Pat<(v4i32 (ARMvshlImm (Ext (v4i16 DPR:$Rn)), (i32 16))),
                (VSHLLi16 DPR:$Rn, 16)>;
      def : Pat<(v2i64 (ARMvshlImm (Ext (v2i32 DPR:$Rn)), (i32 32))),
                (VSHLLi32 DPR:$Rn, 32)>;
    }
  }
  // VSHRN : Vector Shift Right and Narrow
  // The defm's fragment matches trunc of a signed right shift (ARMvshrsImm).
  defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
                          PatFrag<(ops node:$Rn, node:$amt),
                                  (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

  // trunc of an unsigned right shift (ARMvshruImm) selects the same
  // instructions.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8  (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
              (VSHRNv8i8  QPR:$Vn, shr_imm8:$amt)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
              (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
              (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
  }
  // VRSHL : Vector Rounding Shift
  defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts>;
  defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu>;

  // VRSHR : Vector Rounding Shift Right
  defm VRSHRs : N2VShR_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D,
                            "vrshr", "s", NEONvrshrsImm>;
  defm VRSHRu : N2VShR_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D,
                            "vrshr", "u", NEONvrshruImm>;

  // VRSHRN : Vector Rounding Shift Right and Narrow
  defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                           "vrshrn", "i", NEONvrshrnImm>;
  5500. // VQSHL : Vector Saturating Shift
  5501. defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
  5502. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5503. "vqshl", "s", int_arm_neon_vqshifts>;
  5504. defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
  5505. IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
  5506. "vqshl", "u", int_arm_neon_vqshiftu>;
  // VQSHL : Vector Saturating Shift Left (Immediate).
  defm VQSHLsi : N2VShL_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D,
                             "vqshl", "s", NEONvqshlsImm>;
  defm VQSHLui : N2VShL_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D,
                             "vqshl", "u", NEONvqshluImm>;

  // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned).
  defm VQSHLsu : N2VShL_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D,
                             "vqshlu", "s", NEONvqshlsuImm>;
  // VQSHRN : Vector Saturating Shift Right and Narrow.
  defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                            "vqshrn", "s", NEONvqshrnsImm>;
  defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                            "vqshrn", "u", NEONvqshrnuImm>;

  // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned).
  defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                            "vqshrun", "s", NEONvqshrnsuImm>;
  // VQRSHL : Vector Saturating Rounding Shift.
  // Signed and unsigned forms, selected from the vqrshifts / vqrshiftu
  // intrinsics respectively.
  defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                               IIC_VSHLi4D, IIC_VSHLi4D,
                               IIC_VSHLi4Q, IIC_VSHLi4Q,
                               "vqrshl", "s", int_arm_neon_vqrshifts>;
  defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                               IIC_VSHLi4D, IIC_VSHLi4D,
                               IIC_VSHLi4Q, IIC_VSHLi4Q,
                               "vqrshl", "u", int_arm_neon_vqrshiftu>;
  // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow.
  defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                             "vqrshrn", "s", NEONvqrshrnsImm>;
  defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                             "vqrshrn", "u", NEONvqrshrnuImm>;

  // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned).
  defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                             "vqrshrun", "s", NEONvqrshrnsuImm>;
  // VSRA : Vector Shift Right and Accumulate.
  defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
  defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;

  // VRSRA : Vector Rounding Shift Right and Accumulate.
  defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
  defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
  // VSLI : Vector Shift Left and Insert.
  defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

  // VSRI : Vector Shift Right and Insert.
  defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
  // Vector Absolute and Saturating Absolute.

  // VABS : Vector Absolute Value (integer forms, selected from `abs`).
  defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                         IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;

  // VABS : f32 forms, selected from `fabs`.
  def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                    "vabs", "f32", v2f32, v2f32, fabs>;
  def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                    "vabs", "f32", v4f32, v4f32, fabs>;

  // VABS : f16 forms; both carry the HasNEON and HasFullFP16 predicates.
  let Predicates = [HasNEON, HasFullFP16] in {
    def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                      "vabs", "f16", v4f16, v4f16, fabs>;
    def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                      "vabs", "f16", v8f16, v8f16, fabs>;
  }
  // VQABS : Vector Saturating Absolute Value.
  defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                          IIC_VQUNAiD, IIC_VQUNAiQ,
                          "vqabs", "s", int_arm_neon_vqabs>;
  // Vector Negate.

  // Integer negation is matched as a subtraction from an all-zeros vector;
  // one fragment per all-zeros node (ARMimmAllZerosD / ARMimmAllZerosV).
  def vnegd : PatFrag<(ops node:$in), (sub ARMimmAllZerosD, node:$in)>;
  def vnegq : PatFrag<(ops node:$in), (sub ARMimmAllZerosV, node:$in)>;

  // Integer VNEG on a D register. `size` is forwarded to N2V, `Ty` is the
  // vector type the selection pattern is typed with.
  class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
          (outs DPR:$Vd), (ins DPR:$Vm),
          IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;

  // Integer VNEG on a Q register; same parameters as VNEGD.
  class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
          (outs QPR:$Vd), (ins QPR:$Vm),
          IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
  // VNEG : Vector Negate (integer).
  // D-register forms.
  def VNEGs8d  : VNEGD<0b00, "vneg", "s8",  v8i8>;
  def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
  def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
  // Q-register forms.
  def VNEGs8q  : VNEGQ<0b00, "vneg", "s8",  v16i8>;
  def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
  def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
  // VNEG : Vector Negate (floating-point), selected from `fneg`.

  // f32 forms.
  def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
  def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                     "vneg", "f32", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

  // f16 forms; both carry the HasNEON and HasFullFP16 predicates.
  let Predicates = [HasNEON, HasFullFP16] in {
    def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                     "vneg", "f16", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>;
    def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                     "vneg", "f16", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>;
  }
  // Explicit selection patterns mapping the vnegd / vnegq fragments onto the
  // integer VNEG instructions, one per vector type.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d  DPR:$src)>;
    def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
    def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
    def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q  QPR:$src)>;
    def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
    def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
  }
  // VQNEG : Vector Saturating Negate.
  defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                          IIC_VQUNAiD, IIC_VQUNAiQ,
                          "vqneg", "s", int_arm_neon_vqneg>;
  // Vector Bit Counting Operations.

  // VCLS : Vector Count Leading Sign Bits.
  defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                         IIC_VCNTiD, IIC_VCNTiQ,
                         "vcls", "s", int_arm_neon_vcls>;

  // VCLZ : Vector Count Leading Zeros (selected from `ctlz`).
  defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                         IIC_VCNTiD, IIC_VCNTiQ,
                         "vclz", "i", ctlz>;
  // VCNT : Vector Count One Bits (selected from `ctpop`); only the byte
  // vector types v8i8 / v16i8 are defined here.
  def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                      IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, ctpop>;
  def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                      IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, ctpop>;
  // Vector Swap.
  // Both registers are outputs, each tied to the matching input through the
  // constraint string. No selection pattern is attached.
  def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                   (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                   NoItinerary, "vswp", "$Vd, $Vm",
                   "$in1 = $Vd, $in2 = $Vm", []>;
  def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                   (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                   NoItinerary, "vswp", "$Vd, $Vm",
                   "$in1 = $Vd, $in2 = $Vm", []>;
  // Vector Move Operations.

  // VMOV : Vector Move (Register).
  // Assembler aliases only: "vmov Vd, Vm" maps to VORR with Vm used for both
  // source operands.
  def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                      (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                      (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  // VMOV : Vector Move (Immediate).
  //
  // VMOVs are not, strictly speaking, cheap, but they cost no more than the
  // register copy (VORR) they would otherwise be replaced by, so
  // rematerialization should be preferred over splitting when it applies.
  let isReMaterializable = 1, isAsCheapAsAMove = 1 in {

    // 8-bit element splats.
    def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
                            (outs DPR:$Vd), (ins nImmSplatI8:$SIMM),
                            IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "",
                            [(set DPR:$Vd,
                                  (v8i8 (ARMvmovImm timm:$SIMM)))]>;
    def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
                             (outs QPR:$Vd), (ins nImmSplatI8:$SIMM),
                             IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                                   (v16i8 (ARMvmovImm timm:$SIMM)))]>;

    // 16-bit element splats; instruction bit 9 is taken from the immediate.
    def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1,
                             (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                                   (v4i16 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }
    def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1,
                             (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                                   (v8i16 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    // 32-bit element forms; instruction bits 11-8 are taken from the
    // immediate.
    def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1,
                             (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                                   (v2i32 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }
    def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1,
                             (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                                   (v4i32 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }

    // 64-bit element splats.
    def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
                             (outs DPR:$Vd), (ins nImmSplatI64:$SIMM),
                             IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                                   (v1i64 (ARMvmovImm timm:$SIMM)))]>;
    def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
                             (outs QPR:$Vd), (ins nImmSplatI64:$SIMM),
                             IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                                   (v2i64 (ARMvmovImm timm:$SIMM)))]>;

    // f32 immediates, selected from ARMvmovFPImm.
    def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1,
                             (outs DPR:$Vd), (ins nImmVMOVF32:$SIMM),
                             IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                                   (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
    def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1,
                             (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM),
                             IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                                   (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;

  } // isReMaterializable, isAsCheapAsAMove
  // Byte-replication aliases, provided for GAS compatibility.
  //
  // Instructions such as
  //   "vmov.i32 d0, #0xffffffff"
  //   "vmov.i32 d0, #0xabababab"
  //   "vmov.i16 d0, #0xabab"
  // are incorrect as written, but can still be accepted. The last two, for
  // example, should be emitted as
  //   "vmov.i8 d0, #0xab"
  //
  // `To` is the element type named in the assembly suffix (".i<To.Size>").
  multiclass NEONImmReplicateI8InstAlias<ValueType To> {
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv8i8 DPR:$Vd,
                                  nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv16i8 QPR:$Vd,
                                   nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;

    // The same is done for VMVN, which is canonicalized to VMOV here:
    //   "vmvn.i32 d0, #0xabababab"
    // actually means
    //   "vmov.i8 d0, #0x54"
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv8i8 DPR:$Vd,
                                  nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv16i8 QPR:$Vd,
                                   nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  }

  // Instantiate for every suffix wider than a byte.
  defm : NEONImmReplicateI8InstAlias<i16>;
  defm : NEONImmReplicateI8InstAlias<i32>;
  defm : NEONImmReplicateI8InstAlias<i64>;
  // Replication aliases for element types other than i8, e.g.:
  //   "vmov.i32 d0, #0xab00ab00"         -> "vmov.i16 d0, #0xab00"
  //   "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
  // Unlike the i8 case, VMVN is not canonicalized to VMOV here.
  //
  // From     - element type of the instruction actually emitted.
  // V8/V16   - VMOV instruction for the D / Q register form.
  // NV8/NV16 - VMVN instruction for the D / Q register form.
  // To       - element type named in the assembly suffix.
  multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                       NeonI NV8, NeonI NV16, ValueType To> {
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (V8 DPR:$Vd,
                            nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (V16 QPR:$Vd,
                             nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (NV8 DPR:$Vd,
                             nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (NV16 QPR:$Vd,
                              nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  }

  defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                   VMVNv4i16, VMVNv8i16, i32>;
  defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                   VMVNv4i16, VMVNv8i16, i64>;
  defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                   VMVNv2i32, VMVNv4i32, i64>;

  // TODO: add "VMOV <-> VMVN" conversion for cases like
  //   "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
  //   "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
  // Zeroing pseudo-instructions.
  //
  // On some CPUs "vmov.i32 dD, #0" and "vmov.i32 qD, #0" take zero cycles to
  // execute, so they should be used wherever possible to set a register to
  // zero. The correct instruction would probably be chosen even without these
  // pseudos, but the general VMOV definitions could not then be marked
  // "isAsCheapAsAMove", since in general they are sometimes rather expensive.
  let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
    // Zero a D register; expands to VMOVv2i32 with immediate 0.
    def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                                 [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                                 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
                 Requires<[HasZCZ]>;
    // Zero a Q register; expands to VMOVv4i32 with immediate 0.
    def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                                 [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                                 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
                 Requires<[HasZCZ]>;
  }
  // VMOV : Vector Get Lane (move scalar to ARM core register).
  // In each definition the lane index is scattered into the instruction bits
  // listed in the trailing `let Inst{...}` assignments.

  // Signed 8-bit lane (ARMvgetlanes).
  def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                           IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                           [(set GPR:$R,
                                 (ARMvgetlanes (v8i8 DPR:$V), imm:$lane))]> {
    let Inst{21}  = lane{2};
    let Inst{6-5} = lane{1-0};
  }

  // Signed 16-bit lane (ARMvgetlanes).
  def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                            (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                            IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                            [(set GPR:$R,
                                  (ARMvgetlanes (v4i16 DPR:$V), imm:$lane))]> {
    let Inst{21} = lane{1};
    let Inst{6}  = lane{0};
  }

  // Unsigned 8-bit lane (ARMvgetlaneu).
  def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                           IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                           [(set GPR:$R,
                                 (ARMvgetlaneu (v8i8 DPR:$V), imm:$lane))]> {
    let Inst{21}  = lane{2};
    let Inst{6-5} = lane{1-0};
  }

  // Unsigned 16-bit lane (ARMvgetlaneu).
  def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                            (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                            IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                            [(set GPR:$R,
                                  (ARMvgetlaneu (v4i16 DPR:$V), imm:$lane))]> {
    let Inst{21} = lane{1};
    let Inst{6}  = lane{0};
  }

  // 32-bit lane, selected from a plain extractelt. Predicated on HasFPRegs
  // and HasFastVGETLNi32.
  def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                            (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                            IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                            [(set GPR:$R,
                                  (extractelt (v2i32 DPR:$V), imm:$lane))]>,
                  Requires<[HasFPRegs, HasFastVGETLNi32]> {
    let Inst{21} = lane{0};
  }
  // Get-lane patterns for Q registers and for f16 / bf16 vectors.
  // Q-register sources are handled by extracting the D sub-register selected
  // by DSubReg_*_reg and passing the lane index through SubReg_*_lane.
  let Predicates = [HasNEON] in {
    // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

    // Signed get-lane from Q registers.
    def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
              (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i8_reg imm:$lane))),
                        (SubReg_i8_lane imm:$lane))>;
    def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
              (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;

    // Unsigned get-lane from Q registers.
    def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
              (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i8_reg imm:$lane))),
                        (SubReg_i8_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;

    // f16 vectors reuse the unsigned 16-bit instruction.
    def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
              (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;

    // bf16 vectors likewise.
    def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                                 (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
  }
  // extractelt of a 32-bit integer lane.

  // With HasFastVGETLNi32: use VGETLNi32 on the D sub-register that holds
  // the lane.
  def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
            (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane))>,
        Requires<[HasNEON, HasFastVGETLNi32]>;

  // With HasSlowVGETLNi32: take the S sub-register that holds the lane and
  // copy it to a GPR instead.
  let Predicates = [HasNEON, HasSlowVGETLNi32] in {
    def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
              (COPY_TO_REGCLASS
                (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))),
                GPR)>;
    def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
              (COPY_TO_REGCLASS
                (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))),
                GPR)>;
  }
  // extractelt of f32 / f64 lanes is a plain sub-register extract. For f32
  // the source is first copied into the *_VFP2 register class.
  let Predicates = [HasNEON] in {
    def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
                (SSubReg_f32_reg imm:$src2))>;
    def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
              (EXTRACT_SUBREG
                (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
                (SSubReg_f32_reg imm:$src2))>;
    //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
    //          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
    def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
              (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
  }
  // Extract an even-numbered 16-bit FP lane: copy the vector into the
  // *_VFP2 register class and take the sub-register chosen by
  // SSubReg_f16_reg.
  //   VT4 - 4-element (D register) vector type.
  //   VT8 - 8-element (Q register) vector type.
  multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
    def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                (SSubReg_f16_reg imm_even:$lane))>;
    def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                (SSubReg_f16_reg imm_even:$lane))>;
  }
  // Extract an odd-numbered 16-bit FP lane: take the sub-register chosen by
  // SSubReg_f16_reg, apply VMOVH to it, and copy the result into HPR.
  //   VT4 - 4-element (D register) vector type.
  //   VT8 - 8-element (Q register) vector type.
  multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
    def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VMOVH
                  (EXTRACT_SUBREG
                    (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                    (SSubReg_f16_reg imm_odd:$lane))),
                HPR)>;
    def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VMOVH
                  (EXTRACT_SUBREG
                    (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                    (SSubReg_f16_reg imm_odd:$lane))),
                HPR)>;
  }
  // f16: even and odd lane extraction.
  let Predicates = [HasNEON] in {
    defm : ExtractEltEvenF16<v4f16, v8f16>;
    defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
  }

  // bf16, odd lanes: if VMOVH (vmovx.f16) is available, use it. The added
  // complexity ranks these above the GPR-based fallback below.
  let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
    defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
  }

  let Predicates = [HasBF16, HasNEON] in {
    // bf16, even lanes.
    defm : ExtractEltEvenF16<v4bf16, v8bf16>;

    // bf16, odd lanes when VMOVH is not available: extract the lane into a
    // GPR with VGETLNu16 and then move it to HPR.
    def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
                HPR)>;
    // NOTE(review): the sub-register is typed v4i16 here, while the
    // D-register form above uses v4bf16 - confirm this is intended.
    def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                  (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)),
                HPR)>;
  }
  // VMOV : Vector Set Lane (move ARM core register to scalar).
  // The destination register is tied to the $src1 input, and the lane index
  // is scattered into the instruction bits listed in each body.
  let Constraints = "$src1 = $V" in {

    // 8-bit lane.
    def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?},
                             (outs DPR:$V),
                             (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                             IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                             [(set DPR:$V,
                                   (vector_insert (v8i8 DPR:$src1),
                                                  GPR:$R, imm:$lane))]> {
      let Inst{21}  = lane{2};
      let Inst{6-5} = lane{1-0};
    }

    // 16-bit lane.
    def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1},
                              (outs DPR:$V),
                              (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                              IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                              [(set DPR:$V,
                                    (vector_insert (v4i16 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
      let Inst{21} = lane{1};
      let Inst{6}  = lane{0};
    }

    // 32-bit lane; predicated on HasVFP2.
    def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00,
                              (outs DPR:$V),
                              (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                              IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                              [(set DPR:$V,
                                    (insertelt (v2i32 DPR:$src1),
                                               GPR:$R, imm:$lane))]>,
                    Requires<[HasVFP2]> {
      let Inst{21} = lane{0};
      // This instruction is equivalent to
      //   $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
      let isInsertSubreg = 1;
    }
  }
  // Insert a 16-bit FP scalar into a vector lane by copying it from HPR to a
  // GPR and using VSETLNi16. For Q registers the containing D sub-register
  // is extracted, updated, and inserted back.
  //   VTScalar - scalar element type.
  //   VT4      - 4-element (D register) vector type.
  //   VT8      - 8-element (Q register) vector type.
  //
  // TODO: for odd lanes we could optimize this a bit by using the VINS
  // FullFP16 instruction when it is available
  multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
    def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
              (VT4 (VSETLNi16 DPR:$src1,
                              (COPY_TO_REGCLASS HPR:$src2, GPR),
                              imm:$lane))>;
    def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
              (VT8 (INSERT_SUBREG QPR:$src1,
                     (v4i16 (VSETLNi16
                              (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                       (DSubReg_i16_reg imm:$lane))),
                              (COPY_TO_REGCLASS HPR:$src2, GPR),
                              (SubReg_i16_lane imm:$lane))),
                     (DSubReg_i16_reg imm:$lane)))>;
  }
  // Lane-insert and scalar_to_vector selection patterns.
  let Predicates = [HasNEON] in {

    // Integer insert into a Q register: extract the D sub-register holding
    // the lane, set the lane there, and insert the result back.
    def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
              (v16i8 (INSERT_SUBREG QPR:$src1,
                       (v8i8 (VSETLNi8
                               (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                       (DSubReg_i8_reg imm:$lane))),
                               GPR:$src2, (SubReg_i8_lane imm:$lane))),
                       (DSubReg_i8_reg imm:$lane)))>;
    def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
              (v8i16 (INSERT_SUBREG QPR:$src1,
                       (v4i16 (VSETLNi16
                                (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i16_reg imm:$lane))),
                                GPR:$src2, (SubReg_i16_lane imm:$lane))),
                       (DSubReg_i16_reg imm:$lane)))>;
    def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
              (v4i32 (INSERT_SUBREG QPR:$src1,
                       (v2i32 (VSETLNi32
                                (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i32_reg imm:$lane))),
                                GPR:$src2, (SubReg_i32_lane imm:$lane))),
                       (DSubReg_i32_reg imm:$lane)))>;

    // f32 insert: a sub-register insert on the *_VFP2 register class.
    def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
              (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                             SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
    def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
              (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                             SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

    // f16 insert.
    defm : InsertEltF16<f16, v4f16, v8f16>;

    // f64 insert: a D sub-register insert.
    def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
              (INSERT_SUBREG QPR:$src1, DPR:$src2,
                             (DSubReg_f64_reg imm:$src3))>;

    // scalar_to_vector for FP types: insert into sub-register 0 of an
    // IMPLICIT_DEF vector.
    def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
              (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
    def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
              (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
    def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
              (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
    def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
              (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

    // scalar_to_vector for integer D registers: set lane 0 of an
    // IMPLICIT_DEF vector.
    def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
              (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
    def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
              (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
    def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
              (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

    // scalar_to_vector for integer Q registers: build the D-register value
    // as above and place it in dsub_0 of an IMPLICIT_DEF Q register.
    def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                             (VSETLNi8 (v8i8 (IMPLICIT_DEF)),
                                       GPR:$src, (i32 0)),
                             dsub_0)>;
    def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                             (VSETLNi16 (v4i16 (IMPLICIT_DEF)),
                                        GPR:$src, (i32 0)),
                             dsub_0)>;
    def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                             (VSETLNi32 (v2i32 (IMPLICIT_DEF)),
                                        GPR:$src, (i32 0)),
                             dsub_0)>;
  }
  // bf16 lane insert, predicated on HasNEON and HasBF16.
  let Predicates = [HasNEON, HasBF16] in {
    defm : InsertEltF16<bf16, v4bf16, v8bf16>;
  }
  // VDUP : Vector Duplicate (from ARM core register to all elements).

  // D-register form. opcod1 / opcod3 are forwarded to NVDup, `Ty` is the
  // result vector type of the ARMvdup pattern.
  class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
            IIC_VMOVIS, "vdup", Dt, "$V, $R",
            [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

  // Q-register form; same parameters as VDUPD.
  class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
            IIC_VMOVIS, "vdup", Dt, "$V, $R",
            [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

  def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
  def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
  // Only this 32-bit D-register form carries the HasFastVDUP32 predicate.
  def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
  def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
  def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
  def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
  // ARMvdup patterns for uarchs with fast VDUP.32.
  def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
        Requires<[HasNEON, HasFastVDUP32]>;
  def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
        Requires<[HasNEON]>;

  // ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead,
  // passing the same GPR for both halves.
  let Predicates = [HasNEON, HasSlowVDUP32] in {
    def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))),
              (VMOVDRR GPR:$R, GPR:$R)>;
    def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))),
              (VMOVDRR GPR:$R, GPR:$R)>;
  }
  // VDUP : Vector Duplicate Lane (from scalar to all elements).

  // D-register result. `Ty` is both the source and result vector type;
  // `IdxTy` is the operand type of the lane index.
  class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
                ValueType Ty, Operand IdxTy>
    : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
                IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
                [(set DPR:$Vd,
                      (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

  // Q-register result from a D-register source. `ResTy` / `OpTy` are the
  // result and source vector types.
  // NOTE(review): the pattern names VectorIndex32 for the lane regardless of
  // element size, while the operand list uses IdxTy - confirm this is
  // intended.
  class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Operand IdxTy>
    : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
                IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
                [(set QPR:$Vd,
                      (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                          VectorIndex32:$lane)))]>;
  // Inst{19-16} is partially specified depending on the element size; the
  // remaining high bits are filled from the lane index in each body.

  // D-register results.
  def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
    bits<3> lane;
    let Inst{19-17} = lane{2-0};
  }
  def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
    bits<2> lane;
    let Inst{19-18} = lane{1-0};
  }
  def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
    bits<1> lane;
    let Inst{19} = lane{0};
  }

  // Q-register results.
  def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8",
                         v16i8, v8i8, VectorIndex8> {
    bits<3> lane;
    let Inst{19-17} = lane{2-0};
  }
  def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16",
                          v8i16, v4i16, VectorIndex16> {
    bits<2> lane;
    let Inst{19-18} = lane{1-0};
  }
  def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32",
                          v4i32, v2i32, VectorIndex32> {
    bits<1> lane;
    let Inst{19} = lane{0};
  }
  // Additional duplicate-lane / duplicate-scalar selection patterns.
  let Predicates = [HasNEON] in {

    // FP vector types reuse the integer VDUPLN instructions of equal width.
    def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
              (VDUPLN16d DPR:$Vm, imm:$lane)>;
    def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
              (VDUPLN32d DPR:$Vm, imm:$lane)>;
    def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
              (VDUPLN32q DPR:$Vm, imm:$lane)>;

    // Q-register sources: duplicate from the D sub-register that holds the
    // lane, with the lane index adjusted through SubReg_*_lane.
    def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
              (v16i8 (VDUPLN8q
                       (v8i8 (EXTRACT_SUBREG QPR:$src,
                                             (DSubReg_i8_reg imm:$lane))),
                       (SubReg_i8_lane imm:$lane)))>;
    def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
              (v8i16 (VDUPLN16q
                       (v4i16 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i16_reg imm:$lane))),
                       (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
              (v8f16 (VDUPLN16q
                       (v4f16 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i16_reg imm:$lane))),
                       (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
              (v4i32 (VDUPLN32q
                       (v2i32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane)))>;
    def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
              (v4f32 (VDUPLN32q
                       (v2f32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane)))>;

    // Duplicate an FP scalar: place it in ssub_0 of an IMPLICIT_DEF
    // D register and duplicate lane 0.
    def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
              (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                               (f16 HPR:$src), ssub_0),
                                (i32 0)))>;
    def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
              (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                               SPR:$src, ssub_0),
                                (i32 0)))>;
    def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
              (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                               SPR:$src, ssub_0),
                                (i32 0)))>;
    def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
              (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                               (f16 HPR:$src), ssub_0),
                                (i32 0)))>;
  }
  // bf16 duplicate-lane / duplicate-scalar patterns, using the 16-bit VDUPLN
  // instructions.
  let Predicates = [HasNEON, HasBF16] in {
    def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
              (VDUPLN16d DPR:$Vm, imm:$lane)>;
    def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
              (v8bf16 (VDUPLN16q
                        (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                        (SubReg_i16_lane imm:$lane)))>;

    // Scalar source: place it in ssub_0 of an IMPLICIT_DEF D register and
    // duplicate lane 0.
    def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
              (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                                (bf16 HPR:$src), ssub_0),
                                 (i32 0)))>;
    def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
              (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                                (bf16 HPR:$src), ssub_0),
                                 (i32 0)))>;
  }
  // VMOVN : Vector Narrowing Move (selected from `trunc`).
  defm VMOVN : N2VN_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVN,
                        "vmovn", "i", trunc>;

  // VQMOVN : Vector Saturating Narrowing Move.
  defm VQMOVNs : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
                             "vqmovn", "s", int_arm_neon_vqmovns>;
  defm VQMOVNu : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
                             "vqmovn", "u", int_arm_neon_vqmovnu>;
  // Assembled as "vqmovun" and selected from the vqmovnsu intrinsic.
  defm VQMOVNsu : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
                              "vqmovun", "s", int_arm_neon_vqmovnsu>;
  6142. // VMOVL : Vector Lengthening Move
  6143. defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
  6144. defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
  6145. let Predicates = [HasNEON] in {
  6146. def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
  6147. def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
  6148. def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
  6149. }
  6150. // Vector Conversions.
  6151. // VCVT : Vector Convert Between Floating-Point and Integers
  // Record naming used below (read off the type/operator arguments):
  //   f2s / f2u : f32 -> signed / unsigned i32 (fp_to_sint / fp_to_uint)
  //   s2f / u2f : signed / unsigned i32 -> f32 (sint_to_fp / uint_to_fp)
  //   h2s / h2u / s2h / u2h : the same four conversions between f16 and i16
  //   trailing d / q : N2VD form on 64-bit vector types / N2VQ form on
  //                    128-bit vector types
  // 32-bit conversions, N2VD forms (v2i32 <-> v2f32).
  6152. def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
  6153. v2i32, v2f32, fp_to_sint>;
  6154. def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
  6155. v2i32, v2f32, fp_to_uint>;
  6156. def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
  6157. v2f32, v2i32, sint_to_fp>;
  6158. def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
  6159. v2f32, v2i32, uint_to_fp>;
  // 32-bit conversions, N2VQ forms (v4i32 <-> v4f32).
  6160. def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
  6161. v4i32, v4f32, fp_to_sint>;
  6162. def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
  6163. v4i32, v4f32, fp_to_uint>;
  6164. def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
  6165. v4f32, v4i32, sint_to_fp>;
  6166. def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
  6167. v4f32, v4i32, uint_to_fp>;
  // 16-bit conversions, N2VD forms (v4i16 <-> v4f16). These differ from the
  // 32-bit records in the third template argument (0b01 instead of 0b10) and
  // additionally require HasFullFP16.
  6168. def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
  6169. v4i16, v4f16, fp_to_sint>,
  6170. Requires<[HasNEON, HasFullFP16]>;
  6171. def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
  6172. v4i16, v4f16, fp_to_uint>,
  6173. Requires<[HasNEON, HasFullFP16]>;
  6174. def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
  6175. v4f16, v4i16, sint_to_fp>,
  6176. Requires<[HasNEON, HasFullFP16]>;
  6177. def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
  6178. v4f16, v4i16, uint_to_fp>,
  6179. Requires<[HasNEON, HasFullFP16]>;
  // 16-bit conversions, N2VQ forms (v8i16 <-> v8f16), also HasFullFP16 only.
  6180. def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
  6181. v8i16, v8f16, fp_to_sint>,
  6182. Requires<[HasNEON, HasFullFP16]>;
  6183. def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
  6184. v8i16, v8f16, fp_to_uint>,
  6185. Requires<[HasNEON, HasFullFP16]>;
  6186. def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
  6187. v8f16, v8i16, sint_to_fp>,
  6188. Requires<[HasNEON, HasFullFP16]>;
  6189. def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
  6190. v8f16, v8i16, uint_to_fp>,
  6191. Requires<[HasNEON, HasFullFP16]>;
  6192. // VCVT{A, N, P, M}
  // VCVT_FPI generates the eight float->integer records for one vcvt<op>
  // mnemonic.
  //   op      : suffix appended to "vcvt" to form the mnemonic
  //   op10_8  : 3-bit value forwarded to every generated record
  //   IntS    : pattern operator used for the signed results
  //   IntU    : pattern operator used for the unsigned results
  // Generated record suffixes: S/U = signed/unsigned, D/Q = N2VDIntnp /
  // N2VQIntnp form, f/h = f32 / f16 source. All records require HasV8 and
  // HasNEON; the f16 ones additionally require HasFullFP16.
  6193. multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
  6194. SDPatternOperator IntU> {
  // All records share the V8 post-encoder hook and the "v8NEON" decoder
  // namespace.
  6195. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  6196. def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6197. "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
  6198. def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6199. "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
  6200. def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6201. "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
  6202. def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6203. "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  6204. def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6205. "s16.f16", v4i16, v4f16, IntS>,
  6206. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6207. def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
  6208. "s16.f16", v8i16, v8f16, IntS>,
  6209. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6210. def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6211. "u16.f16", v4i16, v4f16, IntU>,
  6212. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6213. def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
  6214. "u16.f16", v8i16, v8f16, IntU>,
  6215. Requires<[HasV8, HasNEON, HasFullFP16]>;
  6216. }
  6217. }
  // One instantiation per mnemonic suffix; op10_8 counts 0b000..0b011 in the
  // order a, n, p, m.
  6218. defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
  6219. defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
  6220. defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
  6221. defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
  6222. // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
  // Record naming: f2xs / f2xu = f32 -> signed / unsigned fixed-point,
  // xs2f / xu2f = the reverse; the h variants are the f16 <-> 16-bit
  // equivalents. All are bound to the int_arm_neon_vcvtfp2fx* /
  // int_arm_neon_vcvtfx*2fp intrinsics.
  // N2VCvtD forms, decoded through the custom DecodeVCVTD hook.
  6223. let DecoderMethod = "DecodeVCVTD" in {
  6224. def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
  6225. v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
  6226. def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
  6227. v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
  6228. def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
  6229. v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
  6230. def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
  6231. v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
  // f16 variants are only available with HasFullFP16.
  6232. let Predicates = [HasNEON, HasFullFP16] in {
  6233. def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
  6234. v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
  6235. def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
  6236. v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
  6237. def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
  6238. v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
  6239. def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
  6240. v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
  6241. } // Predicates = [HasNEON, HasFullFP16]
  6242. }
  // N2VCvtQ forms, decoded through the custom DecodeVCVTQ hook. Template
  // arguments mirror the N2VCvtD records above one-for-one.
  6243. let DecoderMethod = "DecodeVCVTQ" in {
  6244. def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
  6245. v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
  6246. def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
  6247. v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
  6248. def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
  6249. v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
  6250. def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
  6251. v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
  6252. let Predicates = [HasNEON, HasFullFP16] in {
  6253. def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
  6254. v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
  6255. def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
  6256. v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
  6257. def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
  6258. v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
  6259. def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
  6260. v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
  6261. } // Predicates = [HasNEON, HasFullFP16]
  6262. }
  // Assembler aliases for the fixed-point syntax with a literal "#0" operand.
  // Each alias maps that spelling onto the corresponding plain integer
  // conversion record (VCVTf2sd, VCVTs2fd, ...) rather than a fixed-point
  // record.
  // 32-bit element aliases, D-register operands.
  6263. def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
  6264. (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6265. def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
  6266. (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  6267. def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
  6268. (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6269. def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
  6270. (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
  // 32-bit element aliases, Q-register operands.
  6271. def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
  6272. (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6273. def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
  6274. (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6275. def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
  6276. (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6277. def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
  6278. (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
  // 16-bit element aliases, D-register operands.
  6279. def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
  6280. (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6281. def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
  6282. (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  6283. def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
  6284. (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
  6285. def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
  6286. (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
  // 16-bit element aliases, Q-register operands.
  6287. def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
  6288. (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6289. def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
  6290. (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6291. def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
  6292. (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6293. def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
  6294. (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  6295. // VCVT : Vector Convert Between Half-Precision and Single-Precision.
  // Both records are typed on v4i16 (not v4f16) for the half-precision side
  // and are bound to the int_arm_neon_vcvtfp2hf / int_arm_neon_vcvthf2fp
  // intrinsics. They require HasNEON and HasFP16.
  6296. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
  6297. IIC_VUNAQ, "vcvt", "f16.f32",
  6298. v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
  6299. Requires<[HasNEON, HasFP16]>;
  6300. def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
  6301. IIC_VUNAQ, "vcvt", "f32.f16",
  6302. v4f32, v4i16, int_arm_neon_vcvthf2fp>,
  6303. Requires<[HasNEON, HasFP16]>;
  // Generic fpround / fpextend on v4f16 are selected to the same two records.
  // NOTE(review): these two patterns carry no explicit predicate list, unlike
  // the instruction records above -- confirm that is intended.
  6304. def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>;
  6305. def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>;
  6306. // Vector Reverse.
  6307. // VREV64 : Vector Reverse elements within 64-bit doublewords
  // VREV64D / VREV64Q: D- and Q-register instruction classes matching the
  // ARMvrev64 node on vector type Ty.
  //   op19_18   : 2-bit field forwarded to N2V (0b00 / 0b01 / 0b10 for the
  //               8 / 16 / 32-bit records below)
  //   OpcodeStr : mnemonic
  //   Dt        : data-type suffix string
  //   Ty        : vector value type used on both sides of the pattern
  // The two classes differ in register class (DPR vs QPR), itinerary, and
  // the sixth N2V argument (0 vs 1).
  6308. class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6309. : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
  6310. (ins DPR:$Vm), IIC_VMOVD,
  6311. OpcodeStr, Dt, "$Vd, $Vm", "",
  6312. [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
  6313. class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6314. : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
  6315. (ins QPR:$Vm), IIC_VMOVQ,
  6316. OpcodeStr, Dt, "$Vd, $Vm", "",
  6317. [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
  6318. def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
  6319. def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
  6320. def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
  // v2f32 reuses the 32-bit integer record.
  6321. let Predicates = [HasNEON] in {
  6322. def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
  6323. }
  6324. def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
  6325. def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
  6326. def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
  // Floating-point vector types reuse the integer record of the same element
  // width: f32 -> *32, f16 and bf16 -> *16.
  6327. let Predicates = [HasNEON] in {
  6328. def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
  6329. (VREV64q32 QPR:$Vm)>;
  6330. def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
  6331. (VREV64q16 QPR:$Vm)>;
  6332. def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
  6333. (VREV64d16 DPR:$Vm)>;
  6334. def : Pat<(v8bf16 (ARMvrev64 (v8bf16 QPR:$Vm))),
  6335. (VREV64q16 QPR:$Vm)>;
  6336. def : Pat<(v4bf16 (ARMvrev64 (v4bf16 DPR:$Vm))),
  6337. (VREV64d16 DPR:$Vm)>;
  6338. }
  6339. // VREV32 : Vector Reverse elements within 32-bit words
  // VREV32D / VREV32Q: same shape as the VREV64 classes, but matching
  // ARMvrev32 and passing 0b00001 as the fifth N2V argument.
  // Only 8- and 16-bit element records are instantiated below.
  6340. class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6341. : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
  6342. (ins DPR:$Vm), IIC_VMOVD,
  6343. OpcodeStr, Dt, "$Vd, $Vm", "",
  6344. [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
  6345. class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6346. : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
  6347. (ins QPR:$Vm), IIC_VMOVQ,
  6348. OpcodeStr, Dt, "$Vd, $Vm", "",
  6349. [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
  6350. def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
  6351. def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
  6352. def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
  6353. def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
  // f16 and bf16 vectors reuse the 16-bit integer records.
  6354. let Predicates = [HasNEON] in {
  6355. def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
  6356. (VREV32q16 QPR:$Vm)>;
  6357. def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
  6358. (VREV32d16 DPR:$Vm)>;
  6359. def : Pat<(v8bf16 (ARMvrev32 (v8bf16 QPR:$Vm))),
  6360. (VREV32q16 QPR:$Vm)>;
  6361. def : Pat<(v4bf16 (ARMvrev32 (v4bf16 DPR:$Vm))),
  6362. (VREV32d16 DPR:$Vm)>;
  6363. }
  6364. // VREV16 : Vector Reverse elements within 16-bit halfwords
  // VREV16D / VREV16Q: same shape as the VREV64/VREV32 classes, but matching
  // ARMvrev16 and passing 0b00010 as the fifth N2V argument.
  // Only the 8-bit element records are instantiated below.
  6365. class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6366. : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
  6367. (ins DPR:$Vm), IIC_VMOVD,
  6368. OpcodeStr, Dt, "$Vd, $Vm", "",
  6369. [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
  6370. class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  6371. : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
  6372. (ins QPR:$Vm), IIC_VMOVQ,
  6373. OpcodeStr, Dt, "$Vd, $Vm", "",
  6374. [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
  6375. def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
  6376. def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
  6377. // Other Vector Shuffles.
  6378. // Aligned extractions: really just dropping registers
  // AlignedVEXTq selects vector_extract_subvec on a Q register to a plain
  // EXTRACT_SUBREG, so no instruction is emitted by the pattern itself.
  //   DestTy  : type of the extracted subvector
  //   SrcTy   : type of the Q-register source vector
  //   LaneCVT : SDNodeXForm applied to the start index to produce the
  //             sub-register operand of EXTRACT_SUBREG
  6379. class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  6380. : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
  6381. (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
  6382. Requires<[HasNEON]>;
  // One instantiation per element type; float types reuse the transform of
  // the same-width integer type (f32 -> i32, f16/bf16 -> i16).
  6383. def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
  6384. def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
  6385. def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
  6386. def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
  6387. def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
  6388. def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>;
  6389. def : AlignedVEXTq<v4bf16, v8bf16, DSubReg_i16_reg>;
  6390. // VEXT : Vector Extract
  6391. // All of these have a two-operand InstAlias.
  // VEXTd / VEXTq: D- and Q-register instruction classes matching NEONvext.
  //   OpcodeStr : mnemonic
  //   Dt        : data-type suffix string
  //   Ty        : vector value type of both sources and the result
  //   immTy     : operand class of the $index immediate
  // The index is placed in Inst{11-8}. The class defaults are overridden per
  // element size in the defs below, so that the index lands in the upper
  // bits of the field and the remaining low bits are forced to zero.
  6392. let TwoOperandAliasConstraint = "$Vn = $Vd" in {
  6393. class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  6394. : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
  6395. (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
  6396. IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
  6397. [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
  6398. (Ty DPR:$Vm), imm:$index)))]> {
  // D form: 3-bit index, Inst{11} fixed to 0.
  6399. bits<3> index;
  6400. let Inst{11} = 0b0;
  6401. let Inst{10-8} = index{2-0};
  6402. }
  6403. class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  6404. : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
  6405. (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
  6406. IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
  6407. [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
  6408. (Ty QPR:$Vm), imm:$index)))]> {
  // Q form: 4-bit index occupying all of Inst{11-8}.
  6409. bits<4> index;
  6410. let Inst{11-8} = index{3-0};
  6411. }
  6412. }
  // 8-bit elements, D form: index range 0..7, uses all three index bits.
  6413. def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  6414. let Inst{10-8} = index{2-0};
  6415. }
  // 16-bit elements, D form: index range 0..3 in Inst{10-9}, Inst{8} = 0.
  6416. def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  6417. let Inst{10-9} = index{1-0};
  6418. let Inst{8} = 0b0;
  6419. }
  // f16 / bf16 D-register vectors reuse the 16-bit record.
  6420. let Predicates = [HasNEON] in {
  6421. def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
  6422. (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
  6423. def : Pat<(v4bf16 (NEONvext (v4bf16 DPR:$Vn), (v4bf16 DPR:$Vm), (i32 imm:$index))),
  6424. (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
  6425. }
  // 32-bit elements, D form: index range 0..1 in Inst{10}, Inst{9-8} = 0.
  6426. def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  6427. let Inst{10} = index{0};
  6428. let Inst{9-8} = 0b00;
  6429. }
  // v2f32 reuses the 32-bit record.
  6430. let Predicates = [HasNEON] in {
  6431. def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
  6432. (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
  6433. }
  // 8-bit elements, Q form: index range 0..15, uses all four index bits.
  6434. def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  6435. let Inst{11-8} = index{3-0};
  6436. }
  // 16-bit elements, Q form: index range 0..7 in Inst{11-9}, Inst{8} = 0.
  6437. def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  6438. let Inst{11-9} = index{2-0};
  6439. let Inst{8} = 0b0;
  6440. }
  // f16 / bf16 Q-register vectors reuse the 16-bit record.
  6441. let Predicates = [HasNEON] in {
  6442. def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
  6443. (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
  6444. def : Pat<(v8bf16 (NEONvext (v8bf16 QPR:$Vn), (v8bf16 QPR:$Vm), (i32 imm:$index))),
  6445. (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
  6446. }
  // 32-bit elements, Q form: index range 0..3 in Inst{11-10}, Inst{9-8} = 0.
  6447. def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  6448. let Inst{11-10} = index{1-0};
  6449. let Inst{9-8} = 0b00;
  6450. }
  // 64-bit elements, Q form: index range 0..1 in Inst{11}, Inst{10-8} = 0.
  6451. def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  6452. let Inst{11} = index{0};
  6453. let Inst{10-8} = 0b000;
  6454. }
  // v4f32 reuses the 32-bit Q record.
  6455. let Predicates = [HasNEON] in {
  6456. def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
  6457. (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
  6458. }
  // Two-register shuffles. The second template argument distinguishes the
  // operation (0b00001 vtrn, 0b00010 vuzp, 0b00011 vzip); the first selects
  // the element size (0b00 / 0b01 / 0b10 for 8 / 16 / 32 bits).
  6459. // VTRN : Vector Transpose
  6460. def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
  6461. def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
  6462. def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
  6463. def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
  6464. def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
  6465. def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
  6466. // VUZP : Vector Unzip (Deinterleave)
  // No VUZPd32 record exists; the D-register 32-bit form is an alias (below).
  6467. def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
  6468. def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
  6469. // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  6470. def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
  6471. (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
  // The Q forms of vuzp/vzip use the IIC_VPERMQ3 itinerary, unlike vtrn's
  // IIC_VPERMQ.
  6472. def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
  6473. def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
  6474. def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
  6475. // VZIP : Vector Zip (Interleave)
  // As with vuzp, there is no VZIPd32 record; the alias below covers it.
  6476. def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
  6477. def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
  6478. // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  6479. def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
  6480. (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
  6481. def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
  6482. def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
  6483. def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
  6484. // Vector Table Lookup and Table Extension.
  6485. // VTBL : Vector Table Lookup
  // All VTBL/VTBX records and pseudos in this scope are decoded through
  // DecodeTBLInstruction. The fourth N3V argument steps 0b1000..0b1011 for
  // table lists of one to four D registers; the fifth is 0 for VTBL and 1
  // for VTBX.
  6486. let DecoderMethod = "DecodeTBLInstruction" in {
  // Only the one-register form carries a selection pattern on the record.
  6487. def VTBL1
  6488. : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
  6489. (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
  6490. "vtbl", "8", "$Vd, $Vn, $Vm", "",
  6491. [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
  // Multi-register table forms: empty pattern lists here; they are selected
  // by separate Pat records elsewhere in the file.
  6492. let hasExtraSrcRegAllocReq = 1 in {
  6493. def VTBL2
  6494. : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
  6495. (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
  6496. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6497. def VTBL3
  6498. : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
  6499. (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
  6500. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6501. def VTBL4
  6502. : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
  6503. (ins VecListFourD:$Vn, DPR:$Vm),
  6504. NVTBLFrm, IIC_VTB4,
  6505. "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  6506. } // hasExtraSrcRegAllocReq = 1
  // Pseudos for the three- and four-register tables: the table is passed as
  // a single QQPR operand instead of a D-register list.
  6507. def VTBL3Pseudo
  6508. : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
  6509. def VTBL4Pseudo
  6510. : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
  6511. // VTBX : Vector Table Extension
  // VTBX forms take an extra $orig input that is tied to the destination
  // ("$orig = $Vd").
  6512. def VTBX1
  6513. : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
  6514. (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
  6515. "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
  6516. [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
  6517. DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
  6518. let hasExtraSrcRegAllocReq = 1 in {
  6519. def VTBX2
  6520. : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
  6521. (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
  6522. "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
  6523. def VTBX3
  6524. : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
  6525. (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
  6526. NVTBLFrm, IIC_VTBX3,
  6527. "vtbx", "8", "$Vd, $Vn, $Vm",
  6528. "$orig = $Vd", []>;
  6529. def VTBX4
  6530. : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
  6531. (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
  6532. "vtbx", "8", "$Vd, $Vn, $Vm",
  6533. "$orig = $Vd", []>;
  6534. } // hasExtraSrcRegAllocReq = 1
  // Pseudo counterparts of VTBX3/VTBX4, with the table as one QQPR operand
  // and $orig tied to $dst.
  6535. def VTBX3Pseudo
  6536. : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
  6537. IIC_VTBX3, "$orig = $dst", []>;
  6538. def VTBX4Pseudo
  6539. : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
  6540. IIC_VTBX4, "$orig = $dst", []>;
  6541. } // DecoderMethod = "DecodeTBLInstruction"
  // Selection patterns for the multi-register table lookups. Each pattern
  // assembles the separate v8i8 table operands into one register tuple with
  // REG_SEQUENCE (sub-registers dsub_0..dsub_3) and feeds it to the
  // instruction or pseudo.
  6542. let Predicates = [HasNEON] in {
  // Two-register tables: built as a DPair and passed to the real VTBL2 /
  // VTBX2 instructions.
  6543. def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
  6544. (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
  6545. v8i8:$Vn1, dsub_1),
  6546. v8i8:$Vm))>;
  6547. def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6548. v8i8:$Vm)),
  6549. (v8i8 (VTBX2 v8i8:$orig,
  6550. (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
  6551. v8i8:$Vn1, dsub_1),
  6552. v8i8:$Vm))>;
  // Three-register tables: built as a QQPR whose fourth slot (dsub_3) is
  // filled with IMPLICIT_DEF, then passed to the *3Pseudo records.
  6553. def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
  6554. v8i8:$Vn2, v8i8:$Vm)),
  6555. (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6556. v8i8:$Vn1, dsub_1,
  6557. v8i8:$Vn2, dsub_2,
  6558. (v8i8 (IMPLICIT_DEF)), dsub_3),
  6559. v8i8:$Vm))>;
  6560. def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6561. v8i8:$Vn2, v8i8:$Vm)),
  6562. (v8i8 (VTBX3Pseudo v8i8:$orig,
  6563. (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6564. v8i8:$Vn1, dsub_1,
  6565. v8i8:$Vn2, dsub_2,
  6566. (v8i8 (IMPLICIT_DEF)), dsub_3),
  6567. v8i8:$Vm))>;
  // Four-register tables: all four QQPR slots are real operands, passed to
  // the *4Pseudo records.
  6568. def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
  6569. v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
  6570. (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6571. v8i8:$Vn1, dsub_1,
  6572. v8i8:$Vn2, dsub_2,
  6573. v8i8:$Vn3, dsub_3),
  6574. v8i8:$Vm))>;
  6575. def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
  6576. v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
  6577. (v8i8 (VTBX4Pseudo v8i8:$orig,
  6578. (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
  6579. v8i8:$Vn1, dsub_1,
  6580. v8i8:$Vn2, dsub_2,
  6581. v8i8:$Vn3, dsub_3),
  6582. v8i8:$Vm))>;
  6583. }
  6584. // VRINT : Vector Rounding
  // VRINT_FPI generates the four records (Df, Qf, Dh, Qh) and four assembler
  // aliases for one vrint<op> mnemonic.
  //   op    : suffix appended to "vrint" to form the mnemonic
  //   op9_7 : 3-bit value written into Inst{9-7} of every record
  //   Int   : pattern operator the records are bound to
  // Suffixes: D/Q = N2VDIntnp / N2VQIntnp form, f/h = f32 / f16 elements.
  6585. multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  6586. let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  // Each record passes a fixed 0b100 to the base class and then overrides
  // Inst{9-7} with op9_7 in its body.
  6587. def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
  6588. !strconcat("vrint", op), "f32",
  6589. v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
  6590. let Inst{9-7} = op9_7;
  6591. }
  6592. def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
  6593. !strconcat("vrint", op), "f32",
  6594. v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
  6595. let Inst{9-7} = op9_7;
  6596. }
  // f16 records additionally require HasFullFP16.
  6597. def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
  6598. !strconcat("vrint", op), "f16",
  6599. v4f16, v4f16, Int>,
  6600. Requires<[HasV8, HasNEON, HasFullFP16]> {
  6601. let Inst{9-7} = op9_7;
  6602. }
  6603. def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
  6604. !strconcat("vrint", op), "f16",
  6605. v8f16, v8f16, Int>,
  6606. Requires<[HasV8, HasNEON, HasFullFP16]> {
  6607. let Inst{9-7} = op9_7;
  6608. }
  6609. }
  // Aliases accepting the doubled type suffix (".f32.f32" / ".f16.f16"),
  // resolved to the records defined above via !cast on NAME.
  6610. def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
  6611. (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  6612. def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
  6613. (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  6614. let Predicates = [HasNEON, HasFullFP16] in {
  6615. def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
  6616. (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  6617. def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
  6618. (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  6619. }
  6620. }
  // One instantiation per rounding-mode suffix. Note that op9_7 values 0b100
  // and 0b110 are not used by any instantiation here.
  6621. defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
  6622. defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
  6623. defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
  6624. defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
  6625. defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
  6626. defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
  6627. // Cryptography instructions
  // Helper classes for the AES and SHA records. Everything in this scope
  // uses the NEONThumb2DataIPostEncoder hook, lives in the "v8Crypto"
  // decoder namespace and is marked hasSideEffects = 0.
  // Note the argument order: the classes take (op7, op6) but forward them to
  // the base class as (op6, op7).
  6628. let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
  6629. DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  // AES / AES2Op: "aes"<op> on v16i8, built on N2VQIntXnp / N2VQIntX2np.
  6630. class AES<string op, bit op7, bit op6, SDPatternOperator Int>
  6631. : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
  6632. !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  6633. class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
  6634. : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
  6635. !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  // N2SHA / N2SHA2Op: "sha"<op> on v4i32, built on N2VQIntXnp / N2VQIntX2np.
  6636. class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
  6637. SDPatternOperator Int>
  6638. : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
  6639. !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  6640. class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
  6641. SDPatternOperator Int>
  6642. : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
  6643. !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  // N3SHA3Op: "sha"<op> on v4i32, built on N3VQInt3np.
  6644. class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
  6645. : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
  6646. !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  6647. }
  // AES instructions, available with HasV8 and HasAES. aesd/aese use the
  // AES2Op class; aesimc/aesmc use the AES class.
  6648. let Predicates = [HasV8, HasAES] in {
  6649. def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
  6650. def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
  6651. def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
  6652. def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
  6653. }
  // SHA instructions, available with HasV8 and HasSHA2.
  // SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag, i.e. without a
  // pattern on the record; their intrinsics are matched by explicit Pat
  // records elsewhere in the file.
  6654. let Predicates = [HasV8, HasSHA2] in {
  6655. def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
  6656. def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
  6657. def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
  6658. def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
  6659. def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
  6660. def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
  6661. def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
  6662. def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
  6663. def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
  6664. def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
  6665. }
  // Patterns for the SHA1 intrinsics that take or return a scalar i32.
  // The i32 value is copied into an SPR and widened to a vector register
  // with SUBREG_TO_REG at ssub_0 before being handed to the instruction.
  // NOTE(review): this block is predicated on HasNEON only, whereas the
  // SHA1H/SHA1C/SHA1M/SHA1P records it selects are defined under
  // [HasV8, HasSHA2] -- confirm the weaker predicate is intended.
  6666. let Predicates = [HasNEON] in {
  // sha1h: i32 in, i32 out. The result is taken back out of ssub_0 and
  // copied to a GPR.
  6667. def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
  6668. (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
  6669. (SHA1H (SUBREG_TO_REG (i64 0),
  6670. (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
  6671. ssub_0)),
  6672. ssub_0)), GPR)>;
  // sha1c / sha1m / sha1p: only the middle operand ($hash_e) is scalar; the
  // other two operands are v4i32 and passed through unchanged.
  6673. def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6674. (SHA1C v4i32:$hash_abcd,
  6675. (SUBREG_TO_REG (i64 0),
  6676. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6677. ssub_0),
  6678. v4i32:$wk)>;
  6679. def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6680. (SHA1M v4i32:$hash_abcd,
  6681. (SUBREG_TO_REG (i64 0),
  6682. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6683. ssub_0),
  6684. v4i32:$wk)>;
  6685. def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
  6686. (SHA1P v4i32:$hash_abcd,
  6687. (SUBREG_TO_REG (i64 0),
  6688. (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
  6689. ssub_0),
  6690. v4i32:$wk)>;
  6691. }
  6692. //===----------------------------------------------------------------------===//
  6693. // NEON instructions for single-precision FP math
  6694. //===----------------------------------------------------------------------===//
  // Pattern classes that select a scalar FP operation to a NEON vector
  // instruction. Common recipe: each scalar operand is inserted at ssub_0 of
  // an IMPLICIT_DEF vector, the vector instruction Inst is applied, and the
  // scalar result is read back from ssub_0 with EXTRACT_SUBREG.
  //   OpNode / MulNode : SDNode(s) being matched
  //   Inst             : NEON instruction used to implement them
  // The first four classes also constrain the vectors to DPR_VFP2 via
  // COPY_TO_REGCLASS; the two NVCVT* classes do not.

  // N2VSPat: unary f32 operation.
  6695. class N2VSPat<SDNode OpNode, NeonI Inst>
  6696. : NEONFPPat<(f32 (OpNode SPR:$a)),
  6697. (EXTRACT_SUBREG
  6698. (v2f32 (COPY_TO_REGCLASS (Inst
  6699. (INSERT_SUBREG
  6700. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6701. SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
  // N3VSPat: binary f32 operation.
  6702. class N3VSPat<SDNode OpNode, NeonI Inst>
  6703. : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
  6704. (EXTRACT_SUBREG
  6705. (v2f32 (COPY_TO_REGCLASS (Inst
  6706. (INSERT_SUBREG
  6707. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6708. SPR:$a, ssub_0),
  6709. (INSERT_SUBREG
  6710. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6711. SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // N3VSPatFP16: binary f16 operation; same recipe on HPR operands and
  // v4f16 vectors.
  6712. class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  6713. : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
  6714. (EXTRACT_SUBREG
  6715. (v4f16 (COPY_TO_REGCLASS (Inst
  6716. (INSERT_SUBREG
  6717. (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
  6718. HPR:$a, ssub_0),
  6719. (INSERT_SUBREG
  6720. (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
  6721. HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // N3VSMulOpPat: matches OpNode(acc, MulNode(a, b)) and passes the three
  // operands to Inst in the order (acc, a, b).
  6722. class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  6723. : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
  6724. (EXTRACT_SUBREG
  6725. (v2f32 (COPY_TO_REGCLASS (Inst
  6726. (INSERT_SUBREG
  6727. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6728. SPR:$acc, ssub_0),
  6729. (INSERT_SUBREG
  6730. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6731. SPR:$a, ssub_0),
  6732. (INSERT_SUBREG
  6733. (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
  6734. SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // NVCVTIFPat: integer (GPR) -> f32 conversion. The GPR value is first
  // copied into the SPR register class.
  6735. class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  6736. : NEONFPPat<(f32 (OpNode GPR:$a)),
  6737. (f32 (EXTRACT_SUBREG
  6738. (v2f32 (Inst
  6739. (INSERT_SUBREG
  6740. (v2f32 (IMPLICIT_DEF)),
  6741. (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
  6742. ssub_0))>;
  // NVCVTFIPat: f32 (SPR) -> i32 conversion; the result is typed i32 but
  // still extracted from ssub_0 of the vector result.
  6743. class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  6744. : NEONFPPat<(i32 (OpNode SPR:$a)),
  6745. (i32 (EXTRACT_SUBREG
  6746. (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
  6747. SPR:$a, ssub_0))),
  6748. ssub_0))>;
  // Instantiations of the scalar-on-NEON pattern classes.
  // Basic f32 arithmetic.
  6749. def : N3VSPat<fadd, VADDfd>;
  6750. def : N3VSPat<fsub, VSUBfd>;
  6751. def : N3VSPat<fmul, VMULfd>;
  // Multiply-accumulate. The same (fmul, fadd) / (fmul, fsub) shapes are
  // mapped to VMLA/VMLS or to VFMA/VFMS depending on the predicate list
  // (UseFPVMLx vs. HasVFP4 + UseFusedMAC).
  6752. def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
  6753. Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  6754. def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
  6755. Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  6756. def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
  6757. Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  6758. def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
  6759. Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  // Unary f32 operations.
  6760. def : N2VSPat<fabs, VABSfd>;
  6761. def : N2VSPat<fneg, VNEGfd>;
  // fmaximum / fminimum for f16 (HasFullFP16) and f32 (HasNEON).
  6762. def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
  6763. def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
  6764. def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
  6765. def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
  // Scalar f32 <-> i32 conversions via the D-register VCVT records.
  6766. def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
  6767. def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
  6768. def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
  6769. def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
  6770. // NEON doesn't have any f64 conversions, so provide patterns to make
  6771. // sure the VFP conversions match when extracting from a vector.
  // Each pattern takes the i32 element selected by $lane out of a v2i32 (DPR)
  // or v4i32 (QPR) source as a subregister chosen by SSubReg_f32_reg and feeds
  // it straight to VSITOD (signed) or VUITOD (unsigned).
  // NOTE(review): SSubReg_f32_reg is defined outside this chunk; presumably it
  // maps the lane immediate to the matching ssub_N index - confirm.
  6772. def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
  6773. (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6774. def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
  6775. (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6776. def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
  6777. (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6778. def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
  6779. (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  6780. // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
  // Both patterns pass the same GPR as both operands of VMOVDRR and then take
  // the ssub_0 subregister of the resulting D register as the f32 value. They
  // apply only when both HasNEON and DontUseVMOVSR hold.
  // First form: a generic i32 -> f32 bitconvert.
  6781. def : Pat<(f32 (bitconvert GPR:$a)),
  6782. (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
  6783. Requires<[HasNEON, DontUseVMOVSR]>;
  // Second form: the ARM-specific arm_vmovsr node.
  6784. def : Pat<(arm_vmovsr GPR:$a),
  6785. (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
  6786. Requires<[HasNEON, DontUseVMOVSR]>;
  6787. //===----------------------------------------------------------------------===//
  6788. // Non-Instruction Patterns or Endianness - Revert Patterns
  6789. //===----------------------------------------------------------------------===//
  6790. // bit_convert
  // Bitconverts between types whose lanes have the same width. The output of
  // every pattern is just the source register re-typed (no instruction is
  // named), and the only predicate is HasNEON, so these apply regardless of
  // endianness.
  6791. // 64 bit conversions
  6792. let Predicates = [HasNEON] in {
  6793. def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
  6794. def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
  6795. def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
  6796. def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
  6797. def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
  6798. def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;
  6799. def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>;
  6800. def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>;
  6801. // 128 bit conversions
  6802. def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
  6803. def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
  6804. def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
  6805. def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
  6806. def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
  6807. def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
  6808. def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
  6809. def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
  6810. }
  // Little-endian bitconverts between types whose lane widths differ. As with
  // the same-width conversions, the output of each pattern is just the source
  // register re-typed; no instruction is named. The patterns are grouped by
  // destination type.
  6811. let Predicates = [IsLE,HasNEON] in {
  6812. // 64 bit conversions
  6813. def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
  6814. def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
  6815. def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
  6816. def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (f64 DPR:$src)>;
  6817. def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
  6818. def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
  6819. def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  6820. def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  6821. def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  6822. def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
  6823. def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  6824. def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
  6825. def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
  6826. def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  6827. def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  6828. def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
  6829. def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  6830. def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
  6831. def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
  6832. def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  6833. def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  6834. def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
  6835. def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  6836. def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
  6837. def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
  6838. def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  6839. def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  6840. def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  6841. def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>;
  6842. def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (v4bf16 DPR:$src)>;
  6843. def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
  6844. def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
  6845. def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
  6846. def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (v4bf16 DPR:$src)>;
  6847. def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
  6848. def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  6849. def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  6850. def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  6851. def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
  6852. def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
  6853. def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  6854. def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
  6855. def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  6856. def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
  6857. def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>;
  6858. def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
  6859. // 128 bit conversions
  6860. def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  6861. def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  6862. def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  6863. def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
  6864. def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  6865. def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  6866. def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  6867. def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  6868. def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  6869. def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
  6870. def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  6871. def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
  6872. def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  6873. def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  6874. def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  6875. def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
  6876. def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  6877. def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  6878. def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  6879. def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  6880. def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  6881. def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
  6882. def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  6883. def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  6884. def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  6885. def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  6886. def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  6887. def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  6888. def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
  6889. def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
  6890. def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
  6891. def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
  6892. def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
  6893. def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;
  6894. def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  6895. def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  6896. def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  6897. def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  6898. def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  6899. def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  6900. def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  6901. def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  6902. def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  6903. def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  6904. def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
  6905. def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  6906. }
  // Big-endian bitconverts between types whose lane widths differ. Unlike the
  // little-endian table, each conversion is implemented with a VREV
  // instruction. In every pattern below the instruction is VREV<N>{d,q}<M>,
  // where N is the wider and M the narrower of the two lane widths involved
  // (f64 counts as a single 64-bit lane); the d forms take a DPR and the q
  // forms a QPR. The patterns are grouped by destination type.
  6907. let Predicates = [IsBE,HasNEON] in {
  6908. // 64 bit conversions
  6909. def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  6910. def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  6911. def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6912. def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6913. def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6914. def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  6915. def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  6916. def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  6917. def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6918. def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6919. def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  6920. def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  6921. def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  6922. def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  6923. def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6924. def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6925. def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6926. def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
  6927. def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  6928. def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  6929. def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6930. def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6931. def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  6932. def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
  6933. def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6934. def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6935. def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6936. def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6937. def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
  6938. def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6939. def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6940. def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6941. def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6942. def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
  6943. def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6944. def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  6945. def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6946. def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  6947. def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
  6948. def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
  6949. def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
  6950. def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
  6951. def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
  6952. def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
  6953. def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>;
  6954. def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
  6955. // 128 bit conversions
  6956. def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  6957. def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  6958. def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6959. def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6960. def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6961. def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  6962. def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  6963. def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  6964. def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6965. def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6966. def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  6967. def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  6968. def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  6969. def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  6970. def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6971. def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6972. def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6973. def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  6974. def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  6975. def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  6976. def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6977. def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6978. def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  6979. def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  6980. def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6981. def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6982. def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6983. def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6984. def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
  6985. def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6986. def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6987. def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6988. def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6989. def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
  6990. def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6991. def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  6992. def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6993. def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  6994. def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
  6995. def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
  6996. def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
  6997. def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
  6998. def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
  6999. def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
  7000. def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>;
  7001. def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
  7002. }
  let Predicates = [HasNEON] in {
    // Register casts are matched through the specific SDNode
    // 'ARMVectorRegCastImpl', not the more general 'ARMVectorRegCast'. The
    // general form would also match some bitconverts; when the input and
    // output types are identical the bitconvert is elided, which would leave
    // a nonsense pattern that matches nothing.

    // 128-bit types held in a Q register: every ordered (VT, VT2) pair,
    // including VT == VT2, is a plain re-typing of the register.
    foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
      foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
        def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
      }
    }

    // 64-bit types held in a D register, handled the same way.
    foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in {
      foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in {
        def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
      }
    }
  }
  7016. // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
  // Loads apply the VREV64 to the value produced by VLD1; stores apply it to
  // the value before it is passed to VST1. Byte-aligned accesses use the 8-bit
  // element forms (VLD1q8 / VST1q8 with VREV64q8) and halfword-aligned
  // accesses use the 16-bit element forms (VLD1q16 / VST1q16 with VREV64q16).
  7017. let Predicates = [IsBE,HasNEON] in {
  7018. def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
  7019. (VREV64q8 (VLD1q8 addrmode6:$addr))>;
  7020. def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
  7021. (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
  7022. def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
  7023. (VREV64q16 (VLD1q16 addrmode6:$addr))>;
  7024. def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
  7025. (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
  7026. }
  7027. // Fold extracting an element out of a v2i32 into a vfp register.
  // The extractelt followed by an i32 -> f32 bitconvert becomes a single
  // EXTRACT_SUBREG of the D register, with the subregister index derived from
  // the lane immediate by SSubReg_f32_reg (defined outside this chunk).
  7028. def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
  7029. (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
  7030. Requires<[HasNEON]>;
  // Vector lengthening move with load, matching extending loads.

  // Any-extending (_Any), zero-extending (_Z) and sign-extending (_S) loads
  // implemented as a VLD1d<SrcTy> load followed by one VMOVL. _Any and _Z use
  // the unsigned VMOVLu form and _S uses the signed VMOVLs form; all three
  // are defined with AddedComplexity = 10. Example:
  //   Lengthen_Single<"8", "i16", "8"> =
  //     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
  //         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
  multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
    // Result vector type, e.g. v8i16.
    defvar WideVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // Unsigned and signed lengthening moves producing WideVT.
    defvar WidenU = !cast<Instruction>("VMOVLuv" # DestLanes # DestTy);
    defvar WidenS = !cast<Instruction>("VMOVLsv" # DestLanes # DestTy);
    // The narrow load shared by all three patterns.
    defvar NarrowLoad = (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr);

    let AddedComplexity = 10 in {
      def _Any : Pat<(WideVT (!cast<PatFrag>("extloadvi" # SrcTy)
                                addrmode6:$addr)),
                     (WidenU NarrowLoad)>,
                 Requires<[HasNEON]>;

      def _Z : Pat<(WideVT (!cast<PatFrag>("zextloadvi" # SrcTy)
                              addrmode6:$addr)),
                   (WidenU NarrowLoad)>,
               Requires<[HasNEON]>;

      def _S : Pat<(WideVT (!cast<PatFrag>("sextloadvi" # SrcTy)
                              addrmode6:$addr)),
                   (WidenS NarrowLoad)>,
               Requires<[HasNEON]>;
    }
  }
  // Any-, zero- and sign-extending loads for a lengthening load whose result
  // uses only half of the lanes the VMOVL produces: the data is loaded with
  // VLD1LNd32, widened once, and the dsub_0 half of the result is kept.
  // Example:
  //   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
  //     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
  //         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                          (f64 (IMPLICIT_DEF)), (i32 0))),
  //                         dsub_0)>;
  multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                                 string InsnLanes, string InsnTy> {
    // Result vector type, e.g. v4i16.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // Unsigned and signed lengthening moves.
    defvar WidenU = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar WidenS = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
    // VLD1LNd32 load into an IMPLICIT_DEF D register, shared by all patterns.
    defvar Loaded = (VLD1LNd32 addrmode6oneL32:$addr,
                               (f64 (IMPLICIT_DEF)), (i32 0));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG (WidenU Loaded), dsub_0)>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (WidenU Loaded), dsub_0)>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (WidenS Loaded), dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfSingle. The patterns are the same
  // except that the value loaded by VLD1LNd32 is first passed through
  // VREV32d<RevLanes>, selected by the extra "RevLanes" parameter, to convert
  // the loaded data into proper vector format in big endian mode.
  multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string InsnLanes,
                                            string InsnTy, string RevLanes> {
    // Result vector type.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // Unsigned and signed lengthening moves.
    defvar WidenU = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar WidenS = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
    // VLD1LNd32 load followed by the lane reversal, shared by all patterns.
    defvar LoadedRev = (!cast<Instruction>("VREV32d" # RevLanes)
                          (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG (WidenU LoadedRev), dsub_0)>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (WidenU LoadedRev), dsub_0)>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (WidenS LoadedRev), dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Any-, zero- and sign-extending loads implemented as a VLD1LNd32 load
  // followed by two lengthening moves, quadrupling the initial element width.
  // The dsub_0 half of the first VMOVL result feeds the second VMOVL.
  //
  //   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
  //     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
  //         (VMOVLuv4i32
  //           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                            (f64 (IMPLICIT_DEF)),
  //                                            (i32 0))),
  //                           dsub_0))>;
  multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                             string Insn1Lanes, string Insn1Ty,
                             string Insn2Lanes, string Insn2Ty> {
    // Result vector type, e.g. v4i32.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // First (inner) lengthening move, unsigned and signed.
    defvar Widen1U = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar Widen1S = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    // Second (outer) lengthening move, unsigned and signed.
    defvar Widen2U = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar Widen2S = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    // VLD1LNd32 load into an IMPLICIT_DEF D register, shared by all patterns.
    defvar Loaded = (VLD1LNd32 addrmode6oneL32:$addr,
                               (f64 (IMPLICIT_DEF)), (i32 0));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (Widen2U (EXTRACT_SUBREG (Widen1U Loaded), dsub_0))>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (Widen2U (EXTRACT_SUBREG (Widen1U Loaded), dsub_0))>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (Widen2S (EXTRACT_SUBREG (Widen1S Loaded), dsub_0))>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_Double. The patterns are the same
  // except that the value loaded by VLD1LNd32 is first passed through
  // VREV32d<RevLanes>, selected by the extra "RevLanes" parameter, to convert
  // the loaded data into proper vector format in big endian mode.
  multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                        string SrcTy, string Insn1Lanes,
                                        string Insn1Ty, string Insn2Lanes,
                                        string Insn2Ty, string RevLanes> {
    // Result vector type.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // First (inner) lengthening move, unsigned and signed.
    defvar Widen1U = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar Widen1S = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    // Second (outer) lengthening move, unsigned and signed.
    defvar Widen2U = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar Widen2S = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    // VLD1LNd32 load followed by the lane reversal, shared by all patterns.
    defvar LoadedRev = (!cast<Instruction>("VREV32d" # RevLanes)
                          (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (Widen2U (EXTRACT_SUBREG (Widen1U LoadedRev), dsub_0))>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (Widen2U (EXTRACT_SUBREG (Widen1U LoadedRev), dsub_0))>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (Widen2S (EXTRACT_SUBREG (Widen1S LoadedRev), dsub_0))>,
             Requires<[HasNEON]>;
  }
  // Any-, zero- and sign-extending loads implemented as a VLD1LNd16 load
  // followed by two lengthening moves (quadrupling the initial element
  // width), where only half of the final lanes are needed: the result is the
  // dsub_0 half of the second VMOVL, a 64-bit value instead of a 128-bit one.
  //
  //   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
  //     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
  //         (EXTRACT_SUBREG (VMOVLuv4i32
  //           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
  //                                            (f64 (IMPLICIT_DEF)), (i32 0))),
  //                           dsub_0)),
  //           dsub_0)>;
  multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                                 string Insn1Lanes, string Insn1Ty,
                                 string Insn2Lanes, string Insn2Ty> {
    // Result vector type, e.g. v2i32.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // First (inner) lengthening move, unsigned and signed.
    defvar Widen1U = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar Widen1S = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    // Second (outer) lengthening move, unsigned and signed.
    defvar Widen2U = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar Widen2S = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    // VLD1LNd16 load into an IMPLICIT_DEF D register, shared by all patterns.
    defvar Loaded = (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                   (EXTRACT_SUBREG
                     (Widen2U (EXTRACT_SUBREG (Widen1U Loaded), dsub_0)),
                     dsub_0)>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (Widen2U (EXTRACT_SUBREG (Widen1U Loaded), dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (Widen2S (EXTRACT_SUBREG (Widen1S Loaded), dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfDouble. The patterns are the same
  // except that the value loaded by VLD1LNd16 is first passed through
  // VREV16d8 to convert the loaded data into proper vector format in big
  // endian mode.
  multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string Insn1Lanes,
                                            string Insn1Ty, string Insn2Lanes,
                                            string Insn2Ty> {
    // Result vector type.
    defvar ResVT = !cast<ValueType>("v" # DestLanes # DestTy);
    // First (inner) lengthening move, unsigned and signed.
    defvar Widen1U = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar Widen1S = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    // Second (outer) lengthening move, unsigned and signed.
    defvar Widen2U = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar Widen2S = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    // VLD1LNd16 load followed by the byte reversal, shared by all patterns.
    defvar LoadedRev = (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0)));

    def _Any : Pat<(ResVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                   (EXTRACT_SUBREG
                     (Widen2U (EXTRACT_SUBREG (Widen1U LoadedRev), dsub_0)),
                     dsub_0)>,
               Requires<[HasNEON]>;

    def _Z : Pat<(ResVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (Widen2U (EXTRACT_SUBREG (Widen1U LoadedRev), dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

    def _S : Pat<(ResVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (Widen2S (EXTRACT_SUBREG (Widen1S LoadedRev), dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Single-step lengthening loads. These instantiations sit outside the
  // IsLE / IsBE predicate blocks, so one set serves both endiannesses.
  7254. defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
  7255. defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
  7256. defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
  // Little-endian instantiations of the half-width and two-step lengthening
  // loads; these use the multiclasses without a VREV step.
  7257. let Predicates = [HasNEON,IsLE] in {
  7258. defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  7259. defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
  7260. // Double lengthening - v4i8 -> v4i16 -> v4i32
  7261. defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  7262. // v2i8 -> v2i16 -> v2i32
  7263. defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  7264. // v2i16 -> v2i32 -> v2i64
  7265. defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
  7266. }
  // Big-endian instantiations of the same conversions, using the *_Big_Endian
  // multiclasses. The trailing string argument of the HalfSingle and Double
  // forms is RevLanes, which selects VREV32d8 for i8 sources and VREV32d16
  // for i16 sources; the HalfDouble form always uses VREV16d8.
  7267. let Predicates = [HasNEON,IsBE] in {
  7268. defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  7269. defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
  7270. // Double lengthening - v4i8 -> v4i16 -> v4i32
  7271. defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  7272. // v2i8 -> v2i16 -> v2i32
  7273. defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  7274. // v2i16 -> v2i32 -> v2i64
  7275. defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
  7276. }
  7277. // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
  7278. let Predicates = [HasNEON,IsLE] in {
  7279. def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
  7280. (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
  7281. (VLD1LNd16 addrmode6:$addr,
  7282. (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  7283. def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
  7284. (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
  7285. (VLD1LNd16 addrmode6:$addr,
  7286. (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  7287. def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
  7288. (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
  7289. (VLD1LNd16 addrmode6:$addr,
  7290. (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  7291. }
  7292. // The following patterns are basically a copy of the patterns above,
  7293. // however with an additional VREV16d instruction to convert data
  7294. // loaded by VLD1LN into proper vector format in big endian mode.
  7295. let Predicates = [HasNEON,IsBE] in {
  7296. def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
  7297. (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
  7298. (!cast<Instruction>("VREV16d8")
  7299. (VLD1LNd16 addrmode6:$addr,
  7300. (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  7301. def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
  7302. (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
  7303. (!cast<Instruction>("VREV16d8")
  7304. (VLD1LNd16 addrmode6:$addr,
  7305. (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  7306. def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
  7307. (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
  7308. (!cast<Instruction>("VREV16d8")
  7309. (VLD1LNd16 addrmode6:$addr,
  7310. (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  7311. }
  7312. let Predicates = [HasNEON] in {
  7313. def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7314. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7315. def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7316. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7317. def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7318. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7319. def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7320. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7321. def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7322. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7323. def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7324. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7325. def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
  7326. (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  7327. }
  7328. //===----------------------------------------------------------------------===//
  7329. // Assembler aliases
  7330. //
  7331. def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
  7332. (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
  7333. def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
  7334. (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
  7335. // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
  7336. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
  7337. (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7338. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
  7339. (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7340. defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
  7341. (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7342. defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
  7343. (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7344. defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
  7345. (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7346. defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
  7347. (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7348. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
  7349. (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  7350. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
  7351. (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  7352. // ... two-operand aliases
  7353. defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
  7354. (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7355. defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
  7356. (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7357. defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
  7358. (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7359. defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
  7360. (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7361. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
  7362. (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  7363. defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
  7364. (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  7365. // ... immediates
  7366. def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
  7367. (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  7368. def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
  7369. (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
  7370. def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
  7371. (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  7372. def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
  7373. (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
  7374. // VLD1 single-lane pseudo-instructions. These need special handling for
  7375. // the lane index that an InstAlias can't handle, so we use these instead.
  7376. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
  7377. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7378. pred:$p)>;
  7379. def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
  7380. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7381. pred:$p)>;
  7382. def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
  7383. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7384. pred:$p)>;
  7385. def VLD1LNdWB_fixed_Asm_8 :
  7386. NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
  7387. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7388. pred:$p)>;
  7389. def VLD1LNdWB_fixed_Asm_16 :
  7390. NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
  7391. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7392. pred:$p)>;
  7393. def VLD1LNdWB_fixed_Asm_32 :
  7394. NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
  7395. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7396. pred:$p)>;
  7397. def VLD1LNdWB_register_Asm_8 :
  7398. NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
  7399. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7400. rGPR:$Rm, pred:$p)>;
  7401. def VLD1LNdWB_register_Asm_16 :
  7402. NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
  7403. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7404. rGPR:$Rm, pred:$p)>;
  7405. def VLD1LNdWB_register_Asm_32 :
  7406. NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
  7407. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7408. rGPR:$Rm, pred:$p)>;
  7409. // VST1 single-lane pseudo-instructions. These need special handling for
  7410. // the lane index that an InstAlias can't handle, so we use these instead.
  7411. def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
  7412. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7413. pred:$p)>;
  7414. def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
  7415. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7416. pred:$p)>;
  7417. def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
  7418. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7419. pred:$p)>;
  7420. def VST1LNdWB_fixed_Asm_8 :
  7421. NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
  7422. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7423. pred:$p)>;
  7424. def VST1LNdWB_fixed_Asm_16 :
  7425. NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
  7426. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7427. pred:$p)>;
  7428. def VST1LNdWB_fixed_Asm_32 :
  7429. NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
  7430. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7431. pred:$p)>;
  7432. def VST1LNdWB_register_Asm_8 :
  7433. NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
  7434. (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
  7435. rGPR:$Rm, pred:$p)>;
  7436. def VST1LNdWB_register_Asm_16 :
  7437. NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
  7438. (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
  7439. rGPR:$Rm, pred:$p)>;
  7440. def VST1LNdWB_register_Asm_32 :
  7441. NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
  7442. (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
  7443. rGPR:$Rm, pred:$p)>;
  7444. // VLD2 single-lane pseudo-instructions. These need special handling for
  7445. // the lane index that an InstAlias can't handle, so we use these instead.
  7446. def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
  7447. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7448. pred:$p)>;
  7449. def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
  7450. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7451. pred:$p)>;
  7452. def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
  7453. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
  7454. def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
  7455. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7456. pred:$p)>;
  7457. def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
  7458. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7459. pred:$p)>;
  7460. def VLD2LNdWB_fixed_Asm_8 :
  7461. NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
  7462. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7463. pred:$p)>;
  7464. def VLD2LNdWB_fixed_Asm_16 :
  7465. NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
  7466. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7467. pred:$p)>;
  7468. def VLD2LNdWB_fixed_Asm_32 :
  7469. NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
  7470. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
  7471. pred:$p)>;
  7472. def VLD2LNqWB_fixed_Asm_16 :
  7473. NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
  7474. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7475. pred:$p)>;
  7476. def VLD2LNqWB_fixed_Asm_32 :
  7477. NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
  7478. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7479. pred:$p)>;
  7480. def VLD2LNdWB_register_Asm_8 :
  7481. NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
  7482. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7483. rGPR:$Rm, pred:$p)>;
  7484. def VLD2LNdWB_register_Asm_16 :
  7485. NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
  7486. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7487. rGPR:$Rm, pred:$p)>;
  7488. def VLD2LNdWB_register_Asm_32 :
  7489. NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
  7490. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
  7491. rGPR:$Rm, pred:$p)>;
  7492. def VLD2LNqWB_register_Asm_16 :
  7493. NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
  7494. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7495. rGPR:$Rm, pred:$p)>;
  7496. def VLD2LNqWB_register_Asm_32 :
  7497. NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
  7498. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7499. rGPR:$Rm, pred:$p)>;
  7500. // VST2 single-lane pseudo-instructions. These need special handling for
  7501. // the lane index that an InstAlias can't handle, so we use these instead.
  7502. def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
  7503. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7504. pred:$p)>;
  7505. def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
  7506. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7507. pred:$p)>;
  7508. def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
  7509. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
  7510. pred:$p)>;
  7511. def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
  7512. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7513. pred:$p)>;
  7514. def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
  7515. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7516. pred:$p)>;
  7517. def VST2LNdWB_fixed_Asm_8 :
  7518. NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
  7519. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7520. pred:$p)>;
  7521. def VST2LNdWB_fixed_Asm_16 :
  7522. NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
  7523. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7524. pred:$p)>;
  7525. def VST2LNdWB_fixed_Asm_32 :
  7526. NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
  7527. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
  7528. pred:$p)>;
  7529. def VST2LNqWB_fixed_Asm_16 :
  7530. NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
  7531. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7532. pred:$p)>;
  7533. def VST2LNqWB_fixed_Asm_32 :
  7534. NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
  7535. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7536. pred:$p)>;
  7537. def VST2LNdWB_register_Asm_8 :
  7538. NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
  7539. (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
  7540. rGPR:$Rm, pred:$p)>;
  7541. def VST2LNdWB_register_Asm_16 :
  7542. NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
  7543. (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
  7544. rGPR:$Rm, pred:$p)>;
  7545. def VST2LNdWB_register_Asm_32 :
  7546. NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
  7547. (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
  7548. rGPR:$Rm, pred:$p)>;
  7549. def VST2LNqWB_register_Asm_16 :
  7550. NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
  7551. (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
  7552. rGPR:$Rm, pred:$p)>;
  7553. def VST2LNqWB_register_Asm_32 :
  7554. NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
  7555. (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
  7556. rGPR:$Rm, pred:$p)>;
  7557. // VLD3 all-lanes pseudo-instructions. These need special handling for
  7558. // the lane index that an InstAlias can't handle, so we use these instead.
  7559. def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
  7560. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7561. pred:$p)>;
  7562. def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7563. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7564. pred:$p)>;
  7565. def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7566. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7567. pred:$p)>;
  7568. def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
  7569. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7570. pred:$p)>;
  7571. def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7572. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7573. pred:$p)>;
  7574. def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7575. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7576. pred:$p)>;
  7577. def VLD3DUPdWB_fixed_Asm_8 :
  7578. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
  7579. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7580. pred:$p)>;
  7581. def VLD3DUPdWB_fixed_Asm_16 :
  7582. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7583. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7584. pred:$p)>;
  7585. def VLD3DUPdWB_fixed_Asm_32 :
  7586. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7587. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7588. pred:$p)>;
  7589. def VLD3DUPqWB_fixed_Asm_8 :
  7590. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
  7591. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7592. pred:$p)>;
  7593. def VLD3DUPqWB_fixed_Asm_16 :
  7594. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7595. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7596. pred:$p)>;
  7597. def VLD3DUPqWB_fixed_Asm_32 :
  7598. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7599. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7600. pred:$p)>;
  7601. def VLD3DUPdWB_register_Asm_8 :
  7602. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
  7603. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7604. rGPR:$Rm, pred:$p)>;
  7605. def VLD3DUPdWB_register_Asm_16 :
  7606. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7607. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7608. rGPR:$Rm, pred:$p)>;
  7609. def VLD3DUPdWB_register_Asm_32 :
  7610. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7611. (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
  7612. rGPR:$Rm, pred:$p)>;
  7613. def VLD3DUPqWB_register_Asm_8 :
  7614. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
  7615. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7616. rGPR:$Rm, pred:$p)>;
  7617. def VLD3DUPqWB_register_Asm_16 :
  7618. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7619. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7620. rGPR:$Rm, pred:$p)>;
  7621. def VLD3DUPqWB_register_Asm_32 :
  7622. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7623. (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
  7624. rGPR:$Rm, pred:$p)>;
  7625. // VLD3 single-lane pseudo-instructions. These need special handling for
  7626. // the lane index that an InstAlias can't handle, so we use these instead.
  7627. def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
  7628. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7629. pred:$p)>;
  7630. def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7631. (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
  7632. pred:$p)>;
  7633. def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7634. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7635. pred:$p)>;
  7636. def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7637. (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
  7638. pred:$p)>;
  7639. def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7640. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7641. pred:$p)>;
  7642. def VLD3LNdWB_fixed_Asm_8 :
  7643. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
  7644. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7645. pred:$p)>;
  7646. def VLD3LNdWB_fixed_Asm_16 :
  7647. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7648. (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
  7649. pred:$p)>;
  7650. def VLD3LNdWB_fixed_Asm_32 :
  7651. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7652. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7653. pred:$p)>;
  7654. def VLD3LNqWB_fixed_Asm_16 :
  7655. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7656. (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
  7657. pred:$p)>;
  7658. def VLD3LNqWB_fixed_Asm_32 :
  7659. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7660. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7661. pred:$p)>;
  7662. def VLD3LNdWB_register_Asm_8 :
  7663. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
  7664. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7665. rGPR:$Rm, pred:$p)>;
  7666. def VLD3LNdWB_register_Asm_16 :
  7667. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7668. (ins VecListThreeDHWordIndexed:$list,
  7669. addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  7670. def VLD3LNdWB_register_Asm_32 :
  7671. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7672. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7673. rGPR:$Rm, pred:$p)>;
  7674. def VLD3LNqWB_register_Asm_16 :
  7675. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7676. (ins VecListThreeQHWordIndexed:$list,
  7677. addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  7678. def VLD3LNqWB_register_Asm_32 :
  7679. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7680. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7681. rGPR:$Rm, pred:$p)>;
  7682. // VLD3 multiple structure pseudo-instructions. These need special handling for
  7683. // the vector operands that the normal instructions don't yet model.
  7684. // FIXME: Remove these when the register classes and instructions are updated.
  7685. def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
  7686. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7687. def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7688. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7689. def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7690. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7691. def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
  7692. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7693. def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
  7694. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7695. def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
  7696. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7697. def VLD3dWB_fixed_Asm_8 :
  7698. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
  7699. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7700. def VLD3dWB_fixed_Asm_16 :
  7701. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7702. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7703. def VLD3dWB_fixed_Asm_32 :
  7704. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7705. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7706. def VLD3qWB_fixed_Asm_8 :
  7707. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
  7708. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7709. def VLD3qWB_fixed_Asm_16 :
  7710. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
  7711. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7712. def VLD3qWB_fixed_Asm_32 :
  7713. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
  7714. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7715. def VLD3dWB_register_Asm_8 :
  7716. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
  7717. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7718. rGPR:$Rm, pred:$p)>;
  7719. def VLD3dWB_register_Asm_16 :
  7720. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7721. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7722. rGPR:$Rm, pred:$p)>;
  7723. def VLD3dWB_register_Asm_32 :
  7724. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7725. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7726. rGPR:$Rm, pred:$p)>;
  7727. def VLD3qWB_register_Asm_8 :
  7728. NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
  7729. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7730. rGPR:$Rm, pred:$p)>;
  7731. def VLD3qWB_register_Asm_16 :
  7732. NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
  7733. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7734. rGPR:$Rm, pred:$p)>;
  7735. def VLD3qWB_register_Asm_32 :
  7736. NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
  7737. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7738. rGPR:$Rm, pred:$p)>;
  7739. // VST3 single-lane pseudo-instructions. These need special handling for
  7740. // the lane index that an InstAlias can't handle, so we use these instead.
  7741. def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
  7742. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7743. pred:$p)>;
  7744. def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
  7745. (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
  7746. pred:$p)>;
  7747. def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
  7748. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7749. pred:$p)>;
  7750. def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
  7751. (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
  7752. pred:$p)>;
  7753. def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
  7754. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7755. pred:$p)>;
  7756. def VST3LNdWB_fixed_Asm_8 :
  7757. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
  7758. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7759. pred:$p)>;
  7760. def VST3LNdWB_fixed_Asm_16 :
  7761. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
  7762. (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
  7763. pred:$p)>;
  7764. def VST3LNdWB_fixed_Asm_32 :
  7765. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
  7766. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7767. pred:$p)>;
  7768. def VST3LNqWB_fixed_Asm_16 :
  7769. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
  7770. (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
  7771. pred:$p)>;
  7772. def VST3LNqWB_fixed_Asm_32 :
  7773. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
  7774. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7775. pred:$p)>;
  7776. def VST3LNdWB_register_Asm_8 :
  7777. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
  7778. (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
  7779. rGPR:$Rm, pred:$p)>;
  7780. def VST3LNdWB_register_Asm_16 :
  7781. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
  7782. (ins VecListThreeDHWordIndexed:$list,
  7783. addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  7784. def VST3LNdWB_register_Asm_32 :
  7785. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
  7786. (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
  7787. rGPR:$Rm, pred:$p)>;
  7788. def VST3LNqWB_register_Asm_16 :
  7789. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
  7790. (ins VecListThreeQHWordIndexed:$list,
  7791. addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  7792. def VST3LNqWB_register_Asm_32 :
  7793. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
  7794. (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
  7795. rGPR:$Rm, pred:$p)>;
  7796. // VST3 multiple structure pseudo-instructions. These need special handling for
  7797. // the vector operands that the normal instructions don't yet model.
  7798. // FIXME: Remove these when the register classes and instructions are updated.
  7799. def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
  7800. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7801. def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
  7802. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7803. def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
  7804. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7805. def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
  7806. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7807. def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
  7808. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7809. def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
  7810. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7811. def VST3dWB_fixed_Asm_8 :
  7812. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
  7813. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  // VST3 writeback assembler pseudo-instructions. The "_fixed" forms use the
  // "$list, $addr!" syntax; the "_register" forms take an extra rGPR:$Rm
  // operand and use "$list, $addr, $Rm". Every definition in this group uses
  // addrmode6align64 for the address and either VecListThreeD ("d" names) or
  // VecListThreeQ ("q" names) for the register list.
  7814. def VST3dWB_fixed_Asm_16 :
  7815. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
  7816. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7817. def VST3dWB_fixed_Asm_32 :
  7818. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
  7819. (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  7820. def VST3qWB_fixed_Asm_8 :
  7821. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
  7822. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7823. def VST3qWB_fixed_Asm_16 :
  7824. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
  7825. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  7826. def VST3qWB_fixed_Asm_32 :
  7827. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
  7828. (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  // Register-writeback forms: same operands as above plus rGPR:$Rm.
  7829. def VST3dWB_register_Asm_8 :
  7830. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
  7831. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7832. rGPR:$Rm, pred:$p)>;
  7833. def VST3dWB_register_Asm_16 :
  7834. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
  7835. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7836. rGPR:$Rm, pred:$p)>;
  7837. def VST3dWB_register_Asm_32 :
  7838. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
  7839. (ins VecListThreeD:$list, addrmode6align64:$addr,
  7840. rGPR:$Rm, pred:$p)>;
  7841. def VST3qWB_register_Asm_8 :
  7842. NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
  7843. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7844. rGPR:$Rm, pred:$p)>;
  7845. def VST3qWB_register_Asm_16 :
  7846. NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
  7847. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7848. rGPR:$Rm, pred:$p)>;
  7849. def VST3qWB_register_Asm_32 :
  7850. NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
  7851. (ins VecListThreeQ:$list, addrmode6align64:$addr,
  7852. rGPR:$Rm, pred:$p)>;
  7853. // VLD4 all-lanes pseudo-instructions. These need special handling for
  7854. // the lane index that an InstAlias can't handle, so we use these instead.
  // Naming: "d"/"q" selects VecListFourDAllLanes vs. VecListFourQAllLanes as
  // the register-list operand; "WB_fixed" is the "$list, $addr!" syntax and
  // "WB_register" adds an rGPR:$Rm operand ("$list, $addr, $Rm").
  // The address operand class follows the data type suffix throughout:
  //   .8  -> addrmode6dupalign32
  //   .16 -> addrmode6dupalign64
  //   .32 -> addrmode6dupalign64or128
  7855. def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
  7856. (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
  7857. pred:$p)>;
  7858. def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7859. (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
  7860. pred:$p)>;
  7861. def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7862. (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
  7863. pred:$p)>;
  7864. def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
  7865. (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
  7866. pred:$p)>;
  7867. def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7868. (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
  7869. pred:$p)>;
  7870. def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7871. (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
  7872. pred:$p)>;
  // Fixed-writeback forms ("$addr!").
  7873. def VLD4DUPdWB_fixed_Asm_8 :
  7874. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
  7875. (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
  7876. pred:$p)>;
  7877. def VLD4DUPdWB_fixed_Asm_16 :
  7878. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  7879. (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
  7880. pred:$p)>;
  7881. def VLD4DUPdWB_fixed_Asm_32 :
  7882. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  7883. (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
  7884. pred:$p)>;
  7885. def VLD4DUPqWB_fixed_Asm_8 :
  7886. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
  7887. (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
  7888. pred:$p)>;
  7889. def VLD4DUPqWB_fixed_Asm_16 :
  7890. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  7891. (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
  7892. pred:$p)>;
  7893. def VLD4DUPqWB_fixed_Asm_32 :
  7894. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  7895. (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
  7896. pred:$p)>;
  // Register-writeback forms (extra rGPR:$Rm operand).
  7897. def VLD4DUPdWB_register_Asm_8 :
  7898. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  7899. (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
  7900. rGPR:$Rm, pred:$p)>;
  7901. def VLD4DUPdWB_register_Asm_16 :
  7902. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7903. (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
  7904. rGPR:$Rm, pred:$p)>;
  7905. def VLD4DUPdWB_register_Asm_32 :
  7906. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7907. (ins VecListFourDAllLanes:$list,
  7908. addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
  7909. def VLD4DUPqWB_register_Asm_8 :
  7910. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  7911. (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
  7912. rGPR:$Rm, pred:$p)>;
  7913. def VLD4DUPqWB_register_Asm_16 :
  7914. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7915. (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
  7916. rGPR:$Rm, pred:$p)>;
  7917. def VLD4DUPqWB_register_Asm_32 :
  7918. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7919. (ins VecListFourQAllLanes:$list,
  7920. addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
  7921. // VLD4 single-lane pseudo-instructions. These need special handling for
  7922. // the lane index that an InstAlias can't handle, so we use these instead.
  // The register-list operand encodes both the d/q flavour and the element
  // size: VecListFourD{Byte,HWord,Word}Indexed for the "d" names and
  // VecListFourQ{HWord,Word}Indexed for the "q" names. No "q" .8 form is
  // defined in this group.
  // The address operand class follows the data type suffix:
  //   .8  -> addrmode6align32
  //   .16 -> addrmode6align64
  //   .32 -> addrmode6align64or128
  7923. def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
  7924. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  7925. pred:$p)>;
  7926. def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7927. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  7928. pred:$p)>;
  7929. def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7930. (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
  7931. pred:$p)>;
  7932. def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7933. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  7934. pred:$p)>;
  7935. def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7936. (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
  7937. pred:$p)>;
  // Fixed-writeback forms ("$addr!").
  7938. def VLD4LNdWB_fixed_Asm_8 :
  7939. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
  7940. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  7941. pred:$p)>;
  7942. def VLD4LNdWB_fixed_Asm_16 :
  7943. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  7944. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  7945. pred:$p)>;
  7946. def VLD4LNdWB_fixed_Asm_32 :
  7947. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  7948. (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
  7949. pred:$p)>;
  7950. def VLD4LNqWB_fixed_Asm_16 :
  7951. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  7952. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  7953. pred:$p)>;
  7954. def VLD4LNqWB_fixed_Asm_32 :
  7955. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  7956. (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
  7957. pred:$p)>;
  // Register-writeback forms (extra rGPR:$Rm operand).
  7958. def VLD4LNdWB_register_Asm_8 :
  7959. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  7960. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  7961. rGPR:$Rm, pred:$p)>;
  7962. def VLD4LNdWB_register_Asm_16 :
  7963. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7964. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  7965. rGPR:$Rm, pred:$p)>;
  7966. def VLD4LNdWB_register_Asm_32 :
  7967. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7968. (ins VecListFourDWordIndexed:$list,
  7969. addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
  7970. def VLD4LNqWB_register_Asm_16 :
  7971. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7972. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  7973. rGPR:$Rm, pred:$p)>;
  7974. def VLD4LNqWB_register_Asm_32 :
  7975. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7976. (ins VecListFourQWordIndexed:$list,
  7977. addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
  7978. // VLD4 multiple structure pseudo-instructions. These need special handling for
  7979. // the vector operands that the normal instructions don't yet model.
  7980. // FIXME: Remove these when the register classes and instructions are updated.
  // Unlike the all-lanes and single-lane groups, every definition here uses the
  // same address operand class (addrmode6align64or128or256) regardless of the
  // data type suffix. "d"/"q" selects VecListFourD vs. VecListFourQ;
  // "WB_fixed" is the "$addr!" syntax and "WB_register" adds rGPR:$Rm.
  7981. def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
  7982. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  7983. pred:$p)>;
  7984. def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7985. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  7986. pred:$p)>;
  7987. def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7988. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  7989. pred:$p)>;
  7990. def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
  7991. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  7992. pred:$p)>;
  7993. def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
  7994. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  7995. pred:$p)>;
  7996. def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
  7997. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  7998. pred:$p)>;
  // Fixed-writeback forms ("$addr!").
  7999. def VLD4dWB_fixed_Asm_8 :
  8000. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
  8001. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8002. pred:$p)>;
  8003. def VLD4dWB_fixed_Asm_16 :
  8004. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  8005. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8006. pred:$p)>;
  8007. def VLD4dWB_fixed_Asm_32 :
  8008. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  8009. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8010. pred:$p)>;
  8011. def VLD4qWB_fixed_Asm_8 :
  8012. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
  8013. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8014. pred:$p)>;
  8015. def VLD4qWB_fixed_Asm_16 :
  8016. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
  8017. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8018. pred:$p)>;
  8019. def VLD4qWB_fixed_Asm_32 :
  8020. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
  8021. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8022. pred:$p)>;
  // Register-writeback forms (extra rGPR:$Rm operand).
  8023. def VLD4dWB_register_Asm_8 :
  8024. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  8025. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8026. rGPR:$Rm, pred:$p)>;
  8027. def VLD4dWB_register_Asm_16 :
  8028. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  8029. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8030. rGPR:$Rm, pred:$p)>;
  8031. def VLD4dWB_register_Asm_32 :
  8032. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  8033. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8034. rGPR:$Rm, pred:$p)>;
  8035. def VLD4qWB_register_Asm_8 :
  8036. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  8037. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8038. rGPR:$Rm, pred:$p)>;
  8039. def VLD4qWB_register_Asm_16 :
  8040. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  8041. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8042. rGPR:$Rm, pred:$p)>;
  8043. def VLD4qWB_register_Asm_32 :
  8044. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  8045. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8046. rGPR:$Rm, pred:$p)>;
  8047. // VST4 single-lane pseudo-instructions. These need special handling for
  8048. // the lane index that an InstAlias can't handle, so we use these instead.
  // These mirror the VLD4 single-lane group operand-for-operand, with the
  // "vst4" mnemonic: VecListFourD{Byte,HWord,Word}Indexed for the "d" names,
  // VecListFourQ{HWord,Word}Indexed for the "q" names (no "q" .8 form is
  // defined here), and an address class chosen by the data type suffix:
  //   .8  -> addrmode6align32
  //   .16 -> addrmode6align64
  //   .32 -> addrmode6align64or128
  8049. def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
  8050. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  8051. pred:$p)>;
  8052. def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
  8053. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  8054. pred:$p)>;
  8055. def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
  8056. (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
  8057. pred:$p)>;
  8058. def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
  8059. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  8060. pred:$p)>;
  8061. def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
  8062. (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
  8063. pred:$p)>;
  // Fixed-writeback forms ("$addr!").
  8064. def VST4LNdWB_fixed_Asm_8 :
  8065. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
  8066. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  8067. pred:$p)>;
  8068. def VST4LNdWB_fixed_Asm_16 :
  8069. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
  8070. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  8071. pred:$p)>;
  8072. def VST4LNdWB_fixed_Asm_32 :
  8073. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
  8074. (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
  8075. pred:$p)>;
  8076. def VST4LNqWB_fixed_Asm_16 :
  8077. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
  8078. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  8079. pred:$p)>;
  8080. def VST4LNqWB_fixed_Asm_32 :
  8081. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
  8082. (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
  8083. pred:$p)>;
  // Register-writeback forms (extra rGPR:$Rm operand).
  8084. def VST4LNdWB_register_Asm_8 :
  8085. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
  8086. (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
  8087. rGPR:$Rm, pred:$p)>;
  8088. def VST4LNdWB_register_Asm_16 :
  8089. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
  8090. (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
  8091. rGPR:$Rm, pred:$p)>;
  8092. def VST4LNdWB_register_Asm_32 :
  8093. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
  8094. (ins VecListFourDWordIndexed:$list,
  8095. addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
  8096. def VST4LNqWB_register_Asm_16 :
  8097. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
  8098. (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
  8099. rGPR:$Rm, pred:$p)>;
  8100. def VST4LNqWB_register_Asm_32 :
  8101. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
  8102. (ins VecListFourQWordIndexed:$list,
  8103. addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
  8104. // VST4 multiple structure pseudo-instructions. These need special handling for
  8105. // the vector operands that the normal instructions don't yet model.
  8106. // FIXME: Remove these when the register classes and instructions are updated.
  // These mirror the VLD4 multiple-structure group with the "vst4" mnemonic:
  // every definition uses addrmode6align64or128or256 for the address, and
  // "d"/"q" selects VecListFourD vs. VecListFourQ. "WB_fixed" is the "$addr!"
  // syntax and "WB_register" adds rGPR:$Rm.
  8107. def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
  8108. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8109. pred:$p)>;
  8110. def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
  8111. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8112. pred:$p)>;
  8113. def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
  8114. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8115. pred:$p)>;
  8116. def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
  8117. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8118. pred:$p)>;
  8119. def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
  8120. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8121. pred:$p)>;
  8122. def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
  8123. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8124. pred:$p)>;
  // Fixed-writeback forms ("$addr!").
  8125. def VST4dWB_fixed_Asm_8 :
  8126. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
  8127. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8128. pred:$p)>;
  8129. def VST4dWB_fixed_Asm_16 :
  8130. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
  8131. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8132. pred:$p)>;
  8133. def VST4dWB_fixed_Asm_32 :
  8134. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
  8135. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8136. pred:$p)>;
  8137. def VST4qWB_fixed_Asm_8 :
  8138. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
  8139. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8140. pred:$p)>;
  8141. def VST4qWB_fixed_Asm_16 :
  8142. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
  8143. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8144. pred:$p)>;
  8145. def VST4qWB_fixed_Asm_32 :
  8146. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
  8147. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8148. pred:$p)>;
  // Register-writeback forms (extra rGPR:$Rm operand).
  8149. def VST4dWB_register_Asm_8 :
  8150. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
  8151. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8152. rGPR:$Rm, pred:$p)>;
  8153. def VST4dWB_register_Asm_16 :
  8154. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
  8155. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8156. rGPR:$Rm, pred:$p)>;
  8157. def VST4dWB_register_Asm_32 :
  8158. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
  8159. (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
  8160. rGPR:$Rm, pred:$p)>;
  8161. def VST4qWB_register_Asm_8 :
  8162. NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
  8163. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8164. rGPR:$Rm, pred:$p)>;
  8165. def VST4qWB_register_Asm_16 :
  8166. NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
  8167. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8168. rGPR:$Rm, pred:$p)>;
  8169. def VST4qWB_register_Asm_32 :
  8170. NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
  8171. (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
  8172. rGPR:$Rm, pred:$p)>;
  8173. // VMOV/VMVN takes an optional datatype suffix
  // The register-to-register "vmov $Vd, $Vm" has no instruction of its own in
  // these aliases: it is emitted as VORR with $Vm supplied for both source
  // operands. "vmvn $Vd, $Vm" maps straight onto VMVNd / VMVNq.
  8174. defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
  8175. (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  8176. defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
  8177. (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  8178. defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
  8179. (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
  8180. defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
  8181. (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
  8182. // VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
  // That is, "vcle Dd, Dn, Dm" is emitted as VCGE with $Dm and $Dn swapped.
  // The .f16 aliases are additionally gated on [HasNEON, HasFullFP16].
  8183. // D-register versions.
  8184. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
  8185. (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8186. def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
  8187. (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8188. def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
  8189. (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8190. def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
  8191. (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8192. def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
  8193. (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8194. def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
  8195. (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8196. def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
  8197. (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8198. let Predicates = [HasNEON, HasFullFP16] in
  8199. def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
  8200. (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8201. // Q-register versions.
  8202. def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
  8203. (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8204. def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
  8205. (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8206. def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
  8207. (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8208. def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
  8209. (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8210. def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
  8211. (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8212. def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
  8213. (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8214. def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
  8215. (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8216. let Predicates = [HasNEON, HasFullFP16] in
  8217. def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
  8218. (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8219. // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
  // That is, "vclt Dd, Dn, Dm" is emitted as VCGT with $Dm and $Dn swapped.
  // The .f16 aliases are additionally gated on [HasNEON, HasFullFP16].
  8220. // D-register versions.
  8221. def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
  8222. (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8223. def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
  8224. (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8225. def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
  8226. (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8227. def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
  8228. (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8229. def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
  8230. (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8231. def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
  8232. (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8233. def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
  8234. (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8235. let Predicates = [HasNEON, HasFullFP16] in
  8236. def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
  8237. (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  8238. // Q-register versions.
  8239. def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
  8240. (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8241. def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
  8242. (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8243. def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
  8244. (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8245. def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
  8246. (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8247. def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
  8248. (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8249. def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
  8250. (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8251. def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
  8252. (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8253. let Predicates = [HasNEON, HasFullFP16] in
  8254. def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
  8255. (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  8256. // VSWP allows, but does not require, a type suffix.
  // One alias per register class: DPR operands map to VSWPd, QPR to VSWPq.
  8257. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
  8258. (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
  8259. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
  8260. (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
  8261. // VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
  // The first three aliases are the DPR forms (VBIFd/VBITd/VBSLd); the last
  // three are the QPR forms (VBIFq/VBITq/VBSLq). Operand order is unchanged.
  8262. defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
  8263. (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  8264. defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
  8265. (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  8266. defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
  8267. (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  8268. defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
  8269. (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  8270. defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
  8271. (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  8272. defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
  8273. (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  8274. // "vmov Vd, #-imm" can be handled via "vmvn", and vice versa.
  // Both directions are defined for DPR and QPR destinations: a vmov.i32 whose
  // immediate matches nImmVMOVI32Neg is emitted as VMVN, and a vmvn.i32 whose
  // immediate matches nImmVMOVI32Neg is emitted as VMOV.
  // NOTE(review): presumably nImmVMOVI32Neg accepts immediates whose inverted
  // bit pattern is encodable and renders the inverted value -- confirm against
  // the operand definition, which is outside this section.
  8275. def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
  8276. (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  8277. def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
  8278. (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  8279. def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
  8280. (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  8281. def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
  8282. (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  8283. // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
  8284. // these should restrict to just the Q register variants, but the register
  8285. // classes are enough to match correctly regardless, so we keep it simple
  8286. // and just use MnemonicAlias.
  // Each alias simply rewrites the "q"-suffixed mnemonic to the plain one.
  8287. def : NEONMnemonicAlias<"vbicq", "vbic">;
  8288. def : NEONMnemonicAlias<"vandq", "vand">;
  8289. def : NEONMnemonicAlias<"veorq", "veor">;
  8290. def : NEONMnemonicAlias<"vorrq", "vorr">;
  8291. def : NEONMnemonicAlias<"vmovq", "vmov">;
  8292. def : NEONMnemonicAlias<"vmvnq", "vmvn">;
  8293. // Explicit versions for floating point so that the FPImm variants get
  8294. // handled early. The parser gets confused otherwise.
  8295. def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
  8296. def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
  8297. def : NEONMnemonicAlias<"vaddq", "vadd">;
  8298. def : NEONMnemonicAlias<"vsubq", "vsub">;
  8299. def : NEONMnemonicAlias<"vminq", "vmin">;
  8300. def : NEONMnemonicAlias<"vmaxq", "vmax">;
  8301. def : NEONMnemonicAlias<"vmulq", "vmul">;
  8302. def : NEONMnemonicAlias<"vabsq", "vabs">;
  8303. def : NEONMnemonicAlias<"vshlq", "vshl">;
  8304. def : NEONMnemonicAlias<"vshrq", "vshr">;
  8305. def : NEONMnemonicAlias<"vcvtq", "vcvt">;
  8306. def : NEONMnemonicAlias<"vcleq", "vcle">;
  8307. def : NEONMnemonicAlias<"vceqq", "vceq">;
  8308. def : NEONMnemonicAlias<"vzipq", "vzip">;
  8309. def : NEONMnemonicAlias<"vswpq", "vswp">;
  // vrecpeq is only aliased with an explicit .f32 or .u32 suffix.
  8310. def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
  8311. def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
  8312. // Alias for loading floating point immediates that aren't representable
  8313. // using the vmov.f32 encoding but the bit pattern is representable using
  8314. // the .i32 encoding.
  // The QPR form maps to VMOVv4i32 and the DPR form to VMOVv2i32; both take
  // the immediate as an nImmVMOVI32 operand.
  8315. def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
  8316. (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  8317. def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
  8318. (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  8319. // ARMv8.6a BFloat16 instructions.
  // Everything inside this scope requires both HasBF16 and HasNEON.
  8320. let Predicates = [HasBF16, HasNEON] in {
  // Common base for the BF16 dot-product instructions. It forwards op27_23,
  // op21_20 and op6 to N3Vnp together with the fixed field 0b1101, leaves both
  // asm strings empty (subclasses set AsmString themselves), and places the
  // instruction in the "VFPV8" decoder namespace.
  8321. class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
  8322. dag oops, dag iops, list<dag> pattern>
  8323. : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
  8324. N3RegFrm, IIC_VDOTPROD, "", "", pattern>
  8325. {
  8326. let DecoderNamespace = "VFPV8";
  8327. }
  // Vector (non-indexed) BF16 dot product. Selects int_arm_neon_bfdot with an
  // accumulator of type AccumTy and two inputs of type InputTy, all in RegTy.
  // The result $dst is tied to the accumulator input $Vd.
  8328. class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
  8329. : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
  8330. (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
  8331. [(set (AccumTy RegTy:$dst),
  8332. (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
  8333. (InputTy RegTy:$Vn),
  8334. (InputTy RegTy:$Vm)))]> {
  8335. let Constraints = "$dst = $Vd";
  8336. let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  // NOTE(review): repeats the value already set in BF16VDOT.
  8337. let DecoderNamespace = "VFPV8";
  8338. }
  // Indexed (by-lane) BF16 dot product. The instruction itself carries no
  // pattern; the anonymous Pat below selects it when the third bfdot operand is
  // a bitconvert of an ARMvduplane of $Vm. RHS is the dag used as the
  // instruction's $Vm operand in the output pattern.
  8339. multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
  8340. ValueType InputTy, dag RHS> {
  8341. def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
  8342. (ins RegTy:$Vd, RegTy:$Vn,
  8343. DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
  // The single-bit lane index is placed in encoding bit 5.
  8344. bit lane;
  8345. let Inst{5} = lane;
  8346. let Constraints = "$dst = $Vd";
  8347. let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
  // NOTE(review): repeats the value already set in BF16VDOT.
  8348. let DecoderNamespace = "VFPV8";
  8349. }
  8350. def : Pat<
  8351. (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
  8352. (InputTy RegTy:$Vn),
  8353. (InputTy (bitconvert (AccumTy
  8354. (ARMvduplane (AccumTy RegTy:$Vm),
  8355. VectorIndex32:$lane)))))),
  8356. (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
  8357. }
  // Instantiations: D (Q=0, DPR, v2f32/v4bf16) and Q (Q=1, QPR, v4f32/v8bf16).
  8358. def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
  8359. def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;
  // For the Q form the indexed operand is the dsub_0 subregister of $Vm.
  8360. defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
  8361. defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  // BF16 matrix multiply-accumulate. Selects int_arm_neon_bfmmla; $dst is tied
  // to $Vd.
  // NOTE(review): the operand lists use RegTy but the selection pattern
  // hard-codes QPR with v4f32/v8bf16. The only instantiation here (VMMLA)
  // passes QPR, so the two agree; a different RegTy would not.
  8362. class BF16MM<bit Q, RegisterClass RegTy,
  8363. string opc>
  8364. : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
  8365. (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
  8366. N3RegFrm, IIC_VDOTPROD, "", "",
  8367. [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
  8368. (v8bf16 QPR:$Vn),
  8369. (v8bf16 QPR:$Vm)))]> {
  8370. let Constraints = "$dst = $Vd";
  8371. let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  8372. let DecoderNamespace = "VFPV8";
  8373. }
  8374. def VMMLA : BF16MM<1, QPR, "vmmla">;
  // Vector form of "vfma<suffix>.bf16" on Q registers. T is forwarded to
  // N3VCP8 and OpNode is the operator matched by the selection pattern; the
  // instantiations below pair T=1 with suffix "t" and T=0 with suffix "b".
  8375. class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  8376. : N3VCP8<0b00, 0b11, T, 1,
  8377. (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
  8378. NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
  8379. [(set (v4f32 QPR:$dst),
  8380. (OpNode (v4f32 QPR:$Vd),
  8381. (v8bf16 QPR:$Vn),
  8382. (v8bf16 QPR:$Vm)))]> {
  8383. let Constraints = "$dst = $Vd";
  8384. let DecoderNamespace = "VFPV8";
  8385. }
  8386. def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
  8387. def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;
  // Indexed form of "vfma<suffix>.bf16". The instruction has no pattern of its
  // own; the anonymous Pat below matches OpNode applied to an ARMvduplane of a
  // Q register and rewrites it to the D subregister and lane selected by
  // DSubReg_i16_reg / SubReg_i16_lane.
  8388. multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  8389. def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
  8390. (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
  8391. IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
  // The 2-bit index is split across the encoding: idx{1} goes to bit 5 and
  // idx{0} to bit 3.
  8392. bits<2> idx;
  8393. let Inst{5} = idx{1};
  8394. let Inst{3} = idx{0};
  8395. let Constraints = "$dst = $Vd";
  8396. let DecoderNamespace = "VFPV8";
  8397. }
  8398. def : Pat<
  8399. (v4f32 (OpNode (v4f32 QPR:$Vd),
  8400. (v8bf16 QPR:$Vn),
  8401. (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
  8402. VectorIndex16:$lane)))),
  8403. (!cast<Instruction>(NAME) QPR:$Vd,
  8404. QPR:$Vn,
  8405. (EXTRACT_SUBREG QPR:$Vm,
  8406. (DSubReg_i16_reg VectorIndex16:$lane)),
  8407. (SubReg_i16_lane VectorIndex16:$lane))>;
  8408. }
  8409. defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
  8410. defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;
  // "vcvt.bf16.f32 $Vd, $Vm": QPR source, DPR destination. Defined with an
  // empty pattern list, so nothing in this definition selects it.
  8411. def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
  8412. (outs DPR:$Vd), (ins QPR:$Vm),
  8413. NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
  8414. }
  8415. // End of BFloat16 instructions