ARMInstrNEON.td 442 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
7877887798780878187828783878487858786878787888789879087918792879387948795879687978798879988008801880288038804880588068807880888098810881188128813881488158816881788188819882088218822882388248825882688278828882988308831883288338834883588368837883888398840884188428843884488458846884788488849885088518852885388548855885688578858885988608861886288638864886588668867886888698870887188728873887488758876887788788879888088818882888388848885888688878888888988908891889288938894889588968897889888998900890189028903890489058906890789088909891089118912891389148915891689178918891989208921892289238924892589268927892889298930893189328933893489358936893789388939894089418942894389448945894689478948894989508951895289538954895589568957895889598960896189628963896489658966896789688969897089718972897389748975897689778978897989808981898289838984898589868987898889898990899189928993899489958996899789988999900090019002900390049005900690079008900990109011901290139014901590169017901890199020902190229023902490259026902790289029903090319032903390349035903690379038903990409041904290439044904590469047904890499050905190529053905490559056905790589059906090619062906390649065906690679068906990709071907290739074907590769077907890799080908190829083908490859086908790889089909090919092909390949095909690979098909991009101910291039104910591069107910891099110911191129113911491159116911791189119912091219122912391249125912691279128912991309131913291339134913591369137913891399140914191429143914491459146914791489149915091519152915391549155915691579158915991609161916291639164916591669167916891699170917191729173917491759176917791789179918091819182918391849185918691879188918991909191919291939194919591969197919891999200920192029203920492059206920792089209921092119212921392149215921692179218921992209221922292239224922592269227922892299230923192329233923492359236923792389239924092419242924392449245924692479248924992509251
  1. //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file describes the ARM NEON instruction set.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //===----------------------------------------------------------------------===//
  13. // NEON-specific Operands.
  14. //===----------------------------------------------------------------------===//
  // Generic NEON modified-immediate operand: an i32 operand whose only
  // customization is that it is printed with printVMOVModImmOperand.
  15. def nModImm : Operand<i32> {
  16. let PrintMethod = "printVMOVModImmOperand";
  17. }
  // I8/I16/I32 splat-immediate operands. Each one pairs an AsmOperandClass
  // (named "NEONi<N>splat") with an i32 operand that uses that class as its
  // ParserMatchClass and is printed with printVMOVModImmOperand.
  18. def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
  19. def nImmSplatI8 : Operand<i32> {
  20. let PrintMethod = "printVMOVModImmOperand";
  21. let ParserMatchClass = nImmSplatI8AsmOperand;
  22. }
  23. def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
  24. def nImmSplatI16 : Operand<i32> {
  25. let PrintMethod = "printVMOVModImmOperand";
  26. let ParserMatchClass = nImmSplatI16AsmOperand;
  27. }
  28. def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
  29. def nImmSplatI32 : Operand<i32> {
  30. let PrintMethod = "printVMOVModImmOperand";
  31. let ParserMatchClass = nImmSplatI32AsmOperand;
  32. }
  // "Not" splat operands for I16/I32. Unlike the operands above, these records
  // set only ParserMatchClass; no PrintMethod is overridden here.
  33. def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
  34. def nImmSplatNotI16 : Operand<i32> {
  35. let ParserMatchClass = nImmSplatNotI16AsmOperand;
  36. }
  37. def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
  38. def nImmSplatNotI32 : Operand<i32> {
  39. let ParserMatchClass = nImmSplatNotI32AsmOperand;
  40. }
  // VMOV i32 immediate operand: matched through the "NEONi32vmov" operand class
  // and printed with printVMOVModImmOperand.
  41. def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
  42. def nImmVMOVI32 : Operand<i32> {
  43. let PrintMethod = "printVMOVModImmOperand";
  44. let ParserMatchClass = nImmVMOVI32AsmOperand;
  45. }
  // AsmOperandClass template for "replicate" VMOV immediates, parameterized by
  // a From and a To value type. The class Name and the PredicateMethod template
  // arguments are built from both From.Size and To.Size; the RenderMethod name
  // depends on From.Size only.
  46. class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  47. : AsmOperandClass {
  48. let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  49. let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  50. let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
  51. }
  // Same construction as the class above, but using the "inv" spellings
  // (isNEONinvReplicate / addNEONinvi<N>ReplicateOperands) for the predicate
  // and render methods.
  52. class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  53. : AsmOperandClass {
  54. let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  55. let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  56. let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
  57. }
  // i32 operand wrappers around the two operand-class templates above. Both are
  // printed with printVMOVModImmOperand; the ParserMatchClass is the anonymous
  // instantiation of the matching AsmOperandClass template for <From, To>.
  58. class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  59. let PrintMethod = "printVMOVModImmOperand";
  60. let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
  61. }
  62. class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  63. let PrintMethod = "printVMOVModImmOperand";
  64. let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
  65. }
  // "Neg" variant of the VMOV i32 immediate operand: matched through the
  // "NEONi32vmovNeg" operand class, printed with printVMOVModImmOperand.
  66. def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
  67. def nImmVMOVI32Neg : Operand<i32> {
  68. let PrintMethod = "printVMOVModImmOperand";
  69. let ParserMatchClass = nImmVMOVI32NegAsmOperand;
  70. }
  // F32 VMOV immediate operand. It uses a different print method
  // (printFPImmOperand) and reuses FPImmOperand as its match class;
  // FPImmOperand is not defined in this part of the file.
  71. def nImmVMOVF32 : Operand<i32> {
  72. let PrintMethod = "printFPImmOperand";
  73. let ParserMatchClass = FPImmOperand;
  74. }
  // I64 splat-immediate operand; same shape as the I8/I16/I32 splat operands.
  75. def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
  76. def nImmSplatI64 : Operand<i32> {
  77. let PrintMethod = "printVMOVModImmOperand";
  78. let ParserMatchClass = nImmSplatI64AsmOperand;
  79. }
  // Assembler operand classes for vector lane indices, one per suffix.
  80. def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
  81. def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
  82. def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
  83. def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
  // Lane-index operands. Each is both an i32 Operand and an ImmLeaf whose
  // predicate bounds the immediate: < 8, < 4, < 2 and < 1 for the 8/16/32/64
  // variants respectively. The comparison is done on (uint64_t)Imm, which
  // presumably also rejects negative immediates (verify Imm's declared type).
  // All four are printed with printVectorIndex and expose a single i32imm
  // machine operand.
  84. def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  85. return ((uint64_t)Imm) < 8;
  86. }]> {
  87. let ParserMatchClass = VectorIndex8Operand;
  88. let PrintMethod = "printVectorIndex";
  89. let MIOperandInfo = (ops i32imm);
  90. }
  91. def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  92. return ((uint64_t)Imm) < 4;
  93. }]> {
  94. let ParserMatchClass = VectorIndex16Operand;
  95. let PrintMethod = "printVectorIndex";
  96. let MIOperandInfo = (ops i32imm);
  97. }
  98. def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  99. return ((uint64_t)Imm) < 2;
  100. }]> {
  101. let ParserMatchClass = VectorIndex32Operand;
  102. let PrintMethod = "printVectorIndex";
  103. let MIOperandInfo = (ops i32imm);
  104. }
  // With the "< 1" bound, the only immediate this predicate accepts is 0.
  105. def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  106. return ((uint64_t)Imm) < 1;
  107. }]> {
  108. let ParserMatchClass = VectorIndex64Operand;
  109. let PrintMethod = "printVectorIndex";
  110. let MIOperandInfo = (ops i32imm);
  111. }
  // Vector register-list operands. Every list form below follows one pattern:
  // an AsmOperandClass that parses with parseVectorList and renders with
  // addVecListOperands, plus a RegisterOperand that names the register class
  // and the print method and points back at the operand class.
  112. // Register list of one D register.
  113. def VecListOneDAsmOperand : AsmOperandClass {
  114. let Name = "VecListOneD";
  115. let ParserMethod = "parseVectorList";
  116. let RenderMethod = "addVecListOperands";
  117. }
  118. def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  119. let ParserMatchClass = VecListOneDAsmOperand;
  120. }
  121. // Register list of two sequential D registers.
  122. def VecListDPairAsmOperand : AsmOperandClass {
  123. let Name = "VecListDPair";
  124. let ParserMethod = "parseVectorList";
  125. let RenderMethod = "addVecListOperands";
  126. }
  127. def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  128. let ParserMatchClass = VecListDPairAsmOperand;
  129. }
  130. // Register list of three sequential D registers.
  // The three- and four-register lists are declared over the plain DPR class
  // (there is no tuple register class in the RegisterOperand here).
  131. def VecListThreeDAsmOperand : AsmOperandClass {
  132. let Name = "VecListThreeD";
  133. let ParserMethod = "parseVectorList";
  134. let RenderMethod = "addVecListOperands";
  135. }
  136. def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  137. let ParserMatchClass = VecListThreeDAsmOperand;
  138. }
  139. // Register list of four sequential D registers.
  140. def VecListFourDAsmOperand : AsmOperandClass {
  141. let Name = "VecListFourD";
  142. let ParserMethod = "parseVectorList";
  143. let RenderMethod = "addVecListOperands";
  144. }
  145. def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  146. let ParserMatchClass = VecListFourDAsmOperand;
  147. }
  148. // Register list of two D registers spaced by 2 (two sequential Q registers).
  149. def VecListDPairSpacedAsmOperand : AsmOperandClass {
  150. let Name = "VecListDPairSpaced";
  151. let ParserMethod = "parseVectorList";
  152. let RenderMethod = "addVecListOperands";
  153. }
  // NOTE(review): this operand is declared over DPair, whereas
  // VecListDPairSpacedAllLanes further down uses DPairSpc. Confirm whether the
  // difference is intentional before relying on the register class here.
  154. def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  155. let ParserMatchClass = VecListDPairSpacedAsmOperand;
  156. }
  157. // Register list of three D registers spaced by 2 (three Q registers).
  // Declared over the plain DPR class; printed with printVectorListThreeSpaced.
  158. def VecListThreeQAsmOperand : AsmOperandClass {
  159. let Name = "VecListThreeQ";
  160. let ParserMethod = "parseVectorList";
  161. let RenderMethod = "addVecListOperands";
  162. }
  163. def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  164. let ParserMatchClass = VecListThreeQAsmOperand;
  165. }
  166. // Register list of four D registers spaced by 2 (four Q registers).
  // Operand class and register operand for the "FourQ" list form. Parsed with
  // parseVectorList, rendered with addVecListOperands, printed with
  // printVectorListFourSpaced; the register operand is declared over DPR.
  167. def VecListFourQAsmOperand : AsmOperandClass {
  168. let Name = "VecListFourQ";
  169. let ParserMethod = "parseVectorList";
  170. let RenderMethod = "addVecListOperands";
  171. }
  172. def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  173. let ParserMatchClass = VecListFourQAsmOperand;
  174. }
  // "All lanes" list operands. They keep the same parse/render methods as the
  // plain lists (parseVectorList / addVecListOperands) and differ in the
  // operand-class Name and in the *AllLanes print method.
  175. // Register list of one D register, with "all lanes" subscripting.
  176. def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  177. let Name = "VecListOneDAllLanes";
  178. let ParserMethod = "parseVectorList";
  179. let RenderMethod = "addVecListOperands";
  180. }
  181. def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  182. let ParserMatchClass = VecListOneDAllLanesAsmOperand;
  183. }
  184. // Register list of two D registers, with "all lanes" subscripting.
  185. def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  186. let Name = "VecListDPairAllLanes";
  187. let ParserMethod = "parseVectorList";
  188. let RenderMethod = "addVecListOperands";
  189. }
  // Declared over the DPair register class.
  190. def VecListDPairAllLanes : RegisterOperand<DPair,
  191. "printVectorListTwoAllLanes"> {
  192. let ParserMatchClass = VecListDPairAllLanesAsmOperand;
  193. }
  194. // Register list of two D registers spaced by 2 (two sequential Q registers), with "all lanes" subscripting.
  // Operand class and register operand for the spaced D-pair "all lanes" list.
  // Parsed with parseVectorList and rendered with addVecListOperands.
  195. def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  196. let Name = "VecListDPairSpacedAllLanes";
  197. let ParserMethod = "parseVectorList";
  198. let RenderMethod = "addVecListOperands";
  199. }
  // Declared over DPairSpc (the non-"all lanes" VecListDPairSpaced uses DPair).
  200. def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
  201. "printVectorListTwoSpacedAllLanes"> {
  202. let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
  203. }
  // Three- and four-register "all lanes" lists, in sequential (D) and spaced (Q)
  // forms. All four operand classes parse with parseVectorList and render with
  // addVecListOperands; all four register operands are declared over DPR and
  // differ only in their print method.
  204. // Register list of three D registers, with "all lanes" subscripting.
  205. def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  206. let Name = "VecListThreeDAllLanes";
  207. let ParserMethod = "parseVectorList";
  208. let RenderMethod = "addVecListOperands";
  209. }
  210. def VecListThreeDAllLanes : RegisterOperand<DPR,
  211. "printVectorListThreeAllLanes"> {
  212. let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
  213. }
  214. // Register list of three D registers spaced by 2 (three sequential Q regs).
  215. def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  216. let Name = "VecListThreeQAllLanes";
  217. let ParserMethod = "parseVectorList";
  218. let RenderMethod = "addVecListOperands";
  219. }
  220. def VecListThreeQAllLanes : RegisterOperand<DPR,
  221. "printVectorListThreeSpacedAllLanes"> {
  222. let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
  223. }
  224. // Register list of four D registers, with "all lanes" subscripting.
  225. def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  226. let Name = "VecListFourDAllLanes";
  227. let ParserMethod = "parseVectorList";
  228. let RenderMethod = "addVecListOperands";
  229. }
  230. def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  231. let ParserMatchClass = VecListFourDAllLanesAsmOperand;
  232. }
  233. // Register list of four D registers spaced by 2 (four sequential Q regs).
  234. def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  235. let Name = "VecListFourQAllLanes";
  236. let ParserMethod = "parseVectorList";
  237. let RenderMethod = "addVecListOperands";
  238. }
  239. def VecListFourQAllLanes : RegisterOperand<DPR,
  240. "printVectorListFourSpacedAllLanes"> {
  241. let ParserMatchClass = VecListFourQAllLanesAsmOperand;
  242. }
  // Lane-indexed list operands. Unlike the plain and "all lanes" lists, these
  // are i32 Operands (not RegisterOperands): the operand class renders with
  // addVecListIndexedOperands and the operand expands to two machine operands,
  // a DPR register ($Vd) and an i32 immediate ($idx). No PrintMethod is set in
  // these records.
  243. // Register list of one D register, with byte lane subscripting.
  244. def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  245. let Name = "VecListOneDByteIndexed";
  246. let ParserMethod = "parseVectorList";
  247. let RenderMethod = "addVecListIndexedOperands";
  248. }
  249. def VecListOneDByteIndexed : Operand<i32> {
  250. let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  251. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  252. }
  253. // ...with half-word lane subscripting.
  254. def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  255. let Name = "VecListOneDHWordIndexed";
  256. let ParserMethod = "parseVectorList";
  257. let RenderMethod = "addVecListIndexedOperands";
  258. }
  259. def VecListOneDHWordIndexed : Operand<i32> {
  260. let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  261. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  262. }
  263. // ...with word lane subscripting.
  264. def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  265. let Name = "VecListOneDWordIndexed";
  266. let ParserMethod = "parseVectorList";
  267. let RenderMethod = "addVecListIndexedOperands";
  268. }
  269. def VecListOneDWordIndexed : Operand<i32> {
  270. let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  271. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  272. }
  // Two-register lane-indexed lists. All five operands below share the same
  // shape: parseVectorList / addVecListIndexedOperands on the operand class and
  // (ops DPR:$Vd, i32imm:$idx) as machine operands. Only the operand-class Name
  // distinguishes the D and Q forms in these records; there is no byte-indexed
  // Q form in this group.
  273. // Register list of two D registers with byte lane subscripting.
  274. def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  275. let Name = "VecListTwoDByteIndexed";
  276. let ParserMethod = "parseVectorList";
  277. let RenderMethod = "addVecListIndexedOperands";
  278. }
  279. def VecListTwoDByteIndexed : Operand<i32> {
  280. let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  281. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  282. }
  283. // ...with half-word lane subscripting.
  284. def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  285. let Name = "VecListTwoDHWordIndexed";
  286. let ParserMethod = "parseVectorList";
  287. let RenderMethod = "addVecListIndexedOperands";
  288. }
  289. def VecListTwoDHWordIndexed : Operand<i32> {
  290. let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  291. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  292. }
  293. // ...with word lane subscripting.
  294. def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  295. let Name = "VecListTwoDWordIndexed";
  296. let ParserMethod = "parseVectorList";
  297. let RenderMethod = "addVecListIndexedOperands";
  298. }
  299. def VecListTwoDWordIndexed : Operand<i32> {
  300. let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  301. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  302. }
  303. // Register list of two Q registers with half-word lane subscripting.
  304. def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  305. let Name = "VecListTwoQHWordIndexed";
  306. let ParserMethod = "parseVectorList";
  307. let RenderMethod = "addVecListIndexedOperands";
  308. }
  // The machine operand is still a single DPR plus index, as in the D forms.
  309. def VecListTwoQHWordIndexed : Operand<i32> {
  310. let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  311. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  312. }
  313. // ...with word lane subscripting.
  314. def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  315. let Name = "VecListTwoQWordIndexed";
  316. let ParserMethod = "parseVectorList";
  317. let RenderMethod = "addVecListIndexedOperands";
  318. }
  319. def VecListTwoQWordIndexed : Operand<i32> {
  320. let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  321. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  322. }
  // Three-register lane-indexed lists. Same shape as the one- and two-register
  // indexed operands: parseVectorList / addVecListIndexedOperands on the
  // operand class and (ops DPR:$Vd, i32imm:$idx) as machine operands. The Q
  // forms exist for half-word and word lanes only.
  323. // Register list of three D registers with byte lane subscripting.
  324. def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  325. let Name = "VecListThreeDByteIndexed";
  326. let ParserMethod = "parseVectorList";
  327. let RenderMethod = "addVecListIndexedOperands";
  328. }
  329. def VecListThreeDByteIndexed : Operand<i32> {
  330. let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  331. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  332. }
  333. // ...with half-word lane subscripting.
  334. def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  335. let Name = "VecListThreeDHWordIndexed";
  336. let ParserMethod = "parseVectorList";
  337. let RenderMethod = "addVecListIndexedOperands";
  338. }
  339. def VecListThreeDHWordIndexed : Operand<i32> {
  340. let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  341. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  342. }
  343. // ...with word lane subscripting.
  344. def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  345. let Name = "VecListThreeDWordIndexed";
  346. let ParserMethod = "parseVectorList";
  347. let RenderMethod = "addVecListIndexedOperands";
  348. }
  349. def VecListThreeDWordIndexed : Operand<i32> {
  350. let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  351. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  352. }
  353. // Register list of three Q registers with half-word lane subscripting.
  354. def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  355. let Name = "VecListThreeQHWordIndexed";
  356. let ParserMethod = "parseVectorList";
  357. let RenderMethod = "addVecListIndexedOperands";
  358. }
  359. def VecListThreeQHWordIndexed : Operand<i32> {
  360. let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  361. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  362. }
  363. // ...with word lane subscripting.
  364. def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  365. let Name = "VecListThreeQWordIndexed";
  366. let ParserMethod = "parseVectorList";
  367. let RenderMethod = "addVecListIndexedOperands";
  368. }
  369. def VecListThreeQWordIndexed : Operand<i32> {
  370. let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  371. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  372. }
  // Four-register lane-indexed lists. Same shape as the other indexed operands:
  // parseVectorList / addVecListIndexedOperands on the operand class and
  // (ops DPR:$Vd, i32imm:$idx) as machine operands. The Q forms exist for
  // half-word and word lanes only.
  373. // Register list of four D registers with byte lane subscripting.
  374. def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  375. let Name = "VecListFourDByteIndexed";
  376. let ParserMethod = "parseVectorList";
  377. let RenderMethod = "addVecListIndexedOperands";
  378. }
  379. def VecListFourDByteIndexed : Operand<i32> {
  380. let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  381. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  382. }
  383. // ...with half-word lane subscripting.
  384. def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  385. let Name = "VecListFourDHWordIndexed";
  386. let ParserMethod = "parseVectorList";
  387. let RenderMethod = "addVecListIndexedOperands";
  388. }
  389. def VecListFourDHWordIndexed : Operand<i32> {
  390. let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  391. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  392. }
  393. // ...with word lane subscripting.
  394. def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  395. let Name = "VecListFourDWordIndexed";
  396. let ParserMethod = "parseVectorList";
  397. let RenderMethod = "addVecListIndexedOperands";
  398. }
  399. def VecListFourDWordIndexed : Operand<i32> {
  400. let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  401. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  402. }
  403. // Register list of four Q registers with half-word lane subscripting.
  404. def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  405. let Name = "VecListFourQHWordIndexed";
  406. let ParserMethod = "parseVectorList";
  407. let RenderMethod = "addVecListIndexedOperands";
  408. }
  409. def VecListFourQHWordIndexed : Operand<i32> {
  410. let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  411. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  412. }
  413. // ...with word lane subscripting.
  414. def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  415. let Name = "VecListFourQWordIndexed";
  416. let ParserMethod = "parseVectorList";
  417. let RenderMethod = "addVecListIndexedOperands";
  418. }
  419. def VecListFourQWordIndexed : Operand<i32> {
  420. let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  421. let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  422. }
  // Alignment-qualified load/store pattern fragments. Each wraps the generic
  // load or store node and adds a C++ predicate on the node's getAlignment().
  // Note the comparison operators differ between fragments:
  //   dword_*     : alignment >= 8
  //   word_*      : alignment == 4 (exactly; 8 or more does not match)
  //   hword_*     : alignment == 2
  //   byte_*      : alignment == 1
  //   non_word_*  : alignment <  4
  423. def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  424. return cast<LoadSDNode>(N)->getAlignment() >= 8;
  425. }]>;
  426. def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
  427. (store node:$val, node:$ptr), [{
  428. return cast<StoreSDNode>(N)->getAlignment() >= 8;
  429. }]>;
  430. def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  431. return cast<LoadSDNode>(N)->getAlignment() == 4;
  432. }]>;
  433. def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
  434. (store node:$val, node:$ptr), [{
  435. return cast<StoreSDNode>(N)->getAlignment() == 4;
  436. }]>;
  437. def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  438. return cast<LoadSDNode>(N)->getAlignment() == 2;
  439. }]>;
  440. def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
  441. (store node:$val, node:$ptr), [{
  442. return cast<StoreSDNode>(N)->getAlignment() == 2;
  443. }]>;
  444. def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  445. return cast<LoadSDNode>(N)->getAlignment() == 1;
  446. }]>;
  447. def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
  448. (store node:$val, node:$ptr), [{
  449. return cast<StoreSDNode>(N)->getAlignment() == 1;
  450. }]>;
  // Matches alignments 1 and 2 in one fragment (anything below 4).
  451. def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  452. return cast<LoadSDNode>(N)->getAlignment() < 4;
  453. }]>;
  454. def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
  455. (store node:$val, node:$ptr), [{
  456. return cast<StoreSDNode>(N)->getAlignment() < 4;
  457. }]>;
  458. //===----------------------------------------------------------------------===//
  459. // NEON-specific DAG Nodes.
  460. //===----------------------------------------------------------------------===//
  // VTST node: one result and two operands; the result is constrained to an
  // integer type and the two operands must have the same type as each other.
  461. def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
  462. def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;
  463. // Types for vector shift by immediates. The "SHX" version is for long and
  464. // narrow operations where the source and destination vectors have different
  465. // types. The "SHINS" version is for shift and insert operations.
  // SDTARMVSHXIMM: result and source are integer types (not tied together),
  // shift amount is i32.
  466. def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
  467. SDTCisVT<2, i32>]>;
  // SDTARMVSHINSIMM: result and both vector operands share one integer type,
  // shift amount is i32.
  468. def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
  469. SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
  // Shift-by-immediate nodes. SDTARMVSHIMM, used by several nodes below, is not
  // defined in this part of the file; presumably it comes from another .td file
  // included earlier (TODO confirm).
  470. def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
  471. def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
  472. def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
  473. def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
  474. def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
  475. def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
  476. def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
  477. def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
  478. def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
  479. def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
  480. def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
  481. def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
  482. def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
  // Shift-and-insert nodes use the three-operand SHINS profile.
  483. def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
  484. def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
  // VBSP node: one vector result and three operands, all of the same type.
  // The type profile is given inline rather than as a named record.
  485. def NEONvbsp : SDNode<"ARMISD::VBSP",
  486. SDTypeProfile<1, 3, [SDTCisVec<0>,
  487. SDTCisSameAs<0, 1>,
  488. SDTCisSameAs<0, 2>,
  489. SDTCisSameAs<0, 3>]>>;
  // VEXT node: vector result, two vector operands of the result type, plus an
  // i32 third operand.
  490. def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
  491. SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
  492. def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
  // Two-result shuffle nodes (VZIP/VUZP/VTRN): two results and two operands,
  // all constrained to the same vector type.
  493. def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
  494. SDTCisSameAs<0, 2>,
  495. SDTCisSameAs<0, 3>]>;
  496. def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
  497. def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
  498. def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
  // Table-lookup nodes: result and every operand are fixed to v8i8. VTBL1 takes
  // two operands, VTBL2 takes three.
  499. def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
  500. SDTCisVT<2, v8i8>]>;
  501. def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
  502. SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
  503. def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
  504. def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
  505. //===----------------------------------------------------------------------===//
  506. // NEON load / store instructions
  507. //===----------------------------------------------------------------------===//
  508. // Use VLDM to load a Q register as a D register pair.
  509. // This is a pseudo instruction that is expanded to VLDMD after reg alloc.
  // The selection pattern matches a v2f64 load through word_alignedload, i.e.
  // only loads whose alignment is exactly 4 (see that fragment's predicate).
  // The pseudo has an empty assembly string.
  510. def VLDMQIA
  511. : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
  512. IIC_fpLoad_m, "",
  513. [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
  514. // Use VSTM to store a Q register as a D register pair.
  515. // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
  // Store counterpart: matches a v2f64 store through word_alignedstore
  // (alignment exactly 4) and defines no outputs.
  516. def VSTMQIA
  517. : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
  518. IIC_fpStore_m, "",
  519. [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
  520. // Classes for VLD* pseudo-instructions with multi-register operands.
  521. // These are expanded to real instructions after register allocation.
  // Naming used below: the register class in the name (Q / QQ / QQQQ) is the
  // destination class; "WB" variants add a GPR:$wb output tied to the address
  // via the "$addr.addr = $wb" constraint. The plain WB form takes an
  // am6offset:$offset input, "WBfixed" takes no offset operand, and
  // "WBregister" takes an rGPR:$offset input.
  522. class VLDQPseudo<InstrItinClass itin>
  523. : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
  524. class VLDQWBPseudo<InstrItinClass itin>
  525. : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
  526. (ins addrmode6:$addr, am6offset:$offset), itin,
  527. "$addr.addr = $wb">;
  528. class VLDQWBfixedPseudo<InstrItinClass itin>
  529. : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
  530. (ins addrmode6:$addr), itin,
  531. "$addr.addr = $wb">;
  532. class VLDQWBregisterPseudo<InstrItinClass itin>
  533. : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
  534. (ins addrmode6:$addr, rGPR:$offset), itin,
  535. "$addr.addr = $wb">;
  // QQ (QQPR destination) versions of the four classes above.
  536. class VLDQQPseudo<InstrItinClass itin>
  537. : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
  538. class VLDQQWBPseudo<InstrItinClass itin>
  539. : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
  540. (ins addrmode6:$addr, am6offset:$offset), itin,
  541. "$addr.addr = $wb">;
  542. class VLDQQWBfixedPseudo<InstrItinClass itin>
  543. : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
  544. (ins addrmode6:$addr), itin,
  545. "$addr.addr = $wb">;
  546. class VLDQQWBregisterPseudo<InstrItinClass itin>
  547. : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
  548. (ins addrmode6:$addr, rGPR:$offset), itin,
  549. "$addr.addr = $wb">;
  // QQQQ versions. Unlike the Q and QQ classes, these also take the destination
  // register as an input (QQQQPR:$src) tied to the output with "$src = $dst".
  // Only the plain and am6offset writeback forms are defined here.
  550. class VLDQQQQPseudo<InstrItinClass itin>
  551. : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
  552. "$src = $dst">;
  553. class VLDQQQQWBPseudo<InstrItinClass itin>
  554. : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
  555. (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
  556. "$addr.addr = $wb, $src = $dst">;
  557. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  558. // VLD1 : Vector Load (multiple single elements)
  // VLD1D loads into a one-D-register list (VecListOneD); VLD1Q loads into a
  // D-register pair (VecListDPair). Neither has a writeback output: both fix
  // Rm to 0b1111, have an empty selection pattern, and decode through
  // DecodeVLDST1Instruction. The D form copies one bit of the address operand
  // into the encoding (Inst{4} = Rn{4}); the Q form copies two
  // (Inst{5-4} = Rn{5-4}).
  559. class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  560. : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
  561. (ins AddrMode:$Rn), IIC_VLD1,
  562. "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  563. let Rm = 0b1111;
  564. let Inst{4} = Rn{4};
  565. let DecoderMethod = "DecodeVLDST1Instruction";
  566. }
  567. class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  568. : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
  569. (ins AddrMode:$Rn), IIC_VLD1x2,
  570. "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  571. let Rm = 0b1111;
  572. let Inst{5-4} = Rn{5-4};
  573. let DecoderMethod = "DecodeVLDST1Instruction";
  574. }
  // Instantiations per element size. The op7_4 argument leaves one bit (D) or
  // two bits (Q) uninitialized with `?`.
  // NOTE(review): presumably those are the bits supplied by the
  // Inst{...} = Rn{...} assignments in the classes; confirm against NLdSt.
  575. def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
  576. def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
  577. def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
  578. def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
  579. def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
  580. def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  581. def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  582. def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  583. // ...with address register writeback:
  // One-register VLD1 with address writeback. Produces two records per
  // instantiation:
  //   _fixed    - "$Rn!" syntax, Rm pinned to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // Both tie the $wb result to the base address.
  multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs VecListOneD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }

    def _register
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs VecListOneD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Register-pair VLD1 with address writeback. Produces two records per
  // instantiation:
  //   _fixed    - "$Rn!" syntax, Rm pinned to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // Both tie the $wb result to the base address.
  multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
    }

    def _register
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback instantiations (each yields a _fixed and a _register record).
  defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;

  defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
  defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  626. // ...with 3 registers
  // VLD1 into a three-register list (VecListThreeD); no writeback result.
  class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0110, op7_4,
            (outs VecListThreeD:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1x3,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;      // Rm is a fixed value for this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
  }
  // Three-register VLD1 with address writeback (_fixed: "$Rn!" with
  // Rm = 0b1101; _register: Rm supplied as an rGPR operand).
  // NOTE(review): both records use IIC_VLD1x2u although the non-writeback
  // VLD1D3 class uses IIC_VLD1x3 - confirm whether a three-register writeback
  // itinerary was intended.
  multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0110, op7_4,
              (outs VecListThreeD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{4} = Rn{4};
    }

    def _register
      : NLdSt<0, 0b10, 0b0110, op7_4,
              (outs VecListThreeD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD3]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{4} = Rn{4};
    }
  }
  // Three-register VLD1, one def per element size.
  def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
  def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
  def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
  def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

  // Matching writeback instantiations (_fixed / _register).
  defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
  defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
  // QQ-register pseudos for the three-register VLD1 forms: plain, fixed
  // writeback and register writeback, for each element size.
  def VLD1d8TPseudo             : VLDQQPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d8TPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d8TPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;

  def VLD1d16TPseudo            : VLDQQPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d16TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;

  def VLD1d32TPseudo            : VLDQQPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d32TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;

  def VLD1d64TPseudo            : VLDQQPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>,
                                  Sched<[WriteVLD3]>;
  // QQQQ-register "High"/"Low" pseudos for the three-register VLD1 forms.
  // Per element size: HighT (no writeback), HighT_UPD and LowT_UPD.
  def VLD1q8HighTPseudo      : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q8HighTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q8LowTPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

  def VLD1q16HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q16LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

  def VLD1q32HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q32LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

  def VLD1q64HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>,   Sched<[WriteVLD3]>;
  def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  def VLD1q64LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
  684. // ...with 4 registers
  // VLD1 into a four-register list (VecListFourD); no writeback result.
  class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0010, op7_4,
            (outs VecListFourD:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1x4,
            "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Rm = 0b1111;          // Rm is a fixed value for this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 come from the address operand.
  }
  // Four-register VLD1 with address writeback (_fixed: "$Rn!" with
  // Rm = 0b1101; _register: Rm supplied as an rGPR operand).
  // NOTE(review): both records use IIC_VLD1x2u although the non-writeback
  // VLD1D4 class uses IIC_VLD1x4 - confirm whether a four-register writeback
  // itinerary was intended.
  multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, 0b0010, op7_4,
              (outs VecListFourD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
    }

    def _register
      : NLdSt<0, 0b10, 0b0010, op7_4,
              (outs VecListFourD:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u,
              "vld1", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []>,
        Sched<[WriteVLD4]> {
      let DecoderMethod = "DecodeVLDST1Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Four-register VLD1, one def per element size.
  def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

  // Matching writeback instantiations (_fixed / _register).
  defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
  // QQ-register pseudos for the four-register VLD1 forms: plain, fixed
  // writeback and register writeback, for each element size.
  def VLD1d8QPseudo             : VLDQQPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d8QPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d8QPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;

  def VLD1d16QPseudo            : VLDQQPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d16QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;

  def VLD1d32QPseudo            : VLDQQPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d32QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;

  def VLD1d64QPseudo            : VLDQQPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>,
                                  Sched<[WriteVLD4]>;
  // QQQQ-register "Low"/"High" pseudos for the four-register VLD1 forms.
  // Per element size: LowQ_UPD, HighQ (no writeback) and HighQ_UPD.
  def VLD1q8LowQPseudo_UPD   : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q8HighQPseudo      : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q8HighQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

  def VLD1q16LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q16HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

  def VLD1q32LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q32HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

  def VLD1q64LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  def VLD1q64HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>,   Sched<[WriteVLD4]>;
  def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
  742. // VLD2 : Vector Load (multiple 2-element structures)
  // VLD2 without writeback. The register-list operand type (VdTy), the
  // itinerary and the address operand class are all supplied by the def; no
  // Sched is attached here, so each def adds its own.
  class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
             InstrItinClass itin, Operand AddrMode>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs VdTy:$Vd),
            (ins AddrMode:$Rn),
            itin,
            "vld2", Dt, "$Vd, $Rn", "", []> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Rm = 0b1111;          // Rm is a fixed value for this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 come from the address operand.
  }
  // VLD2 into a D-register pair.
  def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;
  def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;
  def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;

  // VLD2 into a four-register list.
  def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>,
                Sched<[WriteVLD4]>;
  def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>,
                Sched<[WriteVLD4]>;
  def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>,
                Sched<[WriteVLD4]>;

  // QQ-register pseudos for the four-register forms.
  def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
  767. // ...with address register writeback:
  // VLD2 with address writeback. Produces two records per instantiation:
  //   _fixed    - "$Rn!" syntax, Rm pinned to 0b1101.
  //   _register - "$Rn, $Rm" syntax, Rm supplied as an rGPR operand.
  // No Sched is attached here; each defm adds its own.
  multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                    RegisterOperand VdTy, InstrItinClass itin,
                    Operand AddrMode> {
    def _fixed
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn),
              itin,
              "vld2", Dt, "$Vd, $Rn!",
              "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
    }

    def _register
      : NLdSt<0, 0b10, op11_8, op7_4,
              (outs VdTy:$Vd, GPR:$wb),
              (ins AddrMode:$Rn, rGPR:$Rm),
              itin,
              "vld2", Dt, "$Vd, $Rn, $Rm",
              "$Rn.addr = $wb", []> {
      let DecoderMethod = "DecodeVLDST2Instruction";
      let Inst{5-4} = Rn{5-4};
    }
  }
  // Writeback VLD2 into a D-register pair.
  defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u,
                          addrmode6align64or128>,
                   Sched<[WriteVLD2]>;
  defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                          addrmode6align64or128>,
                   Sched<[WriteVLD2]>;
  defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                          addrmode6align64or128>,
                   Sched<[WriteVLD2]>;

  // Writeback VLD2 into a four-register list.
  defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u,
                          addrmode6align64or128or256>,
                   Sched<[WriteVLD4]>;
  defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                          addrmode6align64or128or256>,
                   Sched<[WriteVLD4]>;
  defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                          addrmode6align64or128or256>,
                   Sched<[WriteVLD4]>;

  // QQ-register writeback pseudos for the four-register forms.
  def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>,
                                 Sched<[WriteVLD4]>;
  804. // ...with double-spaced registers
  // VLD2 using the VecListDPairSpaced register list, without writeback.
  def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;
  def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;
  def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>,
                Sched<[WriteVLD2]>;

  // ...and the matching writeback instantiations (_fixed / _register).
  defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                          IIC_VLD2u, addrmode6align64or128>,
                   Sched<[WriteVLD2]>;
  defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                          IIC_VLD2u, addrmode6align64or128>,
                   Sched<[WriteVLD2]>;
  defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                          IIC_VLD2u, addrmode6align64or128>,
                   Sched<[WriteVLD2]>;
  817. // VLD3 : Vector Load (multiple 3-element structures)
  // VLD3 without writeback. The three destination D registers are separate
  // DPR results ($Vd, $dst2, $dst3) rather than a register-list operand.
  class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn),
            IIC_VLD3,
            "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Rm = 0b1111;      // Rm is a fixed value for this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
  }
  // VLD3 instructions (op11_8 = 0b0100), one per element size.
  def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
  def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
  def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

  // QQ-register pseudos for the same element sizes.
  def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
  832. // ...with address register writeback:
  // VLD3 with address writeback: adds the $wb result (tied to the base
  // address) and an am6offset operand bound to Rm.
  class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm),
            IIC_VLD3u,
            "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
  }
  // Writeback VLD3 instructions (op11_8 = 0b0100), one per element size.
  def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
  def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
  def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

  // QQ-register writeback pseudos for the same element sizes.
  def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  848. // ...with double-spaced registers:
  // Same classes as the VLD3d* defs, instantiated with op11_8 = 0b0101.
  def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
  def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
  def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;

  def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
  def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
  def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

  // QQQQ-register writeback pseudos.
  def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  858. // ...alternate versions to be allocated odd register numbers:
  // "odd" QQQQ pseudos: plain form first, then the writeback (_UPD) form.
  def VLD3q8oddPseudo      : VLDQQQQPseudo<IIC_VLD3>,    Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo     : VLDQQQQPseudo<IIC_VLD3>,    Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo     : VLDQQQQPseudo<IIC_VLD3>,    Sched<[WriteVLD3]>;

  def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
  865. // VLD4 : Vector Load (multiple 4-element structures)
  // VLD4 without writeback. The four destination D registers are separate
  // DPR results ($Vd, $dst2, $dst3, $dst4).
  class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn),
            IIC_VLD4,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Rm = 0b1111;          // Rm is a fixed value for this form.
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 come from the address operand.
  }
  // VLD4 instructions (op11_8 = 0b0000), one per element size.
  def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
  def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
  def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

  // QQ-register pseudos for the same element sizes.
  def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
  882. // ...with address register writeback:
  // VLD4 with address writeback: adds the $wb result (tied to the base
  // address) and an am6offset operand bound to Rm.
  class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm),
            IIC_VLD4u,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVLD4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    let Inst{5-4} = Rn{5-4};  // Encoding bits 5-4 come from the address operand.
  }
  // Writeback VLD4 instructions (op11_8 = 0b0000), one per element size.
  def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
  def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
  def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

  // QQ-register writeback pseudos for the same element sizes.
  def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  898. // ...with double-spaced registers:
  // Same classes as the VLD4d* defs, instantiated with op11_8 = 0b0001.
  def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
  def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
  def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;

  def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
  def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
  def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

  // QQQQ-register writeback pseudos.
  def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  908. // ...alternate versions to be allocated odd register numbers:
  // "odd" QQQQ pseudos: plain form first, then the writeback (_UPD) form.
  def VLD4q8oddPseudo      : VLDQQQQPseudo<IIC_VLD4>,    Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo     : VLDQQQQPseudo<IIC_VLD4>,    Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo     : VLDQQQQPseudo<IIC_VLD4>,    Sched<[WriteVLD4]>;

  def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
  915. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  916. // Classes for VLD*LN pseudo-instructions with multi-register operands.
  917. // These are expanded to real instructions after register allocation.
  // Lane-load pseudo on a QPR value: takes the existing vector ($src, tied to
  // $dst) plus a lane index.
  class VLDQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback variant of the QPR lane-load pseudo: also yields $wb (tied to
  // the base address) and takes an am6offset operand.
  class VLDQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQPR value: takes the existing tuple ($src, tied to
  // $dst) plus a lane index.
  class VLDQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback variant of the QQPR lane-load pseudo: also yields $wb (tied to
  // the base address) and takes an am6offset operand.
  class VLDQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  // Lane-load pseudo on a QQQQPR value: takes the existing tuple ($src, tied
  // to $dst) plus a lane index.
  class VLDQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
  // Writeback variant of the QQQQPR lane-load pseudo: also yields $wb (tied
  // to the base address) and takes an am6offset operand.
  class VLDQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
  942. // VLD1LN : Vector Load (single element to one lane)
  // VLD1 of a single element into one lane of a D register.
  //   op11_8, op7_4 - bits passed through to NLdStLn.
  //   Dt            - data-type suffix used in the assembly string.
  //   Ty            - vector type of the DPR value being updated.
  //   LoadOp        - load fragment matched for the inserted element.
  // The selection pattern matches vector_insert of an i32 loaded through
  // LoadOp into lane $lane of $src; $src is tied to the $Vd result.
  //
  // Sched<[WriteVLD1]> is attached here so this class agrees with the sibling
  // VLD1LN32, VLD1QLNPseudo and VLD1LNWB classes, which all carry it.
  class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                            (i32 (LoadOp addrmode6:$Rn)),
                                            imm:$lane))]>,
      Sched<[WriteVLD1]> {
    let Rm = 0b1111;  // Rm is a fixed value for this form.
    let DecoderMethod = "DecodeVLD1LN";
  }
  // Variant of the single-lane VLD1 class that uses the addrmode6oneL32
  // address operand, both in the operand list and in the selection pattern.
  class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                            (i32 (LoadOp addrmode6oneL32:$Rn)),
                                            imm:$lane))]>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
    let Rm = 0b1111;  // Rm is a fixed value for this form.
  }
  // Q-register single-lane VLD1 pseudo. Supplies the selection pattern:
  // vector_insert of an i32 loaded through LoadOp into lane $lane of $src.
  class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp>
    : VLDQLNPseudo<IIC_VLD1ln>,
      Sched<[WriteVLD1]> {
    let Pattern = [(set QPR:$dst,
                        (vector_insert (Ty QPR:$src),
                                       (i32 (LoadOp addrmode6:$addr)),
                                       imm:$lane))];
  }
  // Single-lane VLD1 into a D register, per element size. Each def places the
  // lane index in the top bits of Inst{7-4}; the 16- and 32-bit forms also
  // copy Rn{5-4} into Inst{5-4}.
  def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
    let Inst{7-5} = lane{2-0};
  }

  def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
    let Inst{7-6} = lane{1-0};
    let Inst{5-4} = Rn{5-4};
  }

  def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
    let Inst{7}   = lane{0};
    let Inst{5-4} = Rn{5-4};
  }
  // Q-register single-lane VLD1 pseudos, one per element size.
  def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
  def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
  def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
  let Predicates = [HasNEON] in {
    // Lane inserts of a loaded f16 / bf16 / f32 element select the same
    // single-lane VLD1 instructions (D registers) or pseudos (Q registers)
    // as the integer element types.
    def : Pat<(vector_insert (v4f16 DPR:$src),
                             (f16 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8f16 QPR:$src),
                             (f16 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    def : Pat<(vector_insert (v4bf16 DPR:$src),
                             (bf16 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v8bf16 QPR:$src),
                             (bf16 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    def : Pat<(vector_insert (v2f32 DPR:$src),
                             (f32 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(vector_insert (v4f32 QPR:$src),
                             (f32 (load addrmode6:$addr)), imm:$lane),
              (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // Inserting a 64-bit subvector at index 0 of an undefined 128-bit vector
    // is only a subregister copy, so it is selected to INSERT_SUBREG into
    // dsub_0 of an IMPLICIT_DEF and needs no real instruction.
    def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
  }
  1021. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  1022. // ...with address register writeback:
  // Single-lane VLD1 with address writeback. $src is tied to $Vd and the
  // base address is tied to $wb; Rm is bound to the am6offset operand. No
  // selection pattern is attached.
  class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src, nohash_imm:$lane),
              IIC_VLD1lnu,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$src = $Vd, $Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1LN";
  }
  // Writeback single-lane VLD1 into a D register, per element size.
  def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }

  def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
    let Inst{4}   = Rn{4};
  }

  def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
    let Inst{7} = lane{0};
    // Both encoding bits 5 and 4 are driven from Rn{4}.
    let Inst{5} = Rn{4};
    let Inst{4} = Rn{4};
  }

  // Q-register writeback pseudos for the same element sizes.
  def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
  1046. // VLD2LN : Vector Load (single 2-element structure to one lane)
  // Single-lane VLD2 without writeback. Two D registers are updated in
  // place: $src1/$src2 are tied to $Vd/$dst2.
  class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2ln,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD2LN";
    let Rm = 0b1111;      // Rm is a fixed value for this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
  }
  // Single-lane VLD2, per element size; each def places the lane index in
  // the top bits of Inst{7-4}.
  def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7}   = lane{0}; }

  // Q-register pseudos for the same element sizes.
  def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  1068. // ...with double-spaced registers:
  // Same class as VLD2LNd16/d32, with one additional op7_4 bit set to 1.
  def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7}   = lane{0}; }

  // QQ-register pseudos.
  def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
  1077. // ...with address register writeback:
  // Single-lane VLD2 with address writeback. $src1/$src2 are tied to
  // $Vd/$dst2 and the base address is tied to $wb; Rm is bound to the
  // am6offset operand.
  //
  // Sched<[WriteVLD1]> is attached here so this class agrees with the
  // non-writeback VLD2LN class and with the VLD2LN*Pseudo_UPD defs, which
  // all carry it.
  class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2lnu,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
    let DecoderMethod = "DecodeVLD2LN";
  }
  // Writeback single-lane VLD2, per element size.
  def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // Q-register writeback pseudos.
  def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

  // Same writeback class with one additional op7_4 bit set to 1.
  def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // QQ-register writeback pseudos.
  def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
  1107. // VLD3LN : Vector Load (single 3-element structure to one lane)
  // Single-lane VLD3 without writeback. Three D registers are updated in
  // place: $src1..$src3 are tied to $Vd, $dst2, $dst3.
  class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VLD3ln,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
    let Rm = 0b1111;  // Rm is a fixed value for this form.
  }
  // Single-lane VLD3, per element size; each def places the lane index in
  // the top bits of Inst{7-4}.
  def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
  def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7}   = lane{0}; }

  // QQ-register pseudos for the same element sizes.
  def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  1129. // ...with double-spaced registers:
  // Same class as VLD3LNd16/d32, with one additional op7_4 bit set to 1.
  def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
  def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7}   = lane{0}; }

  // QQQQ-register pseudos.
  def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
  1138. // ...with address register writeback:
  // Single-lane VLD3 with address writeback. $src1..$src3 are tied to
  // $Vd, $dst2, $dst3 and the base address is tied to $wb; Rm is bound to
  // the am6offset operand.
  class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VLD3lnu,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
              []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD3LN";
  }
  // Writeback single-lane VLD3, per element size.
  def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // QQ-register writeback pseudos.
  def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

  // Same writeback class with one additional op7_4 bit set to 1.
  def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // QQQQ-register writeback pseudos.
  def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
  1170. // VLD4LN : Vector Load (single 4-element structure to one lane)
  // Single-lane VLD4 without writeback. Four D registers are updated in
  // place: $src1..$src4 are tied to $Vd, $dst2, $dst3, $dst4.
  class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VLD4ln,
              "vld4", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
      Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLD4LN";
    let Rm = 0b1111;      // Rm is a fixed value for this form.
    let Inst{4} = Rn{4};  // Encoding bit 4 comes from the address operand.
  }
  // VLD4LN d-register instructions.  Unset ('?') op7_4 bits are filled from
  // $lane here and from Rn{4} in the VLD4LN class; the 32-bit form additionally
  // takes Inst{5} from Rn{5}.
  1183. def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  1184. let Inst{7-5} = lane{2-0};
  1185. }
  1186. def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  1187. let Inst{7-6} = lane{1-0};
  1188. }
  1189. def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  1190. let Inst{7} = lane{0};
  1191. let Inst{5} = Rn{5};
  1192. }
  // Matching pseudo-instructions: VLDQQLNPseudo with the IIC_VLD4ln itinerary.
  1193. def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  1194. def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  1195. def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  1196. // ...with double-spaced registers:
  // Same op11_8 values as VLD4LNd16/VLD4LNd32; the only template difference is
  // one op7_4 entry set to 1 instead of 0.
  1197. def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  1198. let Inst{7-6} = lane{1-0};
  1199. }
  1200. def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  1201. let Inst{7} = lane{0};
  1202. let Inst{5} = Rn{5};
  1203. }
  // Matching pseudo-instructions, built from VLDQQQQLNPseudo.
  1204. def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  1205. def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
  1206. // ...with address register writeback:
  // VLD4LNWB: single-lane "vld4" with writeback.  Adds GPR:$wb (tied to
  // $Rn.addr) and an am6offset:$Rm input to the VLD4LN operand list; each
  // $srcN is tied to its destination register.  No selection pattern ([]).
  // NOTE(review): no Sched<[...]> is attached here, unlike the VLD3LNWB and
  // VLD4LN classes in this file - confirm whether that is intentional.
  1207. class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  1208. : NLdStLn<1, 0b10, op11_8, op7_4,
  1209. (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
  1210. (ins addrmode6:$Rn, am6offset:$Rm,
  1211. DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
  1212. IIC_VLD4lnu, "vld4", Dt,
  1213. "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
  1214. "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
  1215. []> {
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1216. let Inst{4} = Rn{4};
  1217. let DecoderMethod = "DecodeVLD4LN" ;
  1218. }
  // Writeback VLD4LN d-register instructions.  Lane bits are filled from
  // $lane per element size; the 32-bit form also takes Inst{5} from Rn{5}.
  1219. def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  1220. let Inst{7-5} = lane{2-0};
  1221. }
  1222. def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  1223. let Inst{7-6} = lane{1-0};
  1224. }
  1225. def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  1226. let Inst{7} = lane{0};
  1227. let Inst{5} = Rn{5};
  1228. }
  // Matching pseudo-instructions: VLDQQLNWBPseudo with the IIC_VLD4lnu itinerary.
  1229. def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1230. def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1231. def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  // Writeback VLD4LN q variants: same op11_8 as the d16/d32 _UPD defs, with one
  // op7_4 entry set to 1 instead of 0.
  1232. def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  1233. let Inst{7-6} = lane{1-0};
  1234. }
  1235. def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  1236. let Inst{7} = lane{0};
  1237. let Inst{5} = Rn{5};
  1238. }
  // Matching pseudo-instructions, built from VLDQQQQLNWBPseudo.
  1239. def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1240. def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
  1241. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  1242. // VLD1DUP : Vector Load (single element to all lanes)
  // Class parameters:
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   Ty       - vector value type produced by the selection pattern.
  //   LoadOp   - load fragment used inside the selection pattern.
  //   AddrMode - address operand type for $Rn.
  // The pattern matches an ARMvdup of an i32 value loaded with LoadOp from $Rn.
  1243. class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
  1244. Operand AddrMode>
  1245. : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
  1246. (ins AddrMode:$Rn),
  1247. IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
  1248. [(set VecListOneDAllLanes:$Vd,
  1249. (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
  1250. Sched<[WriteVLD2]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1251. let Rm = 0b1111;
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1252. let Inst{4} = Rn{4};
  1253. let DecoderMethod = "DecodeVLD1DupInstruction";
  1254. }
  // VLD1DUP instances for 8-, 16- and 32-bit elements.  The 8- and 16-bit forms
  // use extending loads (extloadi8 / extloadi16) and the 32-bit form uses a
  // plain load; each element size has its own alignment-specific address
  // operand type.
  1255. def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
  1256. addrmode6dupalignNone>;
  1257. def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
  1258. addrmode6dupalign16>;
  1259. def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
  1260. addrmode6dupalign32>;
  // With NEON available, a v2f32 ARMvdup of an f32 load is also selected to
  // VLD1DUPd32 (the class pattern itself only covers the i32 form).
  1261. let Predicates = [HasNEON] in {
  1262. def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
  1263. (VLD1DUPd32 addrmode6:$addr)>;
  1264. }
  // VLD1QDUP: same shape as VLD1DUP, but the result is a VecListDPairAllLanes
  // operand instead of VecListOneDAllLanes.  Parameters have the same roles:
  // op7_4 (encoding), Dt (asm suffix), Ty (result vector type), LoadOp (load
  // fragment in the pattern), AddrMode (address operand type).
  // NOTE(review): no Sched<[...]> is attached here, unlike VLD1DUP - confirm
  // whether that is intentional.
  1265. class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
  1266. Operand AddrMode>
  1267. : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
  1268. (ins AddrMode:$Rn), IIC_VLD1dup,
  1269. "vld1", Dt, "$Vd, $Rn", "",
  1270. [(set VecListDPairAllLanes:$Vd,
  1271. (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1272. let Rm = 0b1111;
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1273. let Inst{4} = Rn{4};
  1274. let DecoderMethod = "DecodeVLD1DupInstruction";
  1275. }
  // VLD1QDUP instances for 8-, 16- and 32-bit elements.  Note that the 8-bit
  // form passes a fully specified op7_4 ({0,0,1,0}) whereas the 16- and 32-bit
  // forms leave the last entry unset ('?').
  1276. def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
  1277. addrmode6dupalignNone>;
  1278. def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
  1279. addrmode6dupalign16>;
  1280. def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
  1281. addrmode6dupalign32>;
  // With NEON available, a v4f32 ARMvdup of an f32 load is also selected to
  // VLD1DUPq32.
  1282. let Predicates = [HasNEON] in {
  1283. def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
  1284. (VLD1DUPq32 addrmode6:$addr)>;
  1285. }
  1286. let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
  1287. // ...with address register writeback:
  // VLD1DUPWB expands to two writeback instructions per instantiation:
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both produce VecListOneDAllLanes:$Vd plus GPR:$wb tied to $Rn.addr and
  // carry no selection pattern ([]).
  // NOTE(review): neither def has a Sched<[...]> attached, unlike the _fixed
  // def of VLD1QDUPWB - confirm whether that is intentional.
  1288. multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1289. def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
  1290. (outs VecListOneDAllLanes:$Vd, GPR:$wb),
  1291. (ins AddrMode:$Rn), IIC_VLD1dupu,
  1292. "vld1", Dt, "$Vd, $Rn!",
  1293. "$Rn.addr = $wb", []> {
  1294. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1295. let Inst{4} = Rn{4};
  1296. let DecoderMethod = "DecodeVLD1DupInstruction";
  1297. }
  1298. def _register : NLdSt<1, 0b10, 0b1100, op7_4,
  1299. (outs VecListOneDAllLanes:$Vd, GPR:$wb),
  1300. (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
  1301. "vld1", Dt, "$Vd, $Rn, $Rm",
  1302. "$Rn.addr = $wb", []> {
  1303. let Inst{4} = Rn{4};
  1304. let DecoderMethod = "DecodeVLD1DupInstruction";
  1305. }
  1306. }
  // VLD1QDUPWB: writeback multiclass with the same _fixed / _register split as
  // VLD1DUPWB, producing VecListDPairAllLanes:$Vd plus GPR:$wb (tied to
  // $Rn.addr).  No selection patterns are attached.
  // NOTE(review): only the _fixed def carries Sched<[WriteVLD1]>; the
  // _register def has no Sched - confirm whether that is intentional.
  1307. multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1308. def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
  1309. (outs VecListDPairAllLanes:$Vd, GPR:$wb),
  1310. (ins AddrMode:$Rn), IIC_VLD1dupu,
  1311. "vld1", Dt, "$Vd, $Rn!",
  1312. "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  1313. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1314. let Inst{4} = Rn{4};
  1315. let DecoderMethod = "DecodeVLD1DupInstruction";
  1316. }
  1317. def _register : NLdSt<1, 0b10, 0b1100, op7_4,
  1318. (outs VecListDPairAllLanes:$Vd, GPR:$wb),
  1319. (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
  1320. "vld1", Dt, "$Vd, $Rn, $Rm",
  1321. "$Rn.addr = $wb", []> {
  1322. let Inst{4} = Rn{4};
  1323. let DecoderMethod = "DecodeVLD1DupInstruction";
  1324. }
  1325. }
  // Writeback VLD1DUP instantiations.  Each defm yields a _fixed and a
  // _register instruction; the op7_4 values and address operand types mirror
  // those of the non-writeback VLD1DUPd*/VLD1DUPq* defs, except that the
  // d8 form here passes a fully specified {0,0,0,0}.
  1326. defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
  1327. defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
  1328. defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
  1329. defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
  1330. defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
  1331. defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
  1332. // VLD2DUP : Vector Load (single 2-element structure to all lanes)
  // Class parameters:
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   VdTy     - register-list operand type for the result $Vd.
  //   AddrMode - address operand type for $Rn.
  // No selection pattern is attached ([]).
  // NOTE(review): no Sched<[...]> is attached to this class, unlike the
  // VLD2DUPWB multiclass defs - confirm whether that is intentional.
  1333. class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  1334. : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
  1335. (ins AddrMode:$Rn), IIC_VLD2dup,
  1336. "vld2", Dt, "$Vd, $Rn", "", []> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1337. let Rm = 0b1111;
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1338. let Inst{4} = Rn{4};
  1339. let DecoderMethod = "DecodeVLD2DupInstruction";
  1340. }
  // VLD2DUP instances using the VecListDPairAllLanes register list.  The
  // address operand type changes with element size (align16 / align32 / align64).
  1341. def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
  1342. addrmode6dupalign16>;
  1343. def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
  1344. addrmode6dupalign32>;
  1345. def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
  1346. addrmode6dupalign64>;
  1347. // HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
  1348. // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
  1349. // ...with double-spaced registers
  // These use VecListDPairSpacedAllLanes and differ from the defs above only in
  // one op7_4 entry (1 instead of 0).
  1350. def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
  1351. addrmode6dupalign16>;
  1352. def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
  1353. addrmode6dupalign32>;
  1354. def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
  1355. addrmode6dupalign64>;
  // Even/Odd VLD2DUP pseudo-instructions for 8-, 16- and 32-bit elements; all
  // six are VLDQQPseudo with the IIC_VLD2dup itinerary and WriteVLD2 scheduling.
  1356. def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1357. def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1358. def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1359. def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1360. def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1361. def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1362. // ...with address register writeback:
  // VLD2DUPWB expands to two writeback instructions per instantiation:
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both produce VdTy:$Vd plus GPR:$wb tied to $Rn.addr, use the IIC_VLD2dupu
  // itinerary, are scheduled as WriteVLD1 and carry no selection pattern.
  1363. multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
  1364. Operand AddrMode> {
  1365. def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
  1366. (outs VdTy:$Vd, GPR:$wb),
  1367. (ins AddrMode:$Rn), IIC_VLD2dupu,
  1368. "vld2", Dt, "$Vd, $Rn!",
  1369. "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  1370. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1371. let Inst{4} = Rn{4};
  1372. let DecoderMethod = "DecodeVLD2DupInstruction";
  1373. }
  1374. def _register : NLdSt<1, 0b10, 0b1101, op7_4,
  1375. (outs VdTy:$Vd, GPR:$wb),
  1376. (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
  1377. "vld2", Dt, "$Vd, $Rn, $Rm",
  1378. "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  1379. let Inst{4} = Rn{4};
  1380. let DecoderMethod = "DecodeVLD2DupInstruction";
  1381. }
  1382. }
  // Writeback VLD2DUP instantiations (each yields _fixed and _register).
  // Register lists and address operand types match the non-writeback
  // VLD2DUPd* / VLD2DUPd*x2 defs; the two 8-bit forms pass a fully specified
  // op7_4 here instead of leaving the last entry as '?'.
  1383. defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
  1384. addrmode6dupalign16>;
  1385. defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
  1386. addrmode6dupalign32>;
  1387. defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
  1388. addrmode6dupalign64>;
  1389. defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
  1390. addrmode6dupalign16>;
  1391. defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
  1392. addrmode6dupalign32>;
  1393. defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
  1394. addrmode6dupalign64>;
  // Writeback "Odd" VLD2DUP pseudo-instructions: the _fixed forms use
  // VLDQQWBfixedPseudo and the _register forms use VLDQQWBPseudo.
  // NOTE(review): these use the IIC_VLD2dup itinerary, whereas the VLD2DUPWB
  // multiclass uses IIC_VLD2dupu - confirm whether that is intentional.
  1395. def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1396. def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1397. def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1398. def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1399. def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1400. def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
  1401. // VLD3DUP : Vector Load (single 3-element structure to all lanes)
  // Class parameters:
  //   op7_4 - encoding field forwarded to NLdSt.
  //   Dt    - data-type suffix string used in the assembly syntax.
  // Produces three D registers from an addrmode6dup address; no selection
  // pattern is attached ([]).
  1402. class VLD3DUP<bits<4> op7_4, string Dt>
  1403. : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
  1404. (ins addrmode6dup:$Rn), IIC_VLD3dup,
  1405. "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
  1406. Sched<[WriteVLD2]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1407. let Rm = 0b1111;
  // Unlike the other DUP load classes in this file, bit 4 is forced to 0 here
  // rather than copied from Rn{4}.
  1408. let Inst{4} = 0;
  1409. let DecoderMethod = "DecodeVLD3DupInstruction";
  1410. }
  // VLD3DUP d-register instructions and their VLDQQPseudo pseudo-instructions.
  1411. def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
  1412. def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
  1413. def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
  1414. def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1415. def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1416. def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1417. // ...with double-spaced registers (not used for codegen):
  // Same as the d forms except for one op7_4 entry set to 1 instead of 0.
  1418. def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
  1419. def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
  1420. def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
  // Even/Odd pseudo-instructions, built from VLDQQQQPseudo.
  1421. def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1422. def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1423. def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1424. def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1425. def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1426. def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
  1427. // ...with address register writeback:
  // VLD3DUPWB: "vld3" all-lanes load with writeback.
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   AddrMode - address operand type for $Rn.
  // Adds GPR:$wb (tied to $Rn.addr) and an am6offset:$Rm input; no selection
  // pattern is attached ([]).
  1428. class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  1429. : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
  1430. (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
  1431. "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
  1432. "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  // As in VLD3DUP, bit 4 is forced to 0 rather than copied from Rn{4}.
  1433. let Inst{4} = 0;
  1434. let DecoderMethod = "DecodeVLD3DupInstruction";
  1435. }
  // Writeback VLD3DUP instructions.  Every element size passes
  // addrmode6dupalign64 as the address operand type; the 8-bit forms pass a
  // fully specified op7_4 while the 16/32-bit forms leave the last entry '?'.
  1436. def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
  1437. def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
  1438. def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
  1439. def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
  1440. def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
  1441. def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
  // Writeback pseudo-instructions: VLDQQWBPseudo for the d forms and
  // VLDQQQQWBPseudo for the q "Odd" forms, all with the IIC_VLD3dupu itinerary.
  1442. def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1443. def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1444. def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1445. def VLD3DUPq8OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1446. def VLD3DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1447. def VLD3DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
  1448. // VLD4DUP : Vector Load (single 4-element structure to all lanes)
  // Class parameters:
  //   op7_4 - encoding field forwarded to NLdSt.
  //   Dt    - data-type suffix string used in the assembly syntax.
  // Produces four D registers from an addrmode6dup address; no selection
  // pattern is attached ([]).
  // NOTE(review): no Sched<[...]> is attached here, unlike VLD3DUP and
  // VLD4DUPWB - confirm whether that is intentional.
  1449. class VLD4DUP<bits<4> op7_4, string Dt>
  1450. : NLdSt<1, 0b10, 0b1111, op7_4,
  1451. (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
  1452. (ins addrmode6dup:$Rn), IIC_VLD4dup,
  1453. "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1454. let Rm = 0b1111;
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1455. let Inst{4} = Rn{4};
  1456. let DecoderMethod = "DecodeVLD4DupInstruction";
  1457. }
  // VLD4DUP d-register instructions.  The 32-bit form leaves an extra op7_4
  // entry unset and fills Inst{6} from Rn{5}.
  1458. def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
  1459. def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
  1460. def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
  // Matching pseudo-instructions: VLDQQPseudo with the IIC_VLD4dup itinerary.
  1461. def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1462. def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1463. def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1464. // ...with double-spaced registers (not used for codegen):
  // Same as the d forms except for one op7_4 entry set to 1 instead of 0.
  1465. def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
  1466. def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
  1467. def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
  // Even/Odd pseudo-instructions, built from VLDQQQQPseudo.
  1468. def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1469. def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1470. def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1471. def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1472. def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1473. def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
  1474. // ...with address register writeback:
  // VLD4DUPWB: "vld4" all-lanes load with writeback.
  //   op7_4 - encoding field forwarded to NLdSt.
  //   Dt    - data-type suffix string used in the assembly syntax.
  // Adds GPR:$wb (tied to $Rn.addr) and an am6offset:$Rm input to the VLD4DUP
  // operand list; no selection pattern is attached ([]).
  1475. class VLD4DUPWB<bits<4> op7_4, string Dt>
  1476. : NLdSt<1, 0b10, 0b1111, op7_4,
  1477. (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
  1478. (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
  1479. "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
  1480. "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1481. let Inst{4} = Rn{4};
  1482. let DecoderMethod = "DecodeVLD4DupInstruction";
  1483. }
  // Writeback VLD4DUP instructions.  The 8-bit forms pass a fully specified
  // op7_4; the 32-bit forms fill Inst{6} from Rn{5}.
  1484. def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
  1485. def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
  1486. def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
  1487. def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
  1488. def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
  1489. def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
  // Writeback pseudo-instructions: VLDQQWBPseudo for the d forms and
  // VLDQQQQWBPseudo for the q "Odd" forms, all with the IIC_VLD4dupu itinerary.
  1490. def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1491. def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1492. def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1493. def VLD4DUPq8OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1494. def VLD4DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1495. def VLD4DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
  1496. } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
  1497. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  1498. // Classes for VST* pseudo-instructions with multi-register operands.
  1499. // These are expanded to real instructions after register allocation.
  // Q-register store pseudo classes.  Each takes the itinerary as its only
  // parameter and stores a QPR:$src to addrmode6:$addr.
  //   VSTQPseudo           - no outputs, no writeback.
  //   VSTQWBPseudo         - GPR:$wb output, am6offset:$offset input.
  //   VSTQWBfixedPseudo    - GPR:$wb output, no offset operand.
  //   VSTQWBregisterPseudo - GPR:$wb output, rGPR:$offset input.
  // The writeback variants tie $wb to $addr.addr via the final string argument.
  1500. class VSTQPseudo<InstrItinClass itin>
  1501. : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
  1502. class VSTQWBPseudo<InstrItinClass itin>
  1503. : PseudoNLdSt<(outs GPR:$wb),
  1504. (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
  1505. "$addr.addr = $wb">;
  1506. class VSTQWBfixedPseudo<InstrItinClass itin>
  1507. : PseudoNLdSt<(outs GPR:$wb),
  1508. (ins addrmode6:$addr, QPR:$src), itin,
  1509. "$addr.addr = $wb">;
  1510. class VSTQWBregisterPseudo<InstrItinClass itin>
  1511. : PseudoNLdSt<(outs GPR:$wb),
  1512. (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
  1513. "$addr.addr = $wb">;
  // QQ-register store pseudo classes: the same four shapes as the VSTQ*
  // pseudo classes (plain, am6offset writeback, fixed writeback, register
  // writeback) but with a QQPR:$src operand.
  1514. class VSTQQPseudo<InstrItinClass itin>
  1515. : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
  1516. class VSTQQWBPseudo<InstrItinClass itin>
  1517. : PseudoNLdSt<(outs GPR:$wb),
  1518. (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
  1519. "$addr.addr = $wb">;
  1520. class VSTQQWBfixedPseudo<InstrItinClass itin>
  1521. : PseudoNLdSt<(outs GPR:$wb),
  1522. (ins addrmode6:$addr, QQPR:$src), itin,
  1523. "$addr.addr = $wb">;
  1524. class VSTQQWBregisterPseudo<InstrItinClass itin>
  1525. : PseudoNLdSt<(outs GPR:$wb),
  1526. (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
  1527. "$addr.addr = $wb">;
  // QQQQ-register store pseudo classes.  Only the plain form and the
  // am6offset writeback form are defined here (no fixed/register variants).
  1528. class VSTQQQQPseudo<InstrItinClass itin>
  1529. : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
  1530. class VSTQQQQWBPseudo<InstrItinClass itin>
  1531. : PseudoNLdSt<(outs GPR:$wb),
  1532. (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
  1533. "$addr.addr = $wb">;
  1534. // VST1 : Vector Store (multiple single elements)
  // VST1D: one-register "vst1" without writeback.
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   AddrMode - address operand type for $Rn.
  // Stores a VecListOneD:$Vd register list; no outputs, no selection pattern.
  1535. class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  1536. : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
  1537. IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1538. let Rm = 0b1111;
  // Instruction bit 4 is taken from bit 4 of the $Rn operand encoding.
  1539. let Inst{4} = Rn{4};
  1540. let DecoderMethod = "DecodeVLDST1Instruction";
  1541. }
  // VST1Q: two-register "vst1" without writeback.  Same parameters as VST1D,
  // but stores a VecListDPair:$Vd list, uses the IIC_VST1x2 itinerary and
  // WriteVST2 scheduling, and takes two instruction bits from $Rn.
  1542. class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  1543. : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
  1544. IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1545. let Rm = 0b1111;
  // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand encoding.
  1546. let Inst{5-4} = Rn{5-4};
  1547. let DecoderMethod = "DecodeVLDST1Instruction";
  1548. }
  // One- and two-register VST1 instructions for 8/16/32/64-bit elements.  The d
  // forms leave one op7_4 entry unset (filled from Rn{4} in VST1D); the q forms
  // leave two unset (filled from Rn{5-4} in VST1Q).
  1549. def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
  1550. def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
  1551. def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
  1552. def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
  1553. def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
  1554. def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
  1555. def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
  1556. def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
  1557. // ...with address register writeback:
  // VST1DWB expands to two one-register writeback stores per instantiation:
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both output GPR:$wb tied to $Rn.addr and carry no selection pattern.
  // NOTE(review): the itinerary is IIC_VLD1u (VLD-named) although these are
  // stores, while the non-writeback VST1D uses IIC_VST1 - confirm intent.
  1558. multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1559. def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
  1560. (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
  1561. "vst1", Dt, "$Vd, $Rn!",
  1562. "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
  1563. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1564. let Inst{4} = Rn{4};
  1565. let DecoderMethod = "DecodeVLDST1Instruction";
  1566. }
  1567. def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
  1568. (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
  1569. IIC_VLD1u,
  1570. "vst1", Dt, "$Vd, $Rn, $Rm",
  1571. "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
  1572. let Inst{4} = Rn{4};
  1573. let DecoderMethod = "DecodeVLDST1Instruction";
  1574. }
  1575. }
  // VST1QWB: two-register writeback stores (VecListDPair:$Vd) with the same
  // _fixed / _register split as VST1DWB.  Bits 5-4 come from Rn{5-4}.
  // NOTE(review): the itinerary is IIC_VLD1x2u (VLD-named) although these are
  // stores, while the non-writeback VST1Q uses IIC_VST1x2 - confirm intent.
  1576. multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1577. def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
  1578. (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
  1579. "vst1", Dt, "$Vd, $Rn!",
  1580. "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  1581. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1582. let Inst{5-4} = Rn{5-4};
  1583. let DecoderMethod = "DecodeVLDST1Instruction";
  1584. }
  1585. def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
  1586. (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
  1587. IIC_VLD1x2u,
  1588. "vst1", Dt, "$Vd, $Rn, $Rm",
  1589. "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  1590. let Inst{5-4} = Rn{5-4};
  1591. let DecoderMethod = "DecodeVLDST1Instruction";
  1592. }
  1593. }
  // Writeback VST1 instantiations (each defm yields _fixed and _register).
  // The op7_4 values and address operand types match the non-writeback
  // VST1d* / VST1q* defs.
  1594. defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
  1595. defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
  1596. defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
  1597. defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
  1598. defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
  1599. defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
  1600. defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
  1601. defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
  1602. // ...with 3 registers
  // VST1D3: three-register "vst1" without writeback.
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   AddrMode - address operand type for $Rn.
  // Stores a VecListThreeD:$Vd register list; no outputs, no selection pattern.
  1603. class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  1604. : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
  1605. (ins AddrMode:$Rn, VecListThreeD:$Vd),
  1606. IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1607. let Rm = 0b1111;
  // NOTE(review): only bit 4 is taken from $Rn here, whereas the VST1D3WB
  // writeback defs copy Rn{5-4} - confirm whether the difference is intended.
  1608. let Inst{4} = Rn{4};
  1609. let DecoderMethod = "DecodeVLDST1Instruction";
  1610. }
  // VST1D3WB: three-register writeback stores (VecListThreeD:$Vd).
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both output GPR:$wb tied to $Rn.addr and carry no selection pattern.
  // NOTE(review): the itinerary is IIC_VLD1x3u (VLD-named) although these are
  // stores, while the non-writeback VST1D3 uses IIC_VST1x3 - confirm intent.
  1611. multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1612. def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
  1613. (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
  1614. "vst1", Dt, "$Vd, $Rn!",
  1615. "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  1616. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1617. let Inst{5-4} = Rn{5-4};
  1618. let DecoderMethod = "DecodeVLDST1Instruction";
  1619. }
  1620. def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
  1621. (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
  1622. IIC_VLD1x3u,
  1623. "vst1", Dt, "$Vd, $Rn, $Rm",
  1624. "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  1625. let Inst{5-4} = Rn{5-4};
  1626. let DecoderMethod = "DecodeVLDST1Instruction";
  1627. }
  1628. }
  // Three-register VST1 instructions ("T" suffix) for 8/16/32/64-bit elements,
  // without writeback (def) and with writeback (defm: _fixed and _register).
  // All use addrmode6align64 as the address operand type.
  1629. def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
  1630. def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
  1631. def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
  1632. def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
  1633. defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
  1634. defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
  1635. defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
  1636. defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
  // Pseudo-instructions for the three-register VST1 forms, all scheduled as
  // WriteVST3.  Per element size there is a plain pseudo (VSTQQPseudo,
  // IIC_VST1x3), a fixed-writeback pseudo (VSTQQWBfixedPseudo, IIC_VST1x3u) and
  // a register-writeback pseudo (VSTQQWBPseudo, IIC_VST1x3u).
  1637. def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1638. def VST1d8TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1639. def VST1d8TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1640. def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1641. def VST1d16TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1642. def VST1d16TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1643. def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1644. def VST1d32TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1645. def VST1d32TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1646. def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1647. def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  1648. def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
  // "High"/"Low" q pseudo-instructions on QQQQ operands.  High has both a plain
  // and a _UPD form; Low has only a _UPD form.  The _UPD forms here use the
  // IIC_VST1x3 itinerary (not IIC_VST1x3u as the d writeback pseudos above do).
  1649. def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1650. def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1651. def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1652. def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1653. def VST1q8HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1654. def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1655. def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1656. def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1657. def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1658. def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1659. def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1660. def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
  1661. // ...with 4 registers
  // VST1D4: four-register "vst1" without writeback.
  //   op7_4    - encoding field forwarded to NLdSt.
  //   Dt       - data-type suffix string used in the assembly syntax.
  //   AddrMode - address operand type for $Rn.
  // Stores a VecListFourD:$Vd register list; no outputs, no selection pattern.
  1662. class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  1663. : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
  1664. (ins AddrMode:$Rn, VecListFourD:$Vd),
  1665. IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
  1666. []>, Sched<[WriteVST4]> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1667. let Rm = 0b1111;
  // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand encoding.
  1668. let Inst{5-4} = Rn{5-4};
  1669. let DecoderMethod = "DecodeVLDST1Instruction";
  1670. }
  // VST1D4WB: four-register writeback stores (VecListFourD:$Vd).
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both output GPR:$wb tied to $Rn.addr and carry no selection pattern.
  // NOTE(review): the itinerary is IIC_VLD1x4u (VLD-named) although these are
  // stores, while the non-writeback VST1D4 uses IIC_VST1x4 - confirm intent.
  1671. multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  1672. def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
  1673. (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
  1674. "vst1", Dt, "$Vd, $Rn!",
  1675. "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  1676. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1677. let Inst{5-4} = Rn{5-4};
  1678. let DecoderMethod = "DecodeVLDST1Instruction";
  1679. }
  1680. def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
  1681. (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
  1682. IIC_VLD1x4u,
  1683. "vst1", Dt, "$Vd, $Rn, $Rm",
  1684. "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  1685. let Inst{5-4} = Rn{5-4};
  1686. let DecoderMethod = "DecodeVLDST1Instruction";
  1687. }
  1688. }
  // Four-register VST1 instructions ("Q" suffix) for 8/16/32/64-bit elements,
  // without writeback (def) and with writeback (defm: _fixed and _register).
  // All use addrmode6align64or128or256 as the address operand type.
  1689. def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
  1690. def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
  1691. def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
  1692. def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
  1693. defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
  1694. defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  1695. defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
  1696. defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
  // Pseudo-instructions for the four-register VST1 forms, all scheduled as
  // WriteVST4.  Per element size there is a plain pseudo (VSTQQPseudo,
  // IIC_VST1x4), a fixed-writeback pseudo (VSTQQWBfixedPseudo, IIC_VST1x4u) and
  // a register-writeback pseudo (VSTQQWBPseudo, IIC_VST1x4u).
  1697. def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1698. def VST1d8QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1699. def VST1d8QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1700. def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1701. def VST1d16QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1702. def VST1d16QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1703. def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1704. def VST1d32QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1705. def VST1d32QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1706. def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1707. def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  1708. def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
  // "High"/"Low" q pseudo-instructions on QQQQ operands.  High has both a plain
  // and a _UPD form; Low has only a _UPD form.  The _UPD forms here use the
  // IIC_VST1x4 itinerary (not IIC_VST1x4u as the d writeback pseudos above do).
  1709. def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1710. def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1711. def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1712. def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1713. def VST1q8HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1714. def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1715. def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1716. def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1717. def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1718. def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1719. def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1720. def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
  1721. // VST2 : Vector Store (multiple 2-element structures)
  // Class parameters:
  //   op11_8, op7_4 - encoding fields forwarded to NLdSt.
  //   Dt            - data-type suffix string used in the assembly syntax.
  //   VdTy          - register-list operand type for the stored $Vd.
  //   itin          - instruction itinerary class.
  //   AddrMode      - address operand type for $Rn.
  // No outputs and no selection pattern; scheduling info is attached by each
  // def that instantiates this class rather than by the class itself.
  1722. class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
  1723. InstrItinClass itin, Operand AddrMode>
  1724. : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
  1725. itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  // There is no $Rm operand in this form, so Rm is hard-wired.
  1726. let Rm = 0b1111;
  // Instruction bits 5-4 are taken from bits 5-4 of the $Rn operand encoding.
  1727. let Inst{5-4} = Rn{5-4};
  1728. let DecoderMethod = "DecodeVLDST2Instruction";
  1729. }
  // VST2 d forms: op11_8 = 0b1000, VecListDPair register list, IIC_VST2
  // itinerary, WriteVST2 scheduling.
  1730. def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
  1731. addrmode6align64or128>, Sched<[WriteVST2]>;
  1732. def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
  1733. addrmode6align64or128>, Sched<[WriteVST2]>;
  1734. def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
  1735. addrmode6align64or128>, Sched<[WriteVST2]>;
  // VST2 q forms: op11_8 = 0b0011, VecListFourD register list, IIC_VST2x2
  // itinerary, WriteVST4 scheduling.
  1736. def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
  1737. addrmode6align64or128or256>, Sched<[WriteVST4]>;
  1738. def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
  1739. addrmode6align64or128or256>, Sched<[WriteVST4]>;
  1740. def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
  1741. addrmode6align64or128or256>, Sched<[WriteVST4]>;
  // Pseudo-instructions for the q forms, built from VSTQQPseudo.
  1742. def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  1743. def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  1744. def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
  1745. // ...with address register writeback:
  // VST2DWB expands to two writeback "vst2" stores per instantiation:
  //   _fixed    - asm syntax "$Vd, $Rn!", no $Rm operand (Rm hard-wired to 0b1101).
  //   _register - asm syntax "$Vd, $Rn, $Rm" with an rGPR:$Rm input.
  // Both output GPR:$wb tied to $Rn.addr, are scheduled as WriteVST2 and carry
  // no selection pattern.
  // NOTE(review): the itinerary is IIC_VLD1u (VLD-named) although these are
  // stores - confirm whether that is intentional.
  1746. multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
  1747. RegisterOperand VdTy, Operand AddrMode> {
  1748. def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
  1749. (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
  1750. "vst2", Dt, "$Vd, $Rn!",
  1751. "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  1752. let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  1753. let Inst{5-4} = Rn{5-4};
  1754. let DecoderMethod = "DecodeVLDST2Instruction";
  1755. }
  1756. def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
  1757. (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
  1758. "vst2", Dt, "$Vd, $Rn, $Rm",
  1759. "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  1760. let Inst{5-4} = Rn{5-4};
  1761. let DecoderMethod = "DecodeVLDST2Instruction";
  1762. }
  1763. }
  // Writeback variants of "vst2" operating on a VecListFourD register list
  // (op11_8 is fixed to 0b0011):
  //   _fixed    - assembly "$Vd, $Rn!";     Rm field forced to 0b1101.
  //   _register - assembly "$Vd, $Rn, $Rm"; offset supplied in an rGPR operand.
  //
  // The itinerary is IIC_VST2x2u, the same class the VST2q*PseudoWB_fixed /
  // _register pseudos use, instead of the load itinerary IIC_VLD1u.
  multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                       "vst2", Dt, "$Vd, $Rn!",
                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
    def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                          IIC_VST2x2u,
                          "vst2", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
      let Inst{5-4} = Rn{5-4};
      let DecoderMethod = "DecodeVLDST2Instruction";
    }
  }
  // Writeback instantiations for the VecListDPair forms.
  defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                           addrmode6align64or128>;
  defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
  defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

  // Writeback instantiations for the VecListFourD forms.
  defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
  defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
  defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

  // Writeback pseudos, fixed-increment flavour.
  def VST2q8PseudoWB_fixed  : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;
  def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;
  def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>,
                              Sched<[WriteVST4]>;

  // Writeback pseudos, register-increment flavour.
  def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                                 Sched<[WriteVST4]>;
  // ...with double-spaced registers:
  // Same as the VST2d* definitions but with op11_8 = 0b1001 and a
  // VecListDPairSpaced register list. The non-writeback records carry
  // Sched<[WriteVST2]> like VST2d8/16/32 do, so that they have the same
  // scheduling annotation as their writeback counterparts (VST2DWB attaches
  // Sched<[WriteVST2]> to every record it defines).
  def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
  def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
  def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;

  defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                           addrmode6align64or128>;
  defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                           addrmode6align64or128>;
  // VST3 : Vector Store (multiple 3-element structures)
  // Non-writeback "vst3" taking three D registers ($Vd, $src2, $src3) and an
  // addrmode6 address. op11_8 / op7_4 are forwarded to NLdSt; Dt is the
  // data-type suffix string.
  class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
            IIC_VST3, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Real instructions (op11_8 = 0b0100), one per element size.
  def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
  def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
  def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

  // Matching pseudos built on VSTQQPseudo.
  def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  // ...with address register writeback:
  // "vst3" of three D registers that also produces the updated address in
  // $wb (tied to $Rn.addr). The offset is supplied by the am6offset operand
  // $Rm, which is printed directly after $Rn in the assembly string.
  class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3),
            IIC_VST3u,
            "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST3Instruction";
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Writeback real instructions (op11_8 = 0b0100), one per element size.
  def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
  def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
  def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

  // Matching writeback pseudos built on VSTQQWBPseudo.
  def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // ...with double-spaced registers:
  // Same classes as the d forms, with op11_8 = 0b0101.
  def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
  def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
  def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;

  def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
  def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
  def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

  def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

  // ...alternate versions to be allocated odd register numbers:
  def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
  def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

  def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
  // VST4 : Vector Store (multiple 4-element structures)
  // Non-writeback "vst4" taking four D registers ($Vd, $src2, $src3, $src4)
  // and an addrmode6 address. op11_8 / op7_4 are forwarded to NLdSt; Dt is
  // the data-type suffix string.
  class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
            "", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
    // Encoding bits 5-4 come from bits 5-4 of the $Rn operand value.
    let Inst{5-4} = Rn{5-4};
  }

  // Real instructions (op11_8 = 0b0000), one per element size.
  def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
  def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
  def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

  // Matching pseudos built on VSTQQPseudo.
  def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  // ...with address register writeback:
  // "vst4" of four D registers that also produces the updated address in
  // $wb (tied to $Rn.addr). The offset is supplied by the am6offset operand
  // $Rm, which is printed directly after $Rn in the assembly string.
  class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
            IIC_VST4u,
            "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST4Instruction";
    // Encoding bits 5-4 come from bits 5-4 of the $Rn operand value.
    let Inst{5-4} = Rn{5-4};
  }

  // Writeback real instructions (op11_8 = 0b0000), one per element size.
  def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
  def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
  def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

  // Matching writeback pseudos built on VSTQQWBPseudo.
  def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  // ...with double-spaced registers:
  // Same classes as the d forms, with op11_8 = 0b0001.
  def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
  def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
  def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;

  def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
  def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
  def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

  def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

  // ...alternate versions to be allocated odd register numbers:
  def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
  def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

  def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
  1907. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Classes for VST*LN pseudo-instructions with multi-register operands.
  // These are expanded to real instructions after register allocation.
  //
  // Each register class (QPR, QQPR, QQQQPR) gets a plain variant and a
  // writeback ("WB") variant. The writeback variant adds a GPR $wb result
  // tied to $addr.addr and an am6offset $offset input.

  // Single Q register source.
  class VSTQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // QQ register source.
  class VSTQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;

  // QQQQ register source.
  class VSTQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin, "">;
  class VSTQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                       nohash_imm:$lane),
                  itin, "$addr.addr = $wb">;
  // VST1LN : Vector Store (single element from one lane)
  // Stores lane $lane of the D register $Vd to the address $Rn.
  //   Ty        - vector type of $Vd used in the selection pattern.
  //   StoreOp   - store fragment matched by the pattern.
  //   ExtractOp - node that extracts the lane from the vector.
  //   AddrMode  - operand type of the address operand $Rn.
  class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
              IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
              [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
  }
  // Q-register pseudo for a single-lane vst1: reuses VSTQLNPseudo and attaches
  // the selection pattern "store (extract lane of QPR:$src) to $addr".
  class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
    let Pattern = [
      (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)
    ];
  }
  // 8-bit lanes: the 3-bit lane index occupies encoding bits 7-5.
  def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                        ARMvgetlaneu, addrmode6> {
    let Inst{7-5} = lane{2-0};
  }

  // 16-bit lanes: 2-bit lane index in bits 7-6, bit 4 from $Rn.
  def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                         ARMvgetlaneu, addrmode6> {
    let Inst{4}   = Rn{4};
    let Inst{7-6} = lane{1-0};
  }

  // 32-bit lanes: 1-bit lane index in bit 7, bits 5-4 from $Rn.
  def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                         addrmode6oneL32> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7}   = lane{0};
  }

  // Q-register pseudos for each element size.
  def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
  def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
  def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
  // Floating-point lane stores are selected to the integer VST1LN
  // instructions / pseudos of the same element width.
  let Predicates = [HasNEON] in {
    // 32-bit float lanes.
    def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

    // 16-bit float lanes.
    def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
    def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane),
                     addrmode6:$addr),
              (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
  }
  // ...with address register writeback:
  // Single-lane "vst1" that also produces the updated address in $wb (tied
  // to $Rn.addr). The offset comes from the am6offset operand $Rm. The
  // pattern matches a post-indexed store (StoreOp) of the extracted lane.
  class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs GPR:$wb),
              (ins AdrMode:$Rn, am6offset:$Rm,
                   DPR:$Vd, nohash_imm:$lane),
              IIC_VST1lnu, "vst1", Dt,
              "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb",
              [(set GPR:$wb,
                    (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                             AdrMode:$Rn, am6offset:$Rm))]>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST1LN";
  }
  // Q-register writeback pseudo for a single-lane vst1: reuses VSTQLNWBPseudo
  // and attaches the post-indexed store pattern that defines $wb.
  class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
    : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
    let Pattern = [
      (set GPR:$wb,
           (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                    addrmode6:$addr, am6offset:$offset))
    ];
  }
  // 8-bit lanes: the 3-bit lane index occupies encoding bits 7-5.
  def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                              ARMvgetlaneu, addrmode6> {
    let Inst{7-5} = lane{2-0};
  }

  // 16-bit lanes: 2-bit lane index in bits 7-6, bit 4 from $Rn.
  def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                               ARMvgetlaneu, addrmode6> {
    let Inst{4}   = Rn{4};
    let Inst{7-6} = lane{1-0};
  }

  // 32-bit lanes: 1-bit lane index in bit 7, bits 5-4 from $Rn.
  def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                               extractelt, addrmode6oneL32> {
    let Inst{5-4} = Rn{5-4};
    let Inst{7}   = lane{0};
  }

  // Q-register writeback pseudos for each element size.
  def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8,
                                            ARMvgetlaneu>;
  def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,
                                            ARMvgetlaneu>;
  def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
  2009. let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
  // VST2LN : Vector Store (single 2-element structure from one lane)
  // Stores lane $lane of the two D registers $Vd and $src2 to $Rn. No
  // selection pattern is attached to this class.
  class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
              IIC_VST2ln, "vst2", Dt,
              "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVST2LN";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Per-element-size instructions; each places the lane index in the upper
  // bits of the op7_4 field (3 bits for 8-bit, 2 for 16-bit, 1 for 32-bit).
  def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a single Q register.
  def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  // ...with double-spaced registers:
  // These differ from the d forms only in one fixed op7_4 bit (bit 5 for
  // 16-bit, bit 6 for 32-bit). The explicit Inst{4} assignment repeats what
  // the VST2LN class already sets.
  def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
    let Inst{4}   = Rn{4};
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
    let Inst{4} = Rn{4};
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a QQ register.
  def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
  // ...with address register writeback:
  // Single-lane "vst2" of $Vd and $src2 that also produces the updated
  // address in $wb (tied to $Rn.addr); the offset comes from the am6offset
  // operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST1]> matches the non-writeback VST2LN class and the
  // VST2LN*Pseudo_UPD pseudos, so the real writeback instructions carry the
  // same scheduling annotation as the records around them.
  class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
              "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST2LN";
  }
  // Writeback d forms; the lane index occupies the upper op7_4 bits.
  def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a single Q register.
  def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

  // Writeback q forms (one op7_4 bit fixed to 1 instead of 0).
  def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a QQ register.
  def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
  // VST3LN : Vector Store (single 3-element structure from one lane)
  // Stores lane $lane of the three D registers $Vd, $src2 and $src3 to $Rn.
  // No selection pattern is attached to this class.
  class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VST3ln, "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
  }
  // Per-element-size instructions; the lane index occupies the upper op7_4
  // bits and the remaining low bits are fixed to 0.
  def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a QQ register.
  def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with double-spaced registers:
  // These differ from the d forms only in one fixed op7_4 bit (bit 5 for
  // 16-bit, bit 6 for 32-bit).
  def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a QQQQ register. They carry Sched<[WriteVST2]>
  // like the VST3LNd*Pseudo and VST3LNq*Pseudo_UPD pseudos do.
  def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // Single-lane "vst3" of $Vd, $src2 and $src3 that also produces the
  // updated address in $wb (tied to $Rn.addr); the offset comes from the
  // am6offset operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> matches the non-writeback VST3LN class and the
  // VST3LN*Pseudo_UPD pseudos, so the real writeback instructions carry the
  // same scheduling annotation as the records around them.
  class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VST3lnu, "vst3", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST3LN";
  }
  // Writeback d forms; the lane index occupies the upper op7_4 bits.
  def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a QQ register.
  def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
  def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
  def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

  // Writeback q forms (one op7_4 bit fixed to 1 instead of 0).
  def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a QQQQ register.
  def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>,
                            Sched<[WriteVST2]>;
  def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>,
                            Sched<[WriteVST2]>;
  // VST4LN : Vector Store (single 4-element structure from one lane)
  // Stores lane $lane of the four D registers $Vd, $src2, $src3 and $src4 to
  // $Rn. No selection pattern is attached to this class.
  class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4,
              (outs),
              (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VST4ln, "vst4", Dt,
              "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
              "", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVST4LN";
    // No $Rm operand in this form; the Rm field is fixed to 0b1111.
    let Rm = 0b1111;
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
  }
  // Per-element-size d forms; the lane index occupies the upper op7_4 bits.
  def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  // The 32-bit form additionally takes encoding bit 5 from $Rn.
  def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a QQ register.
  def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

  // ...with double-spaced registers:
  def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Matching pseudos taking a QQQQ register.
  def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
  // ...with address register writeback:
  // Single-lane "vst4" of $Vd, $src2, $src3 and $src4 that also produces the
  // updated address in $wb (tied to $Rn.addr); the offset comes from the
  // am6offset operand $Rm. No selection pattern is attached.
  //
  // Sched<[WriteVST2]> matches the non-writeback VST4LN class and the
  // VST4LN*Pseudo_UPD pseudos, so the real writeback instructions carry the
  // same scheduling annotation as the records around them.
  class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
              IIC_VST4lnu, "vst4", Dt,
    "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
              "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    // Encoding bit 4 comes from bit 4 of the $Rn operand value.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVST4LN";
  }
  // Writeback d forms; the lane index occupies the upper op7_4 bits.
  def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
    let Inst{7-5} = lane{2-0};
  }
  def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  // The 32-bit form additionally takes encoding bit 5 from $Rn.
  def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a QQ register.
  def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
  def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
  def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

  // Writeback q forms (one op7_4 bit fixed to 1 instead of 0).
  def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
    let Inst{7-6} = lane{1-0};
  }
  def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
    let Inst{5} = Rn{5};
    let Inst{7} = lane{0};
  }

  // Matching writeback pseudos taking a QQQQ register.
  def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>,
                            Sched<[WriteVST2]>;
  def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>,
                            Sched<[WriteVST2]>;
  2201. } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
  // Use vld1/vst1 for unaligned f64 load / store
  // Little-endian: pick the vld1/vst1 element size from the known alignment
  // (16-bit elements for halfword-aligned, 8-bit for byte-aligned accesses).
  let Predicates = [IsLE, HasNEON] in {
    // Halfword-aligned.
    def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
              (VLD1d16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d16 addrmode6:$addr, DPR:$value)>;

    // Byte-aligned.
    def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
              (VLD1d8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d8 addrmode6:$addr, DPR:$value)>;
  }

  // Big-endian: every non-word-aligned f64 access uses the 64-bit element
  // forms.
  let Predicates = [IsBE, HasNEON] in {
    def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
              (VLD1d64 addrmode6:$addr)>;
    def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
              (VST1d64 addrmode6:$addr, DPR:$value)>;
  }
  // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
  // load / store if it's legal.
  // Doubleword-aligned v2f64 accesses use the 64-bit element forms.
  let Predicates = [HasNEON] in {
    def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
              (VLD1q64 addrmode6:$addr)>;
    def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q64 addrmode6:$addr, QPR:$value)>;
  }

  // Little-endian only: less-aligned v2f64 accesses use the vld1/vst1 form
  // whose element size matches the known alignment.
  let Predicates = [IsLE, HasNEON] in {
    // Word-aligned.
    def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
              (VLD1q32 addrmode6:$addr)>;
    def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q32 addrmode6:$addr, QPR:$value)>;

    // Halfword-aligned.
    def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
              (VLD1q16 addrmode6:$addr)>;
    def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q16 addrmode6:$addr, QPR:$value)>;

    // Byte-aligned.
    def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
              (VLD1q8 addrmode6:$addr)>;
    def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q8 addrmode6:$addr, QPR:$value)>;
  }
  2241. //===----------------------------------------------------------------------===//
  2242. // Instruction Classes
  2243. //===----------------------------------------------------------------------===//
  // Basic 2-register operations: double- and quad-register.
  // Each class emits "$Vd, $Vm" and a pattern applying OpNode to $Vm (typed
  // OpTy), producing $Vd (typed ResTy). The D variant passes 0 and the Q
  // variant passes 1 as the N2V argument between op11_7 and op4.
  class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

  class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
  // Basic 2-register intrinsics, both double- and quad-register.
  // Like N2VD / N2VQ, but the itinerary is a parameter and the operator is
  // an SDPatternOperator (IntOp).
  class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Same as above, but not predicated.
  // Built on N2Vnp; the D variant passes 0 and the Q variant passes 1 as the
  // N2Vnp argument that follows op7.
  class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
            (outs DPR:$Vd), (ins DPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

  class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
            (outs QPR:$Vd), (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Similar to N2VQIntnp with some more encoding bits exposed (crypto).
  // Here op6 is a template parameter and is forwarded to N2Vnp in the
  // position where N2VQIntnp passes the constant 1.
  class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                   bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd), (ins QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  // Same as N2VQIntXnp but with Vd as a src register.
  // The extra input $src is tied to the result through the "$src = $Vd"
  // constraint, and IntOp is applied to ($src, $Vm).
  class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                    bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
            (outs QPR:$Vd),
            (ins QPR:$src, QPR:$Vm),
            itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
    let Constraints = "$src = $Vd";
  }
  // Narrow 2-register operations.
  // Q-register input ($Vm, typed TyQ), D-register result ($Vd, typed TyD).
  class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyD, ValueType TyQ, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

  // Narrow 2-register intrinsics.
  // Same shape as N2VN, with an SDPatternOperator instead of an SDNode.
  class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs DPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
  // Long 2-register operations (currently only used for VMOVL).
  // D-register input ($Vm, typed TyD), Q-register result ($Vd, typed TyQ).
  class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

  // Long 2-register intrinsics.
  // Same shape as N2VL, with an SDPatternOperator instead of an SDNode.
  class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
  // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
  // Both registers are read and written: the inputs $src1 / $src2 are tied
  // to the outputs $Vd / $Vm by the constraint string. No selection pattern
  // is attached.
  class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr,
                    string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
          (outs DPR:$Vd, DPR:$Vm),
          (ins DPR:$src1, DPR:$src2),
          IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;

  // Quad-register variant; the itinerary is a parameter here.
  class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                    InstrItinClass itin, string OpcodeStr, string Dt>
    : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
          (outs QPR:$Vd, QPR:$Vm),
          (ins QPR:$src1, QPR:$src2),
          itin, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;
  // Basic 3-register operations: double- and quad-register.
  // D-register form: $Vd = OpNode($Vn, $Vm), with operands typed OpTy and
  // the result typed ResTy. Commutable is copied into isCommutable.
  class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Same as N3VD but no data type.
  // Built on N3VX, which takes no Dt string argument.
  class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              SDNode OpNode, bit Commutable>
    : N3VX<op24, op23, op21_20, op11_8, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vn, DPR:$Vm),
           N3RegFrm, itin,
           OpcodeStr, "$Vd, $Vn, $Vm", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
    let isCommutable = Commutable;
    // All of these have a two-operand InstAlias.
    let TwoOperandAliasConstraint = "$Vn = $Vd";
  }
  // Double-register operation with a lane-selected second source, built on
  // N3VLane32. $Vm is restricted to DPR_VFP2 and indexed by a VectorIndex32
  // lane; the pattern feeds (ARMvduplane $Vm, $lane) to ShOp as the second
  // operand. A single type Ty is used for the result and all sources.
  2370. class N3VDSL<bits<2> op21_20, bits<4> op11_8,
  2371. InstrItinClass itin, string OpcodeStr, string Dt,
  2372. ValueType Ty, SDNode ShOp>
  2373. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2374. (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2375. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2376. [(set (Ty DPR:$Vd),
  2377. (Ty (ShOp (Ty DPR:$Vn),
  2378. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  2379. // All of these have a two-operand InstAlias.
  2380. let TwoOperandAliasConstraint = "$Vn = $Vd";
  // The lane operand is not interchangeable with $Vn, so never commutable.
  2381. let isCommutable = 0;
  2382. }
  // Variant of N3VDSL built on N3VLane16: $Vm is restricted to DPR_8 and the
  // lane operand is a VectorIndex16. Unlike N3VDSL there is no itinerary
  // parameter; IIC_VMULi16D is hard-wired.
  2383. class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
  2384. string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  2385. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2386. (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2387. NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
  2388. [(set (Ty DPR:$Vd),
  2389. (Ty (ShOp (Ty DPR:$Vn),
  2390. (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  2391. // All of these have a two-operand InstAlias.
  2392. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2393. let isCommutable = 0;
  2394. }
  // Quad-register 3-register operation: $Vd = OpNode($Vn, $Vm) on QPR.
  // Same shape as N3VD, but the fifth N3V argument is 1 and every register
  // operand is QPR. Commutable is copied into isCommutable.
  2395. class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2396. InstrItinClass itin, string OpcodeStr, string Dt,
  2397. ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  2398. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2399. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2400. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2401. [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2402. // All of these have a two-operand InstAlias.
  2403. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2404. let isCommutable = Commutable;
  2405. }
  // Quad-register 3-register operation built on N3VX, i.e. without a Dt
  // (data type suffix) string. This is the QPR counterpart of N3VDX.
  2406. class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2407. InstrItinClass itin, string OpcodeStr,
  2408. ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  2409. : N3VX<op24, op23, op21_20, op11_8, 1, op4,
  2410. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2411. OpcodeStr, "$Vd, $Vn, $Vm", "",
  2412. [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  2413. // All of these have a two-operand InstAlias.
  2414. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2415. let isCommutable = Commutable;
  2416. }
  // Quad-register operation with a lane-selected second source (N3VLane32).
  // $Vd and $Vn are QPR while the lane source $Vm is a DPR_VFP2 register.
  // ARMvduplane takes $Vm as OpTy and yields ResTy, so ResTy is the Q-sized
  // type and OpTy the D-sized type of the lane source.
  2417. class N3VQSL<bits<2> op21_20, bits<4> op11_8,
  2418. InstrItinClass itin, string OpcodeStr, string Dt,
  2419. ValueType ResTy, ValueType OpTy, SDNode ShOp>
  2420. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2421. (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2422. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2423. [(set (ResTy QPR:$Vd),
  2424. (ResTy (ShOp (ResTy QPR:$Vn),
  2425. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2426. imm:$lane)))))]> {
  2427. // All of these have a two-operand InstAlias.
  2428. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2429. let isCommutable = 0;
  2430. }
  // Variant of N3VQSL built on N3VLane16: the lane source is a DPR_8 register
  // indexed by VectorIndex16. There is no itinerary parameter; IIC_VMULi16Q is
  // hard-wired.
  2431. class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
  2432. ValueType ResTy, ValueType OpTy, SDNode ShOp>
  2433. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2434. (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2435. NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
  2436. [(set (ResTy QPR:$Vd),
  2437. (ResTy (ShOp (ResTy QPR:$Vn),
  2438. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2439. imm:$lane)))))]> {
  2440. // All of these have a two-operand InstAlias.
  2441. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2442. let isCommutable = 0;
  2443. }
  2444. // Basic 3-register intrinsics, both double- and quad-register.
  // Double-register 3-register intrinsic: $Vd = IntOp($Vn, $Vm).
  // Differs from N3VD in that the instruction Format f is a parameter and the
  // operator is an SDPatternOperator rather than an SDNode.
  2445. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2446. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2447. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  2448. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2449. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
  2450. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2451. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2452. // All of these have a two-operand InstAlias.
  2453. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2454. let isCommutable = Commutable;
  2455. }
  // Double-register 3-register intrinsic built on N3Vnp.
  // The encoding is given as op27_23 / op21_20 / op11_8 / op6 / op4, and no
  // assembly operand string or constraint string is passed to the base class.
  // NOTE(review): "np" presumably means "not predicated", matching the comment
  // on N3VLIntnp elsewhere in this file - confirm against the N3Vnp definition.
  2456. class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2457. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2458. string Dt, ValueType ResTy, ValueType OpTy,
  2459. SDPatternOperator IntOp, bit Commutable>
  2460. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2461. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
  2462. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2463. let isCommutable = Commutable;
  2464. }
  // Double-register intrinsic with a lane-selected second source (N3VLane32).
  // Same operand layout and pattern shape as N3VDSL, but the operator is an
  // SDPatternOperator and no TwoOperandAliasConstraint is set.
  2465. class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2466. string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  2467. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2468. (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2469. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2470. [(set (Ty DPR:$Vd),
  2471. (Ty (IntOp (Ty DPR:$Vn),
  2472. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
  2473. imm:$lane)))))]> {
  2474. let isCommutable = 0;
  2475. }
  // Variant of N3VDIntSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16. The itinerary stays a parameter here.
  2476. class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2477. string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  2478. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2479. (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2480. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2481. [(set (Ty DPR:$Vd),
  2482. (Ty (IntOp (Ty DPR:$Vn),
  2483. (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  2484. let isCommutable = 0;
  2485. }
  // Double-register 3-register intrinsic whose sources are listed as $Vm, $Vn,
  // the reverse of N3VDInt's $Vn, $Vm. The ins list, assembly string, pattern
  // and two-operand alias constraint ("$Vm = $Vd") all use that swapped order.
  // isCommutable is forced to 0.
  2486. class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2487. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2488. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2489. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2490. (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
  2491. OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
  2492. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  2493. let TwoOperandAliasConstraint = "$Vm = $Vd";
  2494. let isCommutable = 0;
  2495. }
  // Quad-register 3-register intrinsic: $Vd = IntOp($Vn, $Vm) on QPR.
  // QPR counterpart of N3VDInt; the fifth N3V argument is 1.
  2496. class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2497. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2498. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  2499. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2500. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
  2501. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2502. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2503. // All of these have a two-operand InstAlias.
  2504. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2505. let isCommutable = Commutable;
  2506. }
  // Quad-register 3-register intrinsic built on N3Vnp.
  // QPR counterpart of N3VDIntnp: the same template parameters, with QPR
  // operands in the outs/ins lists and in the pattern.
  2507. class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2508. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2509. string Dt, ValueType ResTy, ValueType OpTy,
  2510. SDPatternOperator IntOp, bit Commutable>
  2511. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2512. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
  2513. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  2514. let isCommutable = Commutable;
  2515. }
  2516. // Same as N3VQIntnp but with Vd as a src register.
  // Quad-register 3-argument intrinsic built on N3Vnp:
  //   $Vd = IntOp($src, $Vn, $Vm)
  // The extra input $src is tied to $Vd through Constraints, so the
  // destination register also supplies the first source operand.
  2517. class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2518. bit op4, Format f, InstrItinClass itin, string OpcodeStr,
  2519. string Dt, ValueType ResTy, ValueType OpTy,
  2520. SDPatternOperator IntOp>
  2521. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2522. (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
  2523. f, itin, OpcodeStr, Dt,
  2524. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
  2525. (OpTy QPR:$Vm))))]> {
  2526. let Constraints = "$src = $Vd";
  2527. let isCommutable = 0;
  2528. }
  // Quad-register intrinsic with a lane-selected second source (N3VLane32).
  // $Vd and $Vn are QPR (ResTy); the lane source $Vm is DPR_VFP2 (OpTy) and is
  // passed through ARMvduplane before reaching IntOp.
  2529. class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2530. string OpcodeStr, string Dt,
  2531. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2532. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2533. (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2534. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2535. [(set (ResTy QPR:$Vd),
  2536. (ResTy (IntOp (ResTy QPR:$Vn),
  2537. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2538. imm:$lane)))))]> {
  2539. let isCommutable = 0;
  2540. }
  // Variant of N3VQIntSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16.
  2541. class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2542. string OpcodeStr, string Dt,
  2543. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2544. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2545. (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2546. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2547. [(set (ResTy QPR:$Vd),
  2548. (ResTy (IntOp (ResTy QPR:$Vn),
  2549. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2550. imm:$lane)))))]> {
  2551. let isCommutable = 0;
  2552. }
  // Quad-register counterpart of N3VDIntSh: sources are listed as $Vm, $Vn
  // (the reverse of N3VQInt) and the two-operand alias ties $Vm to $Vd.
  2553. class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2554. Format f, InstrItinClass itin, string OpcodeStr, string Dt,
  2555. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2556. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2557. (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
  2558. OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
  2559. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  2560. let TwoOperandAliasConstraint = "$Vm = $Vd";
  2561. let isCommutable = 0;
  2562. }
  2563. // Multiply-Add/Sub operations: double- and quad-register.
  // Double-register multiply-accumulate shape:
  //   $Vd = OpNode($src1, MulOp($Vn, $Vm))
  // $src1 is tied to $Vd by the constraint string, so only $Vd, $Vn and $Vm
  // appear in the assembly operand string.
  2564. class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2565. InstrItinClass itin, string OpcodeStr, string Dt,
  2566. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  2567. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2568. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2569. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2570. [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
  2571. (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
  // Double-register multiply-accumulate with a lane-selected multiplicand,
  // built on N3VLane32:
  //   $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane)))
  // $src1 is tied to $Vd; $Vm is restricted to DPR_VFP2.
  2572. class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2573. string OpcodeStr, string Dt,
  2574. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  2575. : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
  2576. (outs DPR:$Vd),
  2577. (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2578. NVMulSLFrm, itin,
  2579. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2580. [(set (Ty DPR:$Vd),
  2581. (Ty (ShOp (Ty DPR:$src1),
  2582. (Ty (MulOp DPR:$Vn,
  2583. (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
  2584. imm:$lane)))))))]>;
  // Variant of N3VDMulOpSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16. $src1 is tied to $Vd.
  2585. class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2586. string OpcodeStr, string Dt,
  2587. ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  2588. : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
  2589. (outs DPR:$Vd),
  2590. (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2591. NVMulSLFrm, itin,
  2592. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2593. [(set (Ty DPR:$Vd),
  2594. (Ty (ShOp (Ty DPR:$src1),
  2595. (Ty (MulOp DPR:$Vn,
  2596. (Ty (ARMvduplane (Ty DPR_8:$Vm),
  2597. imm:$lane)))))))]>;
  // Quad-register multiply-accumulate shape:
  //   $Vd = OpNode($src1, MulOp($Vn, $Vm))
  // QPR counterpart of N3VDMulOp; $src1 is tied to $Vd.
  2598. class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2599. InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
  2600. SDPatternOperator MulOp, SDPatternOperator OpNode>
  2601. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2602. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2603. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2604. [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
  2605. (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
  // Quad-register multiply-accumulate with a lane-selected multiplicand,
  // built on N3VLane32:
  //   $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane)))
  // $Vd, $src1 and $Vn are QPR (ResTy); the lane source $Vm is DPR_VFP2 (OpTy).
  2606. class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2607. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2608. SDPatternOperator MulOp, SDPatternOperator ShOp>
  2609. : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
  2610. (outs QPR:$Vd),
  2611. (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2612. NVMulSLFrm, itin,
  2613. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2614. [(set (ResTy QPR:$Vd),
  2615. (ResTy (ShOp (ResTy QPR:$src1),
  2616. (ResTy (MulOp QPR:$Vn,
  2617. (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2618. imm:$lane)))))))]>;
  // Variant of N3VQMulOpSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16. $src1 is tied to $Vd.
  2619. class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2620. string OpcodeStr, string Dt,
  2621. ValueType ResTy, ValueType OpTy,
  2622. SDPatternOperator MulOp, SDPatternOperator ShOp>
  2623. : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
  2624. (outs QPR:$Vd),
  2625. (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2626. NVMulSLFrm, itin,
  2627. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2628. [(set (ResTy QPR:$Vd),
  2629. (ResTy (ShOp (ResTy QPR:$src1),
  2630. (ResTy (MulOp QPR:$Vn,
  2631. (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
  2632. imm:$lane)))))))]>;
  2633. // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
  // Double-register "intrinsic then op" shape:
  //   $Vd = OpNode($src1, IntOp($Vn, $Vm))
  // $src1 is tied to $Vd, making $Vd the accumulator. All values share Ty.
  2634. class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2635. InstrItinClass itin, string OpcodeStr, string Dt,
  2636. ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  2637. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2638. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2639. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2640. [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
  2641. (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
  // Quad-register "intrinsic then op" shape:
  //   $Vd = OpNode($src1, IntOp($Vn, $Vm))
  // QPR counterpart of N3VDIntOp; $src1 is tied to $Vd.
  2642. class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2643. InstrItinClass itin, string OpcodeStr, string Dt,
  2644. ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  2645. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2646. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2647. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2648. [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
  2649. (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
  2650. // Neon 3-argument intrinsics, both double- and quad-register.
  2651. // The destination register is also used as the first source operand register.
  // Double-register 3-argument intrinsic: $Vd = IntOp($src1, $Vn, $Vm).
  // All three sources are typed OpTy and the result ResTy; $src1 is tied to
  // $Vd by the constraint string.
  2652. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2653. InstrItinClass itin, string OpcodeStr, string Dt,
  2654. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2655. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2656. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2657. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2658. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
  2659. (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
  // Quad-register 3-argument intrinsic: $Vd = IntOp($src1, $Vn, $Vm).
  // QPR counterpart of N3VDInt3; $src1 is tied to $Vd.
  2660. class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2661. InstrItinClass itin, string OpcodeStr, string Dt,
  2662. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2663. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  2664. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  2665. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2666. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
  2667. (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
  2668. // Long Multiply-Add/Sub operations.
  // Long multiply-accumulate shape: a QPR accumulator with DPR multiplicands.
  //   $Vd(TyQ) = OpNode($src1(TyQ), MulOp($Vn(TyD), $Vm(TyD)))
  // MulOp therefore produces a TyQ value from two TyD inputs. $src1 is tied
  // to $Vd.
  2669. class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2670. InstrItinClass itin, string OpcodeStr, string Dt,
  2671. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2672. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2673. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2674. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2675. [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
  2676. (TyQ (MulOp (TyD DPR:$Vn),
  2677. (TyD DPR:$Vm)))))]>;
  // Long multiply-accumulate with a lane-selected multiplicand (N3VLane32):
  //   $Vd(TyQ) = OpNode($src1(TyQ), MulOp($Vn(TyD), ARMvduplane($Vm, $lane)))
  // op24 is a parameter here and is passed as the first N3VLane32 argument.
  // $src1 is tied to $Vd.
  2678. class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
  2679. InstrItinClass itin, string OpcodeStr, string Dt,
  2680. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2681. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
  2682. (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2683. NVMulSLFrm, itin,
  2684. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2685. [(set QPR:$Vd,
  2686. (OpNode (TyQ QPR:$src1),
  2687. (TyQ (MulOp (TyD DPR:$Vn),
  2688. (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
  2689. imm:$lane))))))]>;
  // Variant of N3VLMulOpSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16. $src1 is tied to $Vd.
  2690. class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2691. InstrItinClass itin, string OpcodeStr, string Dt,
  2692. ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  2693. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
  2694. (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2695. NVMulSLFrm, itin,
  2696. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2697. [(set QPR:$Vd,
  2698. (OpNode (TyQ QPR:$src1),
  2699. (TyQ (MulOp (TyD DPR:$Vn),
  2700. (TyD (ARMvduplane (TyD DPR_8:$Vm),
  2701. imm:$lane))))))]>;
  2702. // Long Intrinsic-Op vector operations with explicit extend (VABAL).
  // Long accumulate of an extended intrinsic result:
  //   $Vd(TyQ) = OpNode($src1(TyQ), ExtOp(IntOp($Vn(TyD), $Vm(TyD))))
  // IntOp works on the narrow TyD values; ExtOp turns its TyD result into TyQ
  // before OpNode combines it with the accumulator. $src1 is tied to $Vd.
  2703. class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2704. InstrItinClass itin, string OpcodeStr, string Dt,
  2705. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
  2706. SDNode OpNode>
  2707. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2708. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2709. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2710. [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
  2711. (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
  2712. (TyD DPR:$Vm)))))))]>;
  2713. // Neon Long 3-argument intrinsic. The destination register is
  2714. // a quad-register and is also used as the first source operand register.
  // Long 3-argument intrinsic: $Vd(TyQ) = IntOp($src1(TyQ), $Vn(TyD), $Vm(TyD)).
  // The accumulator $src1 is QPR and tied to $Vd; $Vn and $Vm are DPR.
  2715. class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2716. InstrItinClass itin, string OpcodeStr, string Dt,
  2717. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  2718. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2719. (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2720. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
  2721. [(set QPR:$Vd,
  2722. (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
  // Long 3-argument intrinsic with a lane-selected last source (N3VLane32):
  //   $Vd(ResTy) = IntOp($src1(ResTy), $Vn(OpTy), ARMvduplane($Vm, $lane))
  // $src1 is QPR and tied to $Vd; $Vn is DPR and $Vm is DPR_VFP2.
  2723. class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2724. string OpcodeStr, string Dt,
  2725. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2726. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2727. (outs QPR:$Vd),
  2728. (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2729. NVMulSLFrm, itin,
  2730. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2731. [(set (ResTy QPR:$Vd),
  2732. (ResTy (IntOp (ResTy QPR:$src1),
  2733. (OpTy DPR:$Vn),
  2734. (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2735. imm:$lane)))))]>;
  // Variant of N3VLInt3SL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16. $src1 is tied to $Vd.
  2736. class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2737. InstrItinClass itin, string OpcodeStr, string Dt,
  2738. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2739. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2740. (outs QPR:$Vd),
  2741. (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2742. NVMulSLFrm, itin,
  2743. OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
  2744. [(set (ResTy QPR:$Vd),
  2745. (ResTy (IntOp (ResTy QPR:$src1),
  2746. (OpTy DPR:$Vn),
  2747. (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
  2748. imm:$lane)))))]>;
  2749. // Narrowing 3-register intrinsics.
  // Narrowing 3-register intrinsic: two QPR sources (TyQ) produce one DPR
  // result (TyD). Note the template order is TyD then TyQ, the opposite of
  // the long classes. The itinerary is hard-wired to IIC_VBINi4D.
  2750. class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2751. string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
  2752. SDPatternOperator IntOp, bit Commutable>
  2753. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2754. (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
  2755. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2756. [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  2757. let isCommutable = Commutable;
  2758. }
  2759. // Long 3-register operations.
  // Long 3-register operation: two DPR sources (TyD) produce one QPR result
  // (TyQ) directly through OpNode. Commutable is copied into isCommutable.
  2760. class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2761. InstrItinClass itin, string OpcodeStr, string Dt,
  2762. ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  2763. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2764. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2765. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2766. [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  2767. let isCommutable = Commutable;
  2768. }
  // Long operation with a lane-selected second source (N3VLane32):
  //   $Vd(TyQ) = OpNode($Vn(TyD), ARMvduplane($Vm, $lane))
  // $Vn is DPR and $Vm is DPR_VFP2. isCommutable is not set by this class.
  2769. class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
  2770. InstrItinClass itin, string OpcodeStr, string Dt,
  2771. ValueType TyQ, ValueType TyD, SDNode OpNode>
  2772. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2773. (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2774. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2775. [(set QPR:$Vd,
  2776. (TyQ (OpNode (TyD DPR:$Vn),
  2777. (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
  // Variant of N3VLSL built on N3VLane16: the lane source is a DPR_8 register
  // indexed by VectorIndex16.
  2778. class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2779. InstrItinClass itin, string OpcodeStr, string Dt,
  2780. ValueType TyQ, ValueType TyD, SDNode OpNode>
  2781. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2782. (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2783. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2784. [(set QPR:$Vd,
  2785. (TyQ (OpNode (TyD DPR:$Vn),
  2786. (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
  2787. // Long 3-register operations with explicitly extended operands.
  // Long operation on explicitly extended operands:
  //   $Vd = OpNode(ExtOp($Vn), ExtOp($Vm))
  // Each DPR source (TyD) is widened to TyQ by ExtOp before OpNode is applied,
  // in contrast to N3VL where OpNode itself consumes the TyD values.
  2788. class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2789. InstrItinClass itin, string OpcodeStr, string Dt,
  2790. ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
  2791. bit Commutable>
  2792. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2793. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2794. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2795. [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
  2796. (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  2797. let isCommutable = Commutable;
  2798. }
  2799. // Long 3-register intrinsics with explicit extend (VABDL).
  // Long intrinsic whose narrow result is extended afterwards:
  //   $Vd(TyQ) = ExtOp(IntOp($Vn(TyD), $Vm(TyD)))
  // IntOp operates on and returns TyD; ExtOp widens that result to TyQ.
  2800. class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2801. InstrItinClass itin, string OpcodeStr, string Dt,
  2802. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
  2803. bit Commutable>
  2804. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2805. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2806. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2807. [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
  2808. (TyD DPR:$Vm))))))]> {
  2809. let isCommutable = Commutable;
  2810. }
  2811. // Long 3-register intrinsics.
  // Long 3-register intrinsic: $Vd(TyQ) = IntOp($Vn(TyD), $Vm(TyD)).
  // Same shape as N3VL but the operator is an SDPatternOperator.
  2812. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2813. InstrItinClass itin, string OpcodeStr, string Dt,
  2814. ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  2815. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2816. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  2817. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2818. [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  2819. let isCommutable = Commutable;
  2820. }
  2821. // Same as above, but not predicated.
  // Long 3-register intrinsic built on N3Vnp: a QPR result (ResTy) from two
  // DPR sources (OpTy). The encoding is given as op27_23 / op21_20 / op11_8 /
  // op6 / op4, and the format is fixed to N3RegFrm rather than parameterised.
  2822. class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
  2823. bit op4, InstrItinClass itin, string OpcodeStr,
  2824. string Dt, ValueType ResTy, ValueType OpTy,
  2825. SDPatternOperator IntOp, bit Commutable>
  2826. : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
  2827. (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
  2828. [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  2829. let isCommutable = Commutable;
  2830. }
  // Long intrinsic with a lane-selected second source (N3VLane32):
  //   $Vd(ResTy) = IntOp($Vn(OpTy), ARMvduplane($Vm, $lane))
  // $Vn is DPR and $Vm is DPR_VFP2; the result is QPR.
  2831. class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
  2832. string OpcodeStr, string Dt,
  2833. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2834. : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
  2835. (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
  2836. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2837. [(set (ResTy QPR:$Vd),
  2838. (ResTy (IntOp (OpTy DPR:$Vn),
  2839. (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
  2840. imm:$lane)))))]>;
  // Variant of N3VLIntSL built on N3VLane16: the lane source is a DPR_8
  // register indexed by VectorIndex16.
  2841. class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
  2842. InstrItinClass itin, string OpcodeStr, string Dt,
  2843. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2844. : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
  2845. (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
  2846. NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
  2847. [(set (ResTy QPR:$Vd),
  2848. (ResTy (IntOp (OpTy DPR:$Vn),
  2849. (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
  2850. imm:$lane)))))]>;
  2851. // Wide 3-register operations.
  // Wide 3-register operation: $Vd(TyQ) = OpNode($Vn(TyQ), ExtOp($Vm(TyD))).
  // Only the second source is narrow (DPR); it is widened by ExtOp before
  // OpNode. The itinerary is hard-wired to IIC_VSUBiD.
  2852. class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  2853. string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
  2854. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  2855. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  2856. (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
  2857. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  2858. [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
  2859. (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  2860. // All of these have a two-operand InstAlias.
  2861. let TwoOperandAliasConstraint = "$Vn = $Vd";
  2862. let isCommutable = Commutable;
  2863. }
  2864. // Pairwise long 2-register intrinsics, both double- and quad-register.
  // Double-register pairwise-long 2-register intrinsic: $Vd = IntOp($Vm).
  // Source and destination are both DPR but carry different value types
  // (OpTy in, ResTy out). The itinerary is hard-wired to IIC_VSHLiD.
  2865. class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2866. bits<2> op17_16, bits<5> op11_7, bit op4,
  2867. string OpcodeStr, string Dt,
  2868. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2869. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
  2870. (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
  2871. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
  // Quad-register pairwise-long 2-register intrinsic: $Vd = IntOp($Vm) on QPR.
  // NOTE(review): this Q-register class uses IIC_VSHLiD, the same itinerary as
  // the D-register N2VDPLInt, while other Q classes in this file use
  // Q-specific itineraries (e.g. IIC_VSHLiQ in N2VQShIns). Possibly
  // unintentional - confirm against the scheduling models before changing.
  2872. class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2873. bits<2> op17_16, bits<5> op11_7, bit op4,
  2874. string OpcodeStr, string Dt,
  2875. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2876. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
  2877. (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
  2878. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
  2879. // Pairwise long 2-register accumulate intrinsics,
  2880. // both double- and quad-register.
  2881. // The destination register is also used as the first source operand register.
  // Double-register pairwise-long accumulate: $Vd = IntOp($src1, $Vm).
  // The accumulator $src1 has the result type ResTy and is tied to $Vd; $Vm
  // has type OpTy. The itinerary is hard-wired to IIC_VPALiD.
  2882. class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2883. bits<2> op17_16, bits<5> op11_7, bit op4,
  2884. string OpcodeStr, string Dt,
  2885. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2886. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
  2887. (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
  2888. OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
  2889. [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
  // Quad-register pairwise-long accumulate: $Vd = IntOp($src1, $Vm) on QPR.
  // QPR counterpart of N2VDPLInt2; $src1 is tied to $Vd and the itinerary is
  // hard-wired to IIC_VPALiQ.
  2890. class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
  2891. bits<2> op17_16, bits<5> op11_7, bit op4,
  2892. string OpcodeStr, string Dt,
  2893. ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  2894. : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
  2895. (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
  2896. OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
  2897. [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
  2898. // Shift by immediate,
  2899. // both double- and quad-register.
  // Both classes in this scope get the two-operand alias constraint
  // "$Vm = $Vd" from the enclosing let.
  2900. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  // Double-register shift by immediate: $Vd = OpNode($Vm, $SIMM).
  // ImmTy is the operand class of $SIMM in the ins list; the selection pattern
  // matches the amount as an i32 immediate.
  2901. class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2902. Format f, InstrItinClass itin, Operand ImmTy,
  2903. string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  2904. : N2VImm<op24, op23, op11_8, op7, 0, op4,
  2905. (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
  2906. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2907. [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
  // Quad-register counterpart of N2VDSh: the fifth N2VImm argument is 1 and
  // the register operands are QPR.
  2908. class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2909. Format f, InstrItinClass itin, Operand ImmTy,
  2910. string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  2911. : N2VImm<op24, op23, op11_8, op7, 1, op4,
  2912. (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
  2913. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2914. [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
  2915. }
  2916. // Long shift by immediate.
  // Long shift by immediate: a DPR source (OpTy) produces a QPR result (ResTy).
  // op6 is a parameter here. The format is fixed to N2RegVShLFrm and the
  // itinerary to IIC_VSHLiD. The pattern uses ImmTy:$SIMM directly as the
  // shift-amount operand instead of an (i32 imm) wrapper.
  2917. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
  2918. string OpcodeStr, string Dt,
  2919. ValueType ResTy, ValueType OpTy, Operand ImmTy,
  2920. SDPatternOperator OpNode>
  2921. : N2VImm<op24, op23, op11_8, op7, op6, op4,
  2922. (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
  2923. IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2924. [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
  2925. // Narrow shift by immediate.
  // Narrow shift by immediate: a QPR source (OpTy) produces a DPR result
  // (ResTy). The format is fixed to N2RegVShRFrm; the itinerary is a
  // parameter. The pattern matches the amount as (i32 ImmTy:$SIMM).
  2926. class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
  2927. InstrItinClass itin, string OpcodeStr, string Dt,
  2928. ValueType ResTy, ValueType OpTy, Operand ImmTy,
  2929. SDPatternOperator OpNode>
  2930. : N2VImm<op24, op23, op11_8, op7, op6, op4,
  2931. (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
  2932. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2933. [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
  2934. (i32 ImmTy:$SIMM))))]>;
  2935. // Shift right by immediate and accumulate,
  2936. // both double- and quad-register.
  // Both classes in this scope get the two-operand alias constraint
  // "$Vm = $Vd" from the enclosing let.
  2937. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  // Double-register shift-and-accumulate: $Vd = add($src1, ShOp($Vm, $SIMM)).
  // The accumulate step is the fixed 'add' node; only the shift node ShOp is
  // a parameter. $src1 is tied to $Vd. Format and itinerary are hard-wired to
  // N2RegVShRFrm and IIC_VPALiD.
  2938. class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2939. Operand ImmTy, string OpcodeStr, string Dt,
  2940. ValueType Ty, SDNode ShOp>
  2941. : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
  2942. (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
  2943. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2944. [(set DPR:$Vd, (Ty (add DPR:$src1,
  2945. (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
  // Quad-register counterpart of N2VDShAdd.
  // NOTE(review): this Q-register class uses IIC_VPALiD, the same itinerary as
  // the D-register class, although IIC_VPALiQ exists and is used by
  // N2VQPLInt2. Possibly unintentional - confirm against the scheduling
  // models before changing.
  2946. class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2947. Operand ImmTy, string OpcodeStr, string Dt,
  2948. ValueType Ty, SDNode ShOp>
  2949. : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
  2950. (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
  2951. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2952. [(set QPR:$Vd, (Ty (add QPR:$src1,
  2953. (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
  2954. }
  2955. // Shift by immediate and insert,
  2956. // both double- and quad-register.
  // Both classes in this scope get the two-operand alias constraint
  // "$Vm = $Vd" from the enclosing let.
  2957. let TwoOperandAliasConstraint = "$Vm = $Vd" in {
  // Double-register shift-and-insert: $Vd = ShOp($src1, $Vm, $SIMM).
  // ShOp is a three-operand node that receives the previous destination value
  // ($src1, tied to $Vd), the source and the immediate. The format f is a
  // parameter; the itinerary is hard-wired to IIC_VSHLiD.
  2958. class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2959. Operand ImmTy, Format f, string OpcodeStr, string Dt,
  2960. ValueType Ty,SDNode ShOp>
  2961. : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
  2962. (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
  2963. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2964. [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
  // Quad-register counterpart of N2VDShIns; the itinerary is IIC_VSHLiQ.
  2965. class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2966. Operand ImmTy, Format f, string OpcodeStr, string Dt,
  2967. ValueType Ty,SDNode ShOp>
  2968. : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
  2969. (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
  2970. OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
  2971. [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
  2972. }
  2973. // Convert, with fractional bits immediate,
  2974. // both double- and quad-register.
        // Double-register form. The pattern is ResTy(IntOp(OpTy $Vm, $SIMM)), so
        // source and result vector types may differ. The immediate uses the
        // neon_vcvt_imm32 operand class. No operand is tied (the constraint string
        // is empty). Uses the NVCVTFrm format and the IIC_VUNAD itinerary.
  2975. class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2976. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2977. SDPatternOperator IntOp>
  2978. : N2VImm<op24, op23, op11_8, op7, 0, op4,
  2979. (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
  2980. IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2981. [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
  2982. class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
  2983. string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
  2984. SDPatternOperator IntOp>
        // Quad-register convert with a fractional-bits immediate. The pattern is
        // ResTy(IntOp(OpTy $Vm, $SIMM)) on QPR operands, with the neon_vcvt_imm32
        // operand class for the immediate, no tied operand, the NVCVTFrm format
        // and the IIC_VUNAQ itinerary.
  2985. : N2VImm<op24, op23, op11_8, op7, 1, op4,
  2986. (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
  2987. IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
  2988. [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
  2989. //===----------------------------------------------------------------------===//
  2990. // Multiclasses
  2991. //===----------------------------------------------------------------------===//
  2992. // Abbreviations used in multiclass suffixes:
  2993. // Q = quarter int (8 bit) elements
  2994. // H = half int (16 bit) elements
  2995. // S = single int (32 bit) elements
  2996. // D = double int (64 bit) elements
  2997. // Neon 2-register vector operations and intrinsics.
  2998. // Neon 2-register comparisons.
  2999. // source operand element sizes of 8, 16 and 32 bits:
        // Each def matches ARMvcmpz applied to the single source $Vm with condition
        // fragment fc (presumably a compare against zero -- confirm against the
        // ARMvcmpz definition). Ten records are produced:
        //   * integer forms (v8i8/v4i16/v2i32 and v16i8/v8i16/v4i32) build their
        //     data-type suffix as Dt + "8"/"16"/"32";
        //   * floating-point forms (v2f32/v4f16 and v4f32/v8f16) use the literal
        //     suffixes "f32"/"f16", produce an integer vector with the same lane
        //     count, and force Inst{10} to 1;
        //   * the f16 forms additionally require HasNEON and HasFullFP16.
        // All records use NoItinerary.
  3000. multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3001. bits<5> op11_7, bit op4, string opc, string Dt,
  3002. string asm, PatFrag fc> {
  3003. // 64-bit vector types.
  3004. def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
  3005. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3006. opc, !strconcat(Dt, "8"), asm, "",
  3007. [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  3008. def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
  3009. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3010. opc, !strconcat(Dt, "16"), asm, "",
  3011. [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  3012. def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
  3013. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3014. opc, !strconcat(Dt, "32"), asm, "",
  3015. [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
        // f32 shares the 0b10 size field with v2i32; Inst{10} tells them apart.
  3016. def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
  3017. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3018. opc, "f32", asm, "",
  3019. [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
  3020. let Inst{10} = 1; // overwrite F = 1
  3021. }
        // f16 shares the 0b01 size field with v4i16; gated on full FP16 support.
  3022. def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
  3023. (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
  3024. opc, "f16", asm, "",
  3025. [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
  3026. Requires<[HasNEON,HasFullFP16]> {
  3027. let Inst{10} = 1; // overwrite F = 1
  3028. }
  3029. // 128-bit vector types.
  3030. def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
  3031. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3032. opc, !strconcat(Dt, "8"), asm, "",
  3033. [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  3034. def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
  3035. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3036. opc, !strconcat(Dt, "16"), asm, "",
  3037. [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  3038. def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
  3039. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3040. opc, !strconcat(Dt, "32"), asm, "",
  3041. [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  3042. def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
  3043. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3044. opc, "f32", asm, "",
  3045. [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
  3046. let Inst{10} = 1; // overwrite F = 1
  3047. }
  3048. def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
  3049. (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
  3050. opc, "f16", asm, "",
  3051. [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
  3052. Requires<[HasNEON,HasFullFP16]> {
  3053. let Inst{10} = 1; // overwrite F = 1
  3054. }
  3055. }
  3056. // Neon 3-register comparisons.
        // Quad-register form. The pattern is ResTy(ARMvcmp(OpTy $Vn, OpTy $Vm, fc)),
        // where fc is the condition fragment. The result type is passed separately
        // from the operand type. Commutable is forwarded to isCommutable.
  3057. class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  3058. InstrItinClass itin, string OpcodeStr, string Dt,
  3059. ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  3060. : N3V<op24, op23, op21_20, op11_8, 1, op4,
  3061. (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
  3062. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  3063. [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  3064. // All of these have a two-operand InstAlias.
  3065. let TwoOperandAliasConstraint = "$Vn = $Vd";
  3066. let isCommutable = Commutable;
  3067. }
  3068. class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
  3069. InstrItinClass itin, string OpcodeStr, string Dt,
  3070. ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
        // Double-register 3-register comparison. The pattern is
        // ResTy(ARMvcmp(OpTy $Vn, OpTy $Vm, fc)) on DPR operands; Commutable is
        // forwarded to isCommutable.
  3071. : N3V<op24, op23, op21_20, op11_8, 0, op4,
  3072. (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
  3073. OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
  3074. [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  3075. // All of these have a two-operand InstAlias.
  3076. let TwoOperandAliasConstraint = "$Vn = $Vd";
  3077. let isCommutable = Commutable;
  3078. }
  3079. multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
  3080. InstrItinClass itinD16, InstrItinClass itinD32,
  3081. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3082. string OpcodeStr, string Dt,
  3083. PatFrag fc, bit Commutable = 0> {
        // Instantiates N3VD_cmp / N3VQ_cmp for 8-, 16- and 32-bit integer elements.
        // Result and operand types are the same vector type. The 8- and 16-bit
        // defs share the *16 itineraries; only the 32-bit defs use the *32 ones.
  3084. // 64-bit vector types.
  3085. def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
  3086. OpcodeStr, !strconcat(Dt, "8"),
  3087. v8i8, v8i8, fc, Commutable>;
  3088. def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
  3089. OpcodeStr, !strconcat(Dt, "16"),
  3090. v4i16, v4i16, fc, Commutable>;
  3091. def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
  3092. OpcodeStr, !strconcat(Dt, "32"),
  3093. v2i32, v2i32, fc, Commutable>;
  3094. // 128-bit vector types.
  3095. def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
  3096. OpcodeStr, !strconcat(Dt, "8"),
  3097. v16i8, v16i8, fc, Commutable>;
  3098. def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
  3099. OpcodeStr, !strconcat(Dt, "16"),
  3100. v8i16, v8i16, fc, Commutable>;
  3101. def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
  3102. OpcodeStr, !strconcat(Dt, "32"),
  3103. v4i32, v4i32, fc, Commutable>;
  3104. }
  3105. // Neon 2-register vector intrinsics,
  3106. // element sizes of 8, 16 and 32 bits:
        // Instantiates N2VDInt (itinD) and N2VQInt (itinQ) once per element size.
        // The literal 0b00/0b01/0b10 argument selects the size and the data-type
        // suffix is Dt + "8"/"16"/"32". Result and operand types are identical.
  3107. multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3108. bits<5> op11_7, bit op4,
  3109. InstrItinClass itinD, InstrItinClass itinQ,
  3110. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3111. // 64-bit vector types.
  3112. def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3113. itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  3114. def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3115. itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  3116. def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3117. itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
  3118. // 128-bit vector types.
  3119. def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
  3120. itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  3121. def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
  3122. itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  3123. def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
  3124. itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
  3125. }
  3126. // Neon Narrowing 2-register vector operations,
  3127. // source operand element sizes of 16, 32 and 64 bits:
        // Each def is named after its (narrow) result type, while the data-type
        // suffix (Dt + "16"/"32"/"64") names the wider source element size:
        // v8i16 -> v8i8, v4i32 -> v4i16, v2i64 -> v2i32. All three share one
        // itinerary and the SDNode OpNode.
  3128. multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3129. bits<5> op11_7, bit op6, bit op4,
  3130. InstrItinClass itin, string OpcodeStr, string Dt,
  3131. SDNode OpNode> {
  3132. def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
  3133. itin, OpcodeStr, !strconcat(Dt, "16"),
  3134. v8i8, v8i16, OpNode>;
  3135. def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
  3136. itin, OpcodeStr, !strconcat(Dt, "32"),
  3137. v4i16, v4i32, OpNode>;
  3138. def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
  3139. itin, OpcodeStr, !strconcat(Dt, "64"),
  3140. v2i32, v2i64, OpNode>;
  3141. }
  3142. // Neon Narrowing 2-register vector intrinsics,
  3143. // source operand element sizes of 16, 32 and 64 bits:
        // Same layout as N2VN_HSD but built on N2VNInt and taking an
        // SDPatternOperator. Def names are the narrow result types; the Dt suffix
        // is the wider source element size.
  3144. multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
  3145. bits<5> op11_7, bit op6, bit op4,
  3146. InstrItinClass itin, string OpcodeStr, string Dt,
  3147. SDPatternOperator IntOp> {
  3148. def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
  3149. itin, OpcodeStr, !strconcat(Dt, "16"),
  3150. v8i8, v8i16, IntOp>;
  3151. def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
  3152. itin, OpcodeStr, !strconcat(Dt, "32"),
  3153. v4i16, v4i32, IntOp>;
  3154. def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
  3155. itin, OpcodeStr, !strconcat(Dt, "64"),
  3156. v2i32, v2i64, IntOp>;
  3157. }
  3158. // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
  3159. // source operand element sizes of 8, 16 and 32 bits:
        // Def names are the wide result types (v8i16/v4i32/v2i64); the sources are
        // v8i8/v4i16/v2i32 and the Dt suffix is the source element size. The three
        // 2-bit literals after op24_23 are hard-coded per element size rather than
        // taken from template parameters (see N2VL for the fields they fill).
        // Every def uses the fixed IIC_VQUNAiD itinerary.
  3160. multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
  3161. string OpcodeStr, string Dt, SDNode OpNode> {
  3162. def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3163. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  3164. def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3165. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  3166. def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
  3167. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
  3168. }
  3169. // Neon 3-register vector operations.
  3170. // First with only element sizes of 8, 16 and 32 bits:
        // Instantiates N3VD / N3VQ with the SDNode OpNode for each element size.
        // The 8- and 16-bit defs share the *16 itineraries; the 32-bit defs use
        // the *32 ones. Commutable (default 0) is forwarded to every def.
  3171. multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3172. InstrItinClass itinD16, InstrItinClass itinD32,
  3173. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3174. string OpcodeStr, string Dt,
  3175. SDNode OpNode, bit Commutable = 0> {
  3176. // 64-bit vector types.
  3177. def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
  3178. OpcodeStr, !strconcat(Dt, "8"),
  3179. v8i8, v8i8, OpNode, Commutable>;
  3180. def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
  3181. OpcodeStr, !strconcat(Dt, "16"),
  3182. v4i16, v4i16, OpNode, Commutable>;
  3183. def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
  3184. OpcodeStr, !strconcat(Dt, "32"),
  3185. v2i32, v2i32, OpNode, Commutable>;
  3186. // 128-bit vector types.
  3187. def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
  3188. OpcodeStr, !strconcat(Dt, "8"),
  3189. v16i8, v16i8, OpNode, Commutable>;
  3190. def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
  3191. OpcodeStr, !strconcat(Dt, "16"),
  3192. v8i16, v8i16, OpNode, Commutable>;
  3193. def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
  3194. OpcodeStr, !strconcat(Dt, "32"),
  3195. v4i32, v4i32, OpNode, Commutable>;
  3196. }
  3197. multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
        // 16- and 32-bit variants built on the N3V*SL / N3V*SL16 classes
        // (presumably the scalar-lane forms -- confirm against those classes).
        // The data-type suffixes are the fixed strings "i16"/"i32". The 32-bit
        // defs pass IIC_VMULi32D / IIC_VMULi32Q explicitly; the 16-bit classes
        // take no itinerary argument here. The Q defs pass a second, 64-bit
        // vector type (v4i16 / v2i32) after the result type.
  3198. def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  3199. def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  3200. def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  3201. def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
  3202. v4i32, v2i32, ShOp>;
  3203. }
  3204. // ....then also with element size 64 bits:
        // Inherits the six defs of N3V_QHS and adds v1i64 / v2i64. Only one D and
        // one Q itinerary are accepted; each is passed for both the 16- and 32-bit
        // itinerary slots of N3V_QHS and reused for the 64-bit defs.
  3205. multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3206. InstrItinClass itinD, InstrItinClass itinQ,
  3207. string OpcodeStr, string Dt,
  3208. SDNode OpNode, bit Commutable = 0>
  3209. : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
  3210. OpcodeStr, Dt, OpNode, Commutable> {
  3211. def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
  3212. OpcodeStr, !strconcat(Dt, "64"),
  3213. v1i64, v1i64, OpNode, Commutable>;
  3214. def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
  3215. OpcodeStr, !strconcat(Dt, "64"),
  3216. v2i64, v2i64, OpNode, Commutable>;
  3217. }
  3218. // Neon 3-register vector intrinsics.
  3219. // First with only element sizes of 16 and 32 bits:
        // Instantiates N3VDInt / N3VQInt for 16- and 32-bit elements, each with its
        // own itinerary. The Format f and the Commutable flag (default 0) are
        // forwarded unchanged to every def.
  3220. multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3221. InstrItinClass itinD16, InstrItinClass itinD32,
  3222. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3223. string OpcodeStr, string Dt,
  3224. SDPatternOperator IntOp, bit Commutable = 0> {
  3225. // 64-bit vector types.
  3226. def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
  3227. OpcodeStr, !strconcat(Dt, "16"),
  3228. v4i16, v4i16, IntOp, Commutable>;
  3229. def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
  3230. OpcodeStr, !strconcat(Dt, "32"),
  3231. v2i32, v2i32, IntOp, Commutable>;
  3232. // 128-bit vector types.
  3233. def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
  3234. OpcodeStr, !strconcat(Dt, "16"),
  3235. v8i16, v8i16, IntOp, Commutable>;
  3236. def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
  3237. OpcodeStr, !strconcat(Dt, "32"),
  3238. v4i32, v4i32, IntOp, Commutable>;
  3239. }
  3240. multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3241. InstrItinClass itinD16, InstrItinClass itinD32,
  3242. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3243. string OpcodeStr, string Dt,
  3244. SDPatternOperator IntOp> {
        // Counterpart of N3VInt_HS built on the N3VDIntSh / N3VQIntSh classes.
        // It has no Commutable parameter. (How the *Sh classes differ from the
        // plain ones is defined outside this section.)
  3245. // 64-bit vector types.
  3246. def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
  3247. OpcodeStr, !strconcat(Dt, "16"),
  3248. v4i16, v4i16, IntOp>;
  3249. def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
  3250. OpcodeStr, !strconcat(Dt, "32"),
  3251. v2i32, v2i32, IntOp>;
  3252. // 128-bit vector types.
  3253. def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
  3254. OpcodeStr, !strconcat(Dt, "16"),
  3255. v8i16, v8i16, IntOp>;
  3256. def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
  3257. OpcodeStr, !strconcat(Dt, "32"),
  3258. v4i32, v4i32, IntOp>;
  3259. }
  3260. multiclass N3VIntSL_HS<bits<4> op11_8,
  3261. InstrItinClass itinD16, InstrItinClass itinD32,
  3262. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3263. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
        // 16- and 32-bit intrinsic variants built on the N3V*IntSL / N3V*IntSL16
        // classes (presumably scalar-lane forms -- confirm against those classes).
        // The Q defs pass a second, 64-bit vector type (v4i16 / v2i32) after the
        // result type. Each def gets its own itinerary.
  3264. def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
  3265. OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  3266. def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
  3267. OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  3268. def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
  3269. OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  3270. def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
  3271. OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
  3272. }
  3273. // ....then also with element size of 8 bits:
        // Inherits the 16/32-bit defs of N3VInt_HS and adds v8i8 / v16i8. The
        // 8-bit defs reuse the 16-bit itineraries (itinD16 / itinQ16).
  3274. multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3275. InstrItinClass itinD16, InstrItinClass itinD32,
  3276. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3277. string OpcodeStr, string Dt,
  3278. SDPatternOperator IntOp, bit Commutable = 0>
  3279. : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3280. OpcodeStr, Dt, IntOp, Commutable> {
  3281. def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
  3282. OpcodeStr, !strconcat(Dt, "8"),
  3283. v8i8, v8i8, IntOp, Commutable>;
  3284. def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
  3285. OpcodeStr, !strconcat(Dt, "8"),
  3286. v16i8, v16i8, IntOp, Commutable>;
  3287. }
  3288. multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3289. InstrItinClass itinD16, InstrItinClass itinD32,
  3290. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3291. string OpcodeStr, string Dt,
  3292. SDPatternOperator IntOp>
  3293. : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3294. OpcodeStr, Dt, IntOp> {
        // Adds the 8-bit defs (v8i8 / v16i8) on top of N3VInt_HSSh, using the
        // N3V*IntSh classes and the 16-bit itineraries.
  3295. def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
  3296. OpcodeStr, !strconcat(Dt, "8"),
  3297. v8i8, v8i8, IntOp>;
  3298. def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
  3299. OpcodeStr, !strconcat(Dt, "8"),
  3300. v16i8, v16i8, IntOp>;
  3301. }
  3302. // ....then also with element size of 64 bits:
        // Inherits the 8/16/32-bit defs of N3VInt_QHS and adds v1i64 / v2i64. The
        // 64-bit defs reuse the 32-bit itineraries (itinD32 / itinQ32).
  3303. multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3304. InstrItinClass itinD16, InstrItinClass itinD32,
  3305. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3306. string OpcodeStr, string Dt,
  3307. SDPatternOperator IntOp, bit Commutable = 0>
  3308. : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3309. OpcodeStr, Dt, IntOp, Commutable> {
  3310. def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
  3311. OpcodeStr, !strconcat(Dt, "64"),
  3312. v1i64, v1i64, IntOp, Commutable>;
  3313. def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
  3314. OpcodeStr, !strconcat(Dt, "64"),
  3315. v2i64, v2i64, IntOp, Commutable>;
  3316. }
  3317. multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
  3318. InstrItinClass itinD16, InstrItinClass itinD32,
  3319. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3320. string OpcodeStr, string Dt,
  3321. SDPatternOperator IntOp>
  3322. : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
  3323. OpcodeStr, Dt, IntOp> {
        // Adds the 64-bit defs (v1i64 / v2i64) on top of N3VInt_QHSSh, using the
        // N3V*IntSh classes and the 32-bit itineraries.
  3324. def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
  3325. OpcodeStr, !strconcat(Dt, "64"),
  3326. v1i64, v1i64, IntOp>;
  3327. def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
  3328. OpcodeStr, !strconcat(Dt, "64"),
  3329. v2i64, v2i64, IntOp>;
  3330. }
  3331. // Neon Narrowing 3-register vector intrinsics,
  3332. // source operand element sizes of 16, 32 and 64 bits:
        // Def names are the narrow result types; the Dt suffix names the wider
        // source element size (v8i16 -> v8i8, v4i32 -> v4i16, v2i64 -> v2i32).
        // No itinerary is passed to N3VNInt from here.
  3333. multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  3334. string OpcodeStr, string Dt,
  3335. SDPatternOperator IntOp, bit Commutable = 0> {
  3336. def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
  3337. OpcodeStr, !strconcat(Dt, "16"),
  3338. v8i8, v8i16, IntOp, Commutable>;
  3339. def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
  3340. OpcodeStr, !strconcat(Dt, "32"),
  3341. v4i16, v4i32, IntOp, Commutable>;
  3342. def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
  3343. OpcodeStr, !strconcat(Dt, "64"),
  3344. v2i32, v2i64, IntOp, Commutable>;
  3345. }
  3346. // Neon Long 3-register vector operations.
        // Def names are the wide result types; sources are the 64-bit vectors with
        // half-width elements and the Dt suffix is the source element size. The
        // 8- and 16-bit source variants share itin16; the 32-bit one uses itin32.
  3347. multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3348. InstrItinClass itin16, InstrItinClass itin32,
  3349. string OpcodeStr, string Dt,
  3350. SDNode OpNode, bit Commutable = 0> {
  3351. def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
  3352. OpcodeStr, !strconcat(Dt, "8"),
  3353. v8i16, v8i8, OpNode, Commutable>;
  3354. def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
  3355. OpcodeStr, !strconcat(Dt, "16"),
  3356. v4i32, v4i16, OpNode, Commutable>;
  3357. def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
  3358. OpcodeStr, !strconcat(Dt, "32"),
  3359. v2i64, v2i32, OpNode, Commutable>;
  3360. }
  3361. multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
  3362. InstrItinClass itin, string OpcodeStr, string Dt,
  3363. SDNode OpNode> {
        // Long variants built on N3VLSL16 / N3VLSL (presumably scalar-lane forms
        // -- confirm against those classes). Unlike N3VL_QHS, the defs here are
        // named after the source type (v4i16 / v2i32); results are v4i32 / v2i64.
        // Both defs share one itinerary.
  3364. def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
  3365. !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  3366. def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
  3367. !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
  3368. }
  3369. multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3370. InstrItinClass itin16, InstrItinClass itin32,
  3371. string OpcodeStr, string Dt,
  3372. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
        // Same layout as N3VL_QHS, but built on N3VLExt and forwarding an extra
        // ExtOp operator in addition to OpNode. The 8- and 16-bit source variants
        // share itin16; the 32-bit one uses itin32.
  3373. def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
  3374. OpcodeStr, !strconcat(Dt, "8"),
  3375. v8i16, v8i8, OpNode, ExtOp, Commutable>;
  3376. def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
  3377. OpcodeStr, !strconcat(Dt, "16"),
  3378. v4i32, v4i16, OpNode, ExtOp, Commutable>;
  3379. def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
  3380. OpcodeStr, !strconcat(Dt, "32"),
  3381. v2i64, v2i32, OpNode, ExtOp, Commutable>;
  3382. }
  3383. // Neon Long 3-register vector intrinsics.
  3384. // First with only element sizes of 16 and 32 bits:
        // Def names are the wide result types (v4i32 / v2i64); the Dt suffix is the
        // source element size. Each def has its own itinerary.
  3385. multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3386. InstrItinClass itin16, InstrItinClass itin32,
  3387. string OpcodeStr, string Dt,
  3388. SDPatternOperator IntOp, bit Commutable = 0> {
  3389. def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
  3390. OpcodeStr, !strconcat(Dt, "16"),
  3391. v4i32, v4i16, IntOp, Commutable>;
  3392. def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
  3393. OpcodeStr, !strconcat(Dt, "32"),
  3394. v2i64, v2i32, IntOp, Commutable>;
  3395. }
  3396. multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
  3397. InstrItinClass itin, string OpcodeStr, string Dt,
  3398. SDPatternOperator IntOp> {
        // Long intrinsic variants built on N3VLIntSL16 / N3VLIntSL (presumably
        // scalar-lane forms -- confirm against those classes). Defs are named
        // after the source type (v4i16 / v2i32); results are v4i32 / v2i64. Both
        // share one itinerary.
  3399. def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
  3400. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  3401. def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
  3402. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3403. }
  3404. // ....then also with element size of 8 bits:
        // Inherits the defs of N3VLInt_HS and adds v8i16 (from v8i8 sources),
        // which reuses the 16-bit itinerary itin16.
  3405. multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3406. InstrItinClass itin16, InstrItinClass itin32,
  3407. string OpcodeStr, string Dt,
  3408. SDPatternOperator IntOp, bit Commutable = 0>
  3409. : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
  3410. IntOp, Commutable> {
  3411. def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
  3412. OpcodeStr, !strconcat(Dt, "8"),
  3413. v8i16, v8i8, IntOp, Commutable>;
  3414. }
  3415. // ....with explicit extend (VABDL).
        // Built on N3VLIntExt, which receives both the intrinsic operator IntOp and
        // the extension node ExtOp. All three element sizes share one itinerary.
        // Def names are the wide result types; the Dt suffix is the source size.
  3416. multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3417. InstrItinClass itin, string OpcodeStr, string Dt,
  3418. SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  3419. def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
  3420. OpcodeStr, !strconcat(Dt, "8"),
  3421. v8i16, v8i8, IntOp, ExtOp, Commutable>;
  3422. def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
  3423. OpcodeStr, !strconcat(Dt, "16"),
  3424. v4i32, v4i16, IntOp, ExtOp, Commutable>;
  3425. def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
  3426. OpcodeStr, !strconcat(Dt, "32"),
  3427. v2i64, v2i32, IntOp, ExtOp, Commutable>;
  3428. }
  3429. // Neon Wide 3-register vector intrinsics,
  3430. // source operand element sizes of 8, 16 and 32 bits:
        // Built on N3VW, which receives an SDNode OpNode plus an extension operator
        // ExtOp. Def names are the wide types (v8i16 / v4i32 / v2i64); the second
        // type argument is the narrow 64-bit vector and the Dt suffix is its
        // element size. No itinerary is passed from here.
  3431. multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3432. string OpcodeStr, string Dt,
  3433. SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  3434. def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
  3435. OpcodeStr, !strconcat(Dt, "8"),
  3436. v8i16, v8i8, OpNode, ExtOp, Commutable>;
  3437. def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
  3438. OpcodeStr, !strconcat(Dt, "16"),
  3439. v4i32, v4i16, OpNode, ExtOp, Commutable>;
  3440. def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
  3441. OpcodeStr, !strconcat(Dt, "32"),
  3442. v2i64, v2i32, OpNode, ExtOp, Commutable>;
  3443. }
  3444. // Neon Multiply-Op vector operations,
  3445. // element sizes of 8, 16 and 32 bits:
        // Instantiates N3VDMulOp / N3VQMulOp. The multiply node is fixed to `mul`
        // for every def, and OpNode is the second operation forwarded alongside
        // it. The 8- and 16-bit defs share the *16 itineraries.
  3446. multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3447. InstrItinClass itinD16, InstrItinClass itinD32,
  3448. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3449. string OpcodeStr, string Dt, SDNode OpNode> {
  3450. // 64-bit vector types.
  3451. def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
  3452. OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  3453. def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
  3454. OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  3455. def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
  3456. OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
  3457. // 128-bit vector types.
  3458. def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
  3459. OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  3460. def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
  3461. OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  3462. def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
  3463. OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
  3464. }
  3465. multiclass N3VMulOpSL_HS<bits<4> op11_8,
  3466. InstrItinClass itinD16, InstrItinClass itinD32,
  3467. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3468. string OpcodeStr, string Dt, SDPatternOperator ShOp> {
        // 16- and 32-bit multiply-op variants built on the N3V*MulOpSL /
        // N3V*MulOpSL16 classes (presumably scalar-lane forms -- confirm against
        // those classes). The multiply node is fixed to `mul`; ShOp is the second
        // operation. The Q defs pass a second, 64-bit vector type after the
        // result type.
  3469. def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
  3470. OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  3471. def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
  3472. OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  3473. def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
  3474. OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
  3475. mul, ShOp>;
  3476. def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
  3477. OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
  3478. mul, ShOp>;
  3479. }
  3480. // Neon Intrinsic-Op vector operations,
  3481. // element sizes of 8, 16 and 32 bits:
        // Instantiates N3VDIntOp / N3VQIntOp, forwarding both the intrinsic
        // operator IntOp and the SDNode OpNode. Only one D and one Q itinerary are
        // taken; each is shared by all three element sizes.
  3482. multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3483. InstrItinClass itinD, InstrItinClass itinQ,
  3484. string OpcodeStr, string Dt, SDPatternOperator IntOp,
  3485. SDNode OpNode> {
  3486. // 64-bit vector types.
  3487. def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
  3488. OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  3489. def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
  3490. OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  3491. def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
  3492. OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
  3493. // 128-bit vector types.
  3494. def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
  3495. OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  3496. def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
  3497. OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  3498. def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
  3499. OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
  3500. }
  3501. // Neon 3-argument intrinsics,
  3502. // element sizes of 16 and 32 bits:
        // Instantiates N3VDInt3 / N3VQInt3 with identical result and operand
        // types, one itinerary per def.
  3503. multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3504. InstrItinClass itinD16, InstrItinClass itinD32,
  3505. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3506. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3507. // 64-bit vector types.
  3508. def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
  3509. OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  3510. def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
  3511. OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
  3512. // 128-bit vector types.
  3513. def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
  3514. OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  3515. def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
  3516. OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
  3517. }
  3518. // element sizes of 8, 16 and 32 bits:
        // Neon 3-argument intrinsics: inherits the 16/32-bit defs of N3VInt3_HS
        // and adds v8i8 / v16i8, which reuse the 16-bit itineraries.
  3519. multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3520. InstrItinClass itinD16, InstrItinClass itinD32,
  3521. InstrItinClass itinQ16, InstrItinClass itinQ32,
  3522. string OpcodeStr, string Dt, SDPatternOperator IntOp>
  3523. :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
  3524. itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  3525. // 64-bit vector types.
  3526. def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
  3527. OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  3528. // 128-bit vector types.
  3529. def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
  3530. OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  3531. }
  3532. // Neon Long Multiply-Op vector operations,
  3533. // element sizes of 8, 16 and 32 bits:
        // Built on N3VLMulOp. Unlike N3VMulOp_QHS, the multiply node is a template
        // parameter (MulOp) rather than a fixed `mul`. Def names are the wide
        // result types; the Dt suffix is the source element size. The 8- and
        // 16-bit source variants share itin16.
  3534. multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3535. InstrItinClass itin16, InstrItinClass itin32,
  3536. string OpcodeStr, string Dt, SDNode MulOp,
  3537. SDNode OpNode> {
  3538. def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
  3539. !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  3540. def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
  3541. !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  3542. def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
  3543. !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
  3544. }
  3545. multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
  3546. string Dt, SDNode MulOp, SDNode OpNode> {
        // Long multiply-op variants built on N3VLMulOpSL16 / N3VLMulOpSL
        // (presumably scalar-lane forms -- confirm against those classes). The
        // itineraries are hard-coded (IIC_VMACi16D / IIC_VMACi32D) rather than
        // passed in. Defs are named after the source type; results are
        // v4i32 / v2i64.
  3547. def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
  3548. !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  3549. def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
  3550. !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
  3551. }
  3552. // Neon Long 3-argument intrinsics.
  3553. // First with only element sizes of 16 and 32 bits:
        // Built on N3VLInt3. Def names are the wide result types (v4i32 / v2i64);
        // the Dt suffix is the source element size. One itinerary per def.
  3554. multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3555. InstrItinClass itin16, InstrItinClass itin32,
  3556. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  3557. def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
  3558. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  3559. def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
  3560. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3561. }
  3562. multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
  3563. string OpcodeStr, string Dt, SDPatternOperator IntOp> {
        // Long 3-argument intrinsic variants built on N3VLInt3SL16 / N3VLInt3SL
        // (presumably scalar-lane forms -- confirm against those classes). The
        // itineraries are hard-coded (IIC_VMACi16D / IIC_VMACi32D). Defs are named
        // after the source type; results are v4i32 / v2i64.
  3564. def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
  3565. OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  3566. def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
  3567. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
  3568. }
  3569. // ....then also with element size of 8 bits:
        // Inherits the defs of N3VLInt3_HS and adds v8i16 (from v8i8 sources),
        // which reuses the 16-bit itinerary itin16.
  3570. multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3571. InstrItinClass itin16, InstrItinClass itin32,
  3572. string OpcodeStr, string Dt, SDPatternOperator IntOp>
  3573. : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  3574. def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
  3575. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
  3576. }
  3577. // ....with explicit extend (VABAL).
        // Built on N3VLIntExtOp, which receives three operators: the intrinsic
        // IntOp, the extension node ExtOp and the SDNode OpNode. All element sizes
        // share one itinerary. Def names are the wide result types; the Dt suffix
        // is the source element size.
  3578. multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
  3579. InstrItinClass itin, string OpcodeStr, string Dt,
  3580. SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  3581. def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
  3582. OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
  3583. IntOp, ExtOp, OpNode>;
  3584. def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
  3585. OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
  3586. IntOp, ExtOp, OpNode>;
  3587. def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
  3588. OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
  3589. IntOp, ExtOp, OpNode>;
  3590. }
  // Neon Pairwise long 2-register intrinsics, element sizes of 8, 16 and
  // 32 bits.
  //
  // Each def is named after the second vector type it passes; the type passed
  // just before it has half as many elements, each twice as wide.  The two-bit
  // field (0b00 / 0b01 / 0b10) follows the 8 / 16 / 32 datatype suffix.
  multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                          bits<5> op11_7, bit op4,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp> {
    // 64-bit vector types (N2VDPLInt).
    def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
    def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
    def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

    // 128-bit vector types (N2VQPLInt).
    def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
    def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
    def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                          OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
  }
  // Neon Pairwise long 2-register accumulate intrinsics, element sizes of 8,
  // 16 and 32 bits.
  //
  // Same layout as N2VPLInt_QHS, but built on the N2VDPLInt2 / N2VQPLInt2
  // classes.  Each def is named after the second vector type it passes; the
  // type passed before it has half as many elements, each twice as wide.
  multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                           bits<5> op11_7, bit op4,
                           string OpcodeStr, string Dt,
                           SDPatternOperator IntOp> {
    // 64-bit vector types (N2VDPLInt2).
    def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
    def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
    def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

    // 128-bit vector types (N2VQPLInt2).
    def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
    def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
    def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                           OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
  }
  // Neon 2-register vector shift by immediate, element sizes of 8, 16, 32 and
  // 64 bits.  Two multiclasses cover the two instruction formats:
  // N2VShL_QHSD (below) passes N2RegVShLFrm, N2VShR_QHSD passes N2RegVShRFrm.
  //
  // N2VShL_QHSD: every def takes an i32imm shift operand.  For the 8/16/32-bit
  // defs the top bits of imm6 (Inst{21-19}) are pinned to mark the element
  // size; the 64-bit defs pass 1 instead of 0 in the fourth argument and leave
  // all of imm6 free.
  multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "8", v8i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "16", v4i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "32", v2i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "64", v1i64, OpNode>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "8", v16i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "16", v8i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "32", v4i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                       OpcodeStr, Dt # "64", v2i64, OpNode>;
                               // imm6 = xxxxxx
  }
  // N2VShR_QHSD: 2-register vector shift by immediate in the N2RegVShRFrm
  // format, element sizes of 8, 16, 32 and 64 bits.
  //
  // The shift operand class tracks the element size (shr_imm8 / shr_imm16 /
  // shr_imm32 / shr_imm64).  For the 8/16/32-bit defs the top bits of imm6
  // (Inst{21-19}) are pinned; the 64-bit defs pass 1 instead of 0 in the
  // fourth argument and leave all of imm6 free.
  multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         SDNode OpNode> {
    // 64-bit vector types.
    def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                       OpcodeStr, Dt # "8", v8i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                       OpcodeStr, Dt # "16", v4i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                       OpcodeStr, Dt # "32", v2i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                       OpcodeStr, Dt # "64", v1i64, OpNode>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                       OpcodeStr, Dt # "8", v16i8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                       OpcodeStr, Dt # "16", v8i16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                       OpcodeStr, Dt # "32", v4i32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                       OpcodeStr, Dt # "64", v2i64, OpNode>;
                               // imm6 = xxxxxx
  }
  // Neon Shift-Accumulate vector operations, element sizes of 8, 16, 32 and
  // 64 bits.
  //
  // N2VShAdd_QHSD instantiates N2VDShAdd (64-bit vectors) and N2VQShAdd
  // (128-bit vectors) with ShOp as the shift node.  The shift operand class
  // tracks the element size (shr_imm8 .. shr_imm64).  The 8/16/32-bit defs pin
  // the top bits of imm6 (Inst{21-19}); the 64-bit defs pass 1 instead of 0 in
  // the fourth argument and leave imm6 unconstrained.
  multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                           string OpcodeStr, string Dt, SDNode ShOp> {
    // 64-bit vector types.
    def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                          OpcodeStr, Dt # "8", v8i8, ShOp> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                          OpcodeStr, Dt # "16", v4i16, ShOp> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                          OpcodeStr, Dt # "32", v2i32, ShOp> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                          OpcodeStr, Dt # "64", v1i64, ShOp>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                          OpcodeStr, Dt # "8", v16i8, ShOp> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                          OpcodeStr, Dt # "16", v8i16, ShOp> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                          OpcodeStr, Dt # "32", v4i32, ShOp> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                          OpcodeStr, Dt # "64", v2i64, ShOp>;
                               // imm6 = xxxxxx
  }
  // Neon Shift-Insert vector operations, element sizes of 8, 16, 32 and
  // 64 bits.  Two multiclasses cover the two instruction formats:
  // N2VShInsL_QHSD (below) passes N2RegVShLFrm, N2VShInsR_QHSD passes
  // N2RegVShRFrm.
  //
  // N2VShInsL_QHSD: every def uses an i32imm shift operand and the NEONvsliImm
  // node.  The datatype string is the bare element size ("8" .. "64"); there
  // is no Dt prefix parameter.  The 8/16/32-bit defs pin the top bits of imm6
  // (Inst{21-19}); the 64-bit defs pass 1 instead of 0 in the fourth argument.
  multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                            string OpcodeStr> {
    // 64-bit vector types.
    def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                          N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                               // imm6 = xxxxxx
  }
  // N2VShInsR_QHSD: Shift-Insert in the N2RegVShRFrm format, element sizes of
  // 8, 16, 32 and 64 bits.
  //
  // Every def uses the NEONvsriImm node and a shift operand class that tracks
  // the element size (shr_imm8 .. shr_imm64).  The datatype string is the bare
  // element size.  The 8/16/32-bit defs pin the top bits of imm6
  // (Inst{21-19}); the 64-bit defs pass 1 instead of 0 in the fourth argument.
  multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                            string OpcodeStr> {
    // 64-bit vector types.
    def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                          N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                          N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                          N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                          N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                               // imm6 = xxxxxx

    // 128-bit vector types.
    def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                          N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                          N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                          N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
    def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                          N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                               // imm6 = xxxxxx
  }
  // Neon Shift Long operations, element sizes of 8, 16 and 32 bits.
  //
  // N2VLSh_QHS instantiates N2VLSh three times.  The immediate operand class
  // narrows with the datatype suffix (imm1_7 for "8", imm1_15 for "16",
  // imm1_31 for "32"), and each def pins the top bits of imm6 (Inst{21-19}).
  multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                        bit op4, string OpcodeStr, string Dt,
                        SDPatternOperator OpNode> {
    // Dt#"8": v8i16 paired with v8i8.
    def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                       OpcodeStr, Dt # "8", v8i16, v8i8, imm1_7, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    // Dt#"16": v4i32 paired with v4i16.
    def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                       OpcodeStr, Dt # "16", v4i32, v4i16, imm1_15, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    // Dt#"32": v2i64 paired with v2i32.
    def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                       OpcodeStr, Dt # "32", v2i64, v2i32, imm1_31, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
  }
  // Neon Shift Narrow operations, element sizes of 16, 32 and 64 bits.
  //
  // N2VNSh_HSD instantiates N2VNSh three times.  The datatype suffix names the
  // wider element size ("16" / "32" / "64"), while the def name and the shift
  // operand class (shr_imm8 / shr_imm16 / shr_imm32) follow the narrower type.
  // Each def pins the top bits of imm6 (Inst{21-19}).
  multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                        bit op4, InstrItinClass itin, string OpcodeStr,
                        string Dt, SDPatternOperator OpNode> {
    // Dt#"16": v8i8 paired with v8i16.
    def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                       OpcodeStr, Dt # "16",
                       v8i8, v8i16, shr_imm8, OpNode> {
      let Inst{21-19} = 0b001; // imm6 = 001xxx
    }
    // Dt#"32": v4i16 paired with v4i32.
    def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                       OpcodeStr, Dt # "32",
                       v4i16, v4i32, shr_imm16, OpNode> {
      let Inst{21-20} = 0b01;  // imm6 = 01xxxx
    }
    // Dt#"64": v2i32 paired with v2i64.
    def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                       OpcodeStr, Dt # "64",
                       v2i32, v2i64, shr_imm32, OpNode> {
      let Inst{21} = 0b1;      // imm6 = 1xxxxx
    }
  }
  //===----------------------------------------------------------------------===//
  // Instruction Definitions.
  //===----------------------------------------------------------------------===//

  // Vector Add Operations.

  // VADD : Vector Add (integer and floating-point).
  // The integer forms come from one multiclass; the f32 forms are plain defs,
  // and the f16 forms additionally require HasFullFP16.
  defm VADD   : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
  def  VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
  def  VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
  def  VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
  def  VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;

  // VADDL : Vector Add Long (Q = D + D).
  // Signed form extends with sext, unsigned form with zanyext.
  defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
  defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zanyext, 1>;

  // VADDW : Vector Add Wide (Q = Q + D).
  defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
  defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;

  // VHADD : Vector Halving Add.
  defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
  defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;

  // VRHADD : Vector Rounding Halving Add.
  defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vrhadd", "s", int_arm_neon_vrhadds, 1>;
  defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vrhadd", "u", int_arm_neon_vrhaddu, 1>;

  // VQADD : Vector Saturating Add.
  // Selected directly from the saddsat / uaddsat nodes.
  defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", saddsat, 1>;
  defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", uaddsat, 1>;

  // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q).
  // Defined with null_frag; the selection patterns are the explicit Pats in
  // the HasNEON block that follows.
  defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;

  // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q).
  defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                             int_arm_neon_vraddhn, 1>;

  // VADDHN selection: trunc(ARMvshruImm(add(Vn, Vm), <narrow element width>)),
  // i.e. the upper half of every sum element.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
              (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
              (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
              (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
  }
  // Vector Multiply Operations.

  // VMUL : Vector Multiply (integer, polynomial and floating-point).
  // Integer forms come from N3V_QHS, polynomial ("p8") forms use the
  // int_arm_neon_vmulp intrinsic, and the f16 forms require HasFullFP16.
  defm VMUL   : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
  def  VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
  def  VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
  def  VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
  def  VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
  def  VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
  def  VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;

  // Multiply-by-lane ("sl") forms.
  defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
  def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
  def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                         v2f32, fmul>;
  def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                  Requires<[HasNEON,HasFullFP16]>;
  def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                           v4f16, fmul>,
                  Requires<[HasNEON,HasFullFP16]>;

  let Predicates = [HasNEON] in {
    // Q-register multiply by a lane duplicated out of a Q register: the D
    // subregister is extracted with DSubReg_*_reg and the lane index is
    // remapped with SubReg_*_lane.
    def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                          (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
              (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                          (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
              (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
    def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
              (v4f32 (VMULslfq (v4f32 QPR:$src1),
                               (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
    def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                           (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
              (v8f16 (VMULslhq(v8f16 QPR:$src1),
                              (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

    // Multiply by a scalar splat (ARMvdup): the scalar is inserted into
    // ssub_0 of an otherwise undefined D register and used as lane 0.
    def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
              (VMULslfd DPR:$Rn,
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                        (i32 0))>;
    def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
              (VMULslhd DPR:$Rn,
                        (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                        (i32 0))>;
    def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
              (VMULslfq QPR:$Rn,
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                        (i32 0))>;
    def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
              (VMULslhq QPR:$Rn,
                        (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                        (i32 0))>;
  }
  // VQDMULH : Vector Saturating Doubling Multiply Returning High Half.
  // Register and by-lane forms both select the int_arm_neon_vqdmulh intrinsic.
  defm VQDMULH   : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm,
                             IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
                             "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
  defm VQDMULHsl : N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                               IIC_VMULi16Q, IIC_VMULi32Q,
                               "vqdmulh", "s", int_arm_neon_vqdmulh>;

  // Q-register by-lane selection when the lane is duplicated out of a Q
  // register: extract the D subregister holding the lane and remap the index.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                           (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                               imm:$lane)))),
              (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                     (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                             (DSubReg_i16_reg imm:$lane))),
                                     (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                           (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                               imm:$lane)))),
              (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                     (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                             (DSubReg_i32_reg imm:$lane))),
                                     (SubReg_i32_lane imm:$lane)))>;
  }
  // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High
  // Half.  Register and by-lane forms both select int_arm_neon_vqrdmulh.
  defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                              IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
  defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                                IIC_VMULi16Q, IIC_VMULi32Q,
                                "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

  // Q-register by-lane selection when the lane is duplicated out of a Q
  // register: extract the D subregister holding the lane and remap the index.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                            (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                                imm:$lane)))),
              (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                      (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                              (DSubReg_i16_reg imm:$lane))),
                                      (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                            (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                                imm:$lane)))),
              (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                      (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                              (DSubReg_i32_reg imm:$lane))),
                                      (SubReg_i32_lane imm:$lane)))>;
  }
  // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D).
  // The register forms share an explicit post-encoder method and decoder
  // namespace.  The p64 form additionally requires HasV8 and HasAES.
  let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
      DecoderNamespace = "NEONData" in {
    defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                             "vmull", "s", ARMvmulls, 1>;
    defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                             "vmull", "u", ARMvmullu, 1>;
    def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
    def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                              "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                    Requires<[HasV8, HasAES]>;
  }

  // By-lane forms (defined outside the let block above).
  defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
  defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

  // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D).
  defm VQDMULL   : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                              "vqdmull", "s", int_arm_neon_vqdmull, 1>;
  defm VQDMULLsl : N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                                "vqdmull", "s", int_arm_neon_vqdmull>;
  // Vector Multiply-Accumulate and Multiply-Subtract Operations.

  // VMLA : Vector Multiply Accumulate (integer and floating-point).
  // All floating-point forms are gated on UseFPVMLx; the f16 forms also
  // require HasFullFP16.
  defm VMLA   : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  def  VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
  def  VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
  def  VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def  VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

  // By-lane forms.
  // NOTE(review): the f16 by-lane defs below use plain fmul/fadd, whereas
  // every other FP VMLA def uses fmul_su/fadd_mlx — confirm this is intended.
  defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                                IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                              v2f32, fmul_su, fadd_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;
  def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                              v4f32, v2f32, fmul_su, fadd_mlx>,
                  Requires<[HasNEON, UseFPVMLx]>;
  def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                                v4f16, fmul, fadd>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                                v8f16, v4f16, fmul, fadd>,
                  Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

  // Integer Q-register by-lane selection when the lane is duplicated out of a
  // Q register: extract the D subregister and remap the lane index.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
              (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
              (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
  }

  // f32 counterpart of the patterns above; carries its own Requires so that
  // it is also gated on UseFPVMLx.
  def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                    (fmul_su (v4f32 QPR:$src2),
                             (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
            (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                             (v4f32 QPR:$src2),
                             (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                     (DSubReg_i32_reg imm:$lane))),
                             (SubReg_i32_lane imm:$lane)))>,
        Requires<[HasNEON, UseFPVMLx]>;
  // VMLAL : Vector Multiply Accumulate Long (Q += D * D).
  // Signed forms multiply with ARMvmulls, unsigned forms with ARMvmullu; the
  // accumulation node is add in every case.
  defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                                "vmlal", "s", ARMvmulls, add>;
  defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                                "vmlal", "u", ARMvmullu, add>;

  // By-lane forms.
  defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
  defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
  let Predicates = [HasNEON, HasV8_1a] in {
    // v8.1a Neon Rounding Double Multiply-Op vector operations.
    //
    // The instructions are defined with null_frag; selection is done by the
    // explicit Pats that follow each defm.  As those Pats show, all three
    // operands of a given form have the same vector type (D,D,D or Q,Q,Q).

    // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
    //            Returning High Half.
    defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                               null_frag>;
    def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
                                            (v4i16 DPR:$Vm))),
              (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
    def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
                                            (v2i32 DPR:$Vm))),
              (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
    def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
                                            (v8i16 QPR:$Vm))),
              (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
    def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
                                            (v4i32 QPR:$Vm))),
              (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

    // VQRDMLAH by lane.  D forms take the lane register directly (DPR_8 for
    // i16, DPR_VFP2 for i32); Q forms extract the D subregister holding the
    // lane and remap the lane index.
    defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                    IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                    null_frag>;
    def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1),
                                            (v4i16 DPR:$Vn),
                                            (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                                imm:$lane)))),
              (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                      imm:$lane))>;
    def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1),
                                            (v2i32 DPR:$Vn),
                                            (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                                imm:$lane)))),
              (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                      imm:$lane))>;
    def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1),
                                            (v8i16 QPR:$src2),
                                            (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                                imm:$lane)))),
              (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                      (v8i16 QPR:$src2),
                                      (v4i16 (EXTRACT_SUBREG
                                               QPR:$src3,
                                               (DSubReg_i16_reg imm:$lane))),
                                      (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1),
                                            (v4i32 QPR:$src2),
                                            (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                                imm:$lane)))),
              (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                      (v4i32 QPR:$src2),
                                      (v2i32 (EXTRACT_SUBREG
                                               QPR:$src3,
                                               (DSubReg_i32_reg imm:$lane))),
                                      (SubReg_i32_lane imm:$lane)))>;

    // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
    //            Returning High Half.
    defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                               null_frag>;
    def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
                                            (v4i16 DPR:$Vm))),
              (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
    def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
                                            (v2i32 DPR:$Vm))),
              (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
    def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
                                            (v8i16 QPR:$Vm))),
              (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
    def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
                                            (v4i32 QPR:$Vm))),
              (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

    // VQRDMLSH by lane; same operand handling as the VQRDMLAH by-lane Pats.
    defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                    IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                    null_frag>;
    def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1),
                                            (v4i16 DPR:$Vn),
                                            (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                                imm:$lane)))),
              (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
    def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1),
                                            (v2i32 DPR:$Vn),
                                            (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                                imm:$lane)))),
              (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                      imm:$lane))>;
    def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1),
                                            (v8i16 QPR:$src2),
                                            (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                                imm:$lane)))),
              (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                      (v8i16 QPR:$src2),
                                      (v4i16 (EXTRACT_SUBREG
                                               QPR:$src3,
                                               (DSubReg_i16_reg imm:$lane))),
                                      (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1),
                                            (v4i32 QPR:$src2),
                                            (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                                imm:$lane)))),
              (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                      (v4i32 QPR:$src2),
                                      (v2i32 (EXTRACT_SUBREG
                                               QPR:$src3,
                                               (DSubReg_i32_reg imm:$lane))),
                                      (SubReg_i32_lane imm:$lane)))>;
  }
  // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D).
  // Both the register and by-lane instructions are defined with null_frag;
  // selection is done by the Pats below, which match
  // saddsat(acc, int_arm_neon_vqdmull(Vn, Vm)).
  defm VQDMLAL   : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                               "vqdmlal", "s", null_frag>;
  defm VQDMLALsl : N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

  let Predicates = [HasNEON] in {
    // Register forms.
    def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                       (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                    (v4i16 DPR:$Vm))))),
              (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
    def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                       (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                    (v2i32 DPR:$Vm))))),
              (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // By-lane forms: second multiplicand is a duplicated lane (DPR_8 for i16,
    // DPR_VFP2 for i32).
    def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                       (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                  (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                      imm:$lane)))))),
              (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
    def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                       (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                  (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                      imm:$lane)))))),
              (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  }
  // VMLS : Vector Multiply Subtract (integer and floating-point)

  // Integer register forms.
  defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0,
      IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
      "vmls", "i", sub>;

  // Floating-point register forms. The f32 variants use fmul_su / fsub_mlx,
  // the f16 variants use plain fmul / fsub and additionally need HasFullFP16.
  def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
      v2f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON, UseFPVMLx]>;
  def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
      v4f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON, UseFPVMLx]>;
  def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
      v4f16, fmul, fsub>,
      Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
      v8f16, fmul, fsub>,
      Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

  // Integer scalar-lane ("sl") forms.
  defm VMLSsl : N3VMulOpSL_HS<0b0100,
      IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
      "vmls", "i", sub>;

  // Floating-point scalar-lane forms.
  def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
      v2f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON, UseFPVMLx]>;
  def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
      v4f32, v2f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON, UseFPVMLx]>;
  def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
      v4f16, fmul, fsub>,
      Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
      v8f16, v4f16, fmul, fsub>,
      Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
  // VMLS by a lane taken from a Q register. The scalar-lane instructions take a
  // D-register lane operand, so each pattern extracts a D subregister of $src3
  // (subregister index produced from the lane immediate by DSubReg_*_reg) and
  // feeds the lane immediate through SubReg_*_lane for the instruction's lane
  // operand.
  let Predicates = [HasNEON] in {
    def : Pat<
        (v8i16 (sub (v8i16 QPR:$src1),
                    (mul (v8i16 QPR:$src2),
                         (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
        (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                            (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;

    def : Pat<
        (v4i32 (sub (v4i32 QPR:$src1),
                    (mul (v4i32 QPR:$src2),
                         (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
        (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                            (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
  }

  // Same shape for v4f32, gated on UseFPVMLx like the VMLSslfq definition.
  def : Pat<
      (v4f32 (fsub_mlx (v4f32 QPR:$src1),
                       (fmul_su (v4f32 QPR:$src2),
                                (v4f32 (ARMvduplane (v4f32 QPR:$src3),
                                                    imm:$lane))))),
      (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                       (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;
  // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
  // Signed variants use ARMvmulls, unsigned variants use ARMvmullu; all of them
  // combine the product with `sub`.
  defm VMLSLs   : N3VLMulOp_QHS<0, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
      "vmlsl", "s", ARMvmulls, sub>;
  defm VMLSLu   : N3VLMulOp_QHS<1, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
      "vmlsl", "u", ARMvmullu, sub>;

  // Scalar-lane forms.
  defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
  defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
  // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
  // Both defms are instantiated with null_frag as their final template argument;
  // the Pat records below map ssubsat-of-vqdmull DAGs onto the resulting
  // instructions.
  defm VQDMLSL   : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
      "vqdmlsl", "s", null_frag>;
  defm VQDMLSLsl : N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

  let Predicates = [HasNEON] in {
    // Register forms: ssubsat(src1, vqdmull(Vn, Vm)).
    def : Pat<
        (v4i32 (ssubsat (v4i32 QPR:$src1),
                        (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                     (v4i16 DPR:$Vm))))),
        (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
    def : Pat<
        (v2i64 (ssubsat (v2i64 QPR:$src1),
                        (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                     (v2i32 DPR:$Vm))))),
        (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // Lane forms: the second vqdmull operand is an ARMvduplane of $Vm.
    def : Pat<
        (v4i32 (ssubsat (v4i32 QPR:$src1),
                        (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                    (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                        imm:$lane)))))),
        (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
    def : Pat<
        (v2i64 (ssubsat (v2i64 QPR:$src1),
                        (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                    (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                        imm:$lane)))))),
        (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
  }
  // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
  // f32 variants require HasVFP4 and use fmul_su with fadd_mlx / fsub_mlx;
  // f16 variants require HasFullFP16 and use plain fmul with fadd / fsub.
  // Every variant is also gated on UseFusedMAC.

  // VFMA : Fused Vector Multiply Accumulate (floating-point)
  def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
      v2f32, fmul_su, fadd_mlx>,
      Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
      v4f32, fmul_su, fadd_mlx>,
      Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
      v4f16, fmul, fadd>,
      Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
      v8f16, fmul, fadd>,
      Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

  // VFMS : Fused Vector Multiply Subtract (floating-point)
  def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
      v2f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
      v4f32, fmul_su, fsub_mlx>,
      Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
  def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
      v4f16, fmul, fsub>,
      Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
      v8f16, fmul, fsub>,
      Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
  // Match @llvm.fma.* intrinsics.
  // The fma node carries the addend last, whereas the instructions take the
  // accumulator ($src1) first, so the operands are reordered in each result.

  // fma(Vn, Vm, src1) -> VFMA, f16 (needs HasFullFP16).
  def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;
  def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasFullFP16]>;

  // fma(Vn, Vm, src1) -> VFMA, f32 (needs HasVFP4).
  def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;

  // fma(fneg(Vn), Vm, src1) -> VFMS, f32 (needs HasVFP4).
  def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
            (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
            (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON,HasVFP4]>;
  // ARMv8.2a dot product instructions.
  // These live in the VFPV8 decoder namespace: the ARM and Thumb encodings are
  // identical, so the disassembler needs no additional bit twiddling.
  //
  // VDOT describes the three-register form. $Vd is an input tied to the output
  // $dst, and the selection pattern is OpNode applied to the accumulator (typed
  // AccumTy) and two inputs typed InputTy.
  class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
             string AsmTy, ValueType AccumTy, ValueType InputTy,
             SDPatternOperator OpNode>
      : N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
              Asm, AsmTy,
              [(set (AccumTy RegTy:$dst),
                    (OpNode (AccumTy RegTy:$Vd),
                            (InputTy RegTy:$Vn),
                            (InputTy RegTy:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
    let Predicates = [HasDotProd];
  }
  // Unsigned / signed dot product, D-register (v2i32 += v8i8 . v8i8) and
  // Q-register (v4i32 += v16i8 . v16i8) forms.
  def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8,
                    int_arm_neon_udot>;
  def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8,
                    int_arm_neon_sdot>;
  def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8,
                    int_arm_neon_udot>;
  def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8,
                    int_arm_neon_sdot>;
  // Indexed dot product instructions.
  //
  // DOTI emits two records per instantiation:
  //   * the instruction itself (empty selection pattern), whose $Vm operand is a
  //     DPR_VFP2 register plus a VectorIndex32 lane encoded in Inst{5};
  //   * a Pat that matches OpNode where the last operand is a bitconvert of an
  //     ARMvduplane, and rewrites it to the instruction using the caller-supplied
  //     RHS dag as the $Vm operand.
  multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                  ValueType AccumType, ValueType InputType,
                  SDPatternOperator OpNode, dag RHS> {
    def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                   (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                   N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
      bit lane;
      let Inst{5} = lane;
      let AsmString = opc # "." # dt # "\t$Vd, $Vn, $Vm$lane";
      let Predicates = [HasDotProd];
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    def : Pat<
        (AccumType (OpNode (AccumType Ty:$Vd),
                           (InputType Ty:$Vn),
                           (InputType (bitconvert (AccumType
                               (ARMvduplane (AccumType Ty:$Vm),
                                            VectorIndex32:$lane)))))),
        (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
  }
  // Indexed dot product instantiations. The D forms pass $Vm through as a
  // DPR_VFP2 register; the Q forms pass the dsub_0 subregister of the Q-register
  // $Vm.
  defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
      int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
  defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
      int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
  defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
      int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
      int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  // v8.6A matrix multiplication extension
  let Predicates = [HasMatMulInt8] in {
    // Q-register matrix multiply-accumulate: v4i32 accumulator ($Vd, tied to
    // $dst) combined with two v16i8 inputs through OpNode.
    class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                    SDPatternOperator OpNode>
        : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
                (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
                Asm, AsmTy,
                [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                                (v16i8 QPR:$Vn),
                                                (v16i8 QPR:$Vm)))]> {
      let Constraints = "$dst = $Vd";
      let DecoderNamespace = "VFPV8";
    }

    // Lane-indexed mixed-sign dot product. Emits the instruction (no inline
    // pattern; the lane goes in Inst{5}) plus a Pat that matches OpNode with a
    // bitconvert(ARMvduplane) as the last operand and substitutes RHS for $Vm.
    multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy,
                               RegisterClass RegTy, ValueType AccumTy,
                               ValueType InputTy, SDPatternOperator OpNode,
                               dag RHS> {
      def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                     (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm,
                          VectorIndex32:$lane),
                     N3RegFrm, NoItinerary, Asm, AsmTy, []> {
        bit lane;
        let Inst{5} = lane;
        let AsmString = Asm # "." # AsmTy # "\t$Vd, $Vn, $Vm$lane";
        let Constraints = "$dst = $Vd";
        let DecoderNamespace = "VFPV8";
      }

      def : Pat<
          (AccumTy (OpNode (AccumTy RegTy:$Vd),
                           (InputTy RegTy:$Vn),
                           (InputTy (bitconvert (AccumTy
                               (ARMvduplane (AccumTy RegTy:$Vm),
                                            VectorIndex32:$lane)))))),
          (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                    VectorIndex32:$lane)>;
    }

    // vsudot lane form. The inherited multiclass is instantiated with null_frag
    // for both OpNode and RHS; the extra Pat here matches int_arm_neon_usdot
    // with the duplicated-lane operand in the *first* multiplicand position and
    // $Vn in the second.
    multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy,
                         ValueType InputTy, dag RHS>
        : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy,
                          null_frag, null_frag> {
      def : Pat<
          (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
                                       (InputTy (bitconvert (AccumTy
                                           (ARMvduplane (AccumTy RegTy:$Vm),
                                                        VectorIndex32:$lane)))),
                                       (InputTy RegTy:$Vn))),
          (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                    VectorIndex32:$lane)>;
    }

    // Matrix multiply-accumulate instructions.
    def VSMMLA  : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
    def VUMMLA  : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
    def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;

    // Mixed-sign dot product, register forms.
    def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,
                       int_arm_neon_usdot>;
    def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8,
                       int_arm_neon_usdot>;

    // Mixed-sign dot product, lane forms.
    defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
        int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
    defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
        int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
    defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
    defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8,
        (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  }
  // ARMv8.3 complex operations

  // Three-register complex op with a tied accumulator ($src1 = $Vd) and a
  // 2-bit rotation operand placed in Inst{24-23}.
  class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                              InstrItinClass itin, dag oops, dag iops,
                              string opc, string dt, list<dag> pattern>
      : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
               iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd",
               pattern> {
    bits<2> rot;
    let Inst{24-23} = rot;
  }
  // Three-register complex op without a tied operand (empty constraint string)
  // and a 1-bit rotation operand placed in Inst{24}.
  class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                             InstrItinClass itin, dag oops, dag iops,
                             string opc, string dt, list<dag> pattern>
      : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
               iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
    bits<1> rot;
    let Inst{24} = rot;
  }
  // Lane-indexed complex op with a tied accumulator ($src1 = $Vd).
  // The 2-bit rotation goes in Inst{21-20} and the 1-bit lane in Inst{5}.
  class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q,
                                    InstrItinClass itin, dag oops, dag iops,
                                    string opc, string dt, list<dag> pattern>
      : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                   "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;
    let Inst{21-20} = rot;
    let Inst{5} = lane;
  }
  // Lane-indexed complex op with a tied accumulator ($src1 = $Vd) for the
  // 64-bit lane case. The 2-bit rotation goes in Inst{21-20}; Inst{5} is filled
  // from Vm{4} rather than from the lane operand.
  class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q,
                                    InstrItinClass itin, dag oops, dag iops,
                                    string opc, string dt, list<dag> pattern>
      : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
                   "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
    bits<2> rot;
    bit lane;
    let Inst{21-20} = rot;
    let Inst{5} = Vm{4};
    // A custom decoder is required: the lane operand occupies no bits in the
    // encoding (it has only one possible value), so it has to be set manually
    // to its default value when disassembling.
    let DecoderMethod = "DecodeNEONComplexLane64Instruction";
  }
  // Instantiates the tied-accumulator complex op for v4f16/v8f16 (gated on
  // HasFullFP16) and v2f32/v4f32. All four take a complexrotateop rotation and
  // have no selection pattern.
  multiclass N3VCP8ComplexTied<bit op21, bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
          OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
          OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
          OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
          OpcodeStr, "f32", []>;
    }
  }
  // Instantiates the untied complex op for v4f16/v8f16 (gated on HasFullFP16)
  // and v2f32/v4f32. All four take a complexrotateopodd rotation and have no
  // selection pattern.
  multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
          OpcodeStr, "f16", []>;
      def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
          OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
          OpcodeStr, "f32", []>;
      def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
          OpcodeStr, "f32", []>;
    }
  }
  // Lane-indexed complex ops. These instructions index by *pairs* of lanes, so
  // each VectorIndex operand is twice as wide as the element type: the f16
  // forms use VectorIndex32 and the f32 forms use VectorIndex64.
  multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> {
    let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
      def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
               VectorIndex32:$lane, complexrotateop:$rot),
          OpcodeStr, "f16", []>;
      def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
               VectorIndex32:$lane, complexrotateop:$rot),
          OpcodeStr, "f16", []>;
    }

    let Predicates = [HasNEON,HasV8_3a] in {
      def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
               complexrotateop:$rot),
          OpcodeStr, "f32", []>;
      def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
               complexrotateop:$rot),
          OpcodeStr, "f32", []>;
    }
  }
  // Complex multiply-accumulate (register and lane-indexed) and complex add.
  defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">;
  defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">;
  defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">;

  // Select the vcadd rotation intrinsics onto VCADD: rot90 passes (i32 0) as
  // the rotation operand, rot270 passes (i32 1).
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn),
                                               (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
    def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn),
                                                (v4f16 DPR:$Rm))),
              (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn),
                                               (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
    def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn),
                                                (v8f16 QPR:$Rm))),
              (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
  }

  let Predicates = [HasNEON,HasV8_3a] in {
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn),
                                               (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
    def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn),
                                                (v2f32 DPR:$Rm))),
              (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn),
                                               (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
    def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn),
                                                (v4f32 QPR:$Rm))),
              (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
  }
  // Vector Subtract Operations.

  // VSUB : Vector Subtract (integer and floating-point)
  // Integer forms select `sub`; floating-point forms select `fsub`. The f16
  // forms additionally require HasFullFP16.
  defm VSUB  : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
      "vsub", "i", sub, 0>;
  def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
      v2f32, v2f32, fsub, 0>;
  def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
      v4f32, v4f32, fsub, 0>;
  def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
      v4f16, v4f16, fsub, 0>,
      Requires<[HasNEON,HasFullFP16]>;
  def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
      v8f16, v8f16, fsub, 0>,
      Requires<[HasNEON,HasFullFP16]>;
  // VSUBL : Vector Subtract Long (Q = D - D)
  // Signed forms extend with sext, unsigned forms with zanyext.
  defm VSUBLs : N3VLExt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
      "vsubl", "s", sub, sext, 0>;
  defm VSUBLu : N3VLExt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
      "vsubl", "u", sub, zanyext, 0>;

  // VSUBW : Vector Subtract Wide (Q = Q - D)
  defm VSUBWs : N3VW_QHS<0, 1, 0b0011, 0, "vsubw", "s", sub, sext, 0>;
  defm VSUBWu : N3VW_QHS<1, 1, 0b0011, 0, "vsubw", "u", sub, zanyext, 0>;

  // VHSUB : Vector Halving Subtract
  defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vhsub", "s", int_arm_neon_vhsubs, 0>;
  defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vhsub", "u", int_arm_neon_vhsubu, 0>;
  // VQSUB : Vector Saturating Subtract
  // Signed form selects ssubsat, unsigned form selects usubsat.
  defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vqsub", "s", ssubsat, 0>;
  defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vqsub", "u", usubsat, 0>;
  // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
  // Instantiated with null_frag; the trunc/shift/sub patterns below select it.
  defm VSUBHN  : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i", null_frag, 0>;

  // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
  defm VRSUBHN : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
      int_arm_neon_vrsubhn, 0>;

  // trunc(ARMvshruImm(sub(Vn, Vm), <half the source element width>)) -> VSUBHN.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
              (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
              (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
              (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
  }
  // Vector Comparisons.

  // VCEQ : Vector Compare Equal
  // Floating-point compares produce an integer vector of the same element
  // width (e.g. v2f32 operands, v2i32 result).
  defm VCEQ  : N3V_QHS_cmp<1, 0, 0b1000, 1,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vceq", "i", ARMCCeq, 1>;
  def VCEQfd : N3VD_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBIND, "vceq", "f32",
      v2i32, v2f32, ARMCCeq, 1>;
  def VCEQfq : N3VQ_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vceq", "f32",
      v4i32, v4f32, ARMCCeq, 1>;
  def VCEQhd : N3VD_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBIND, "vceq", "f16",
      v4i16, v4f16, ARMCCeq, 1>,
      Requires<[HasNEON, HasFullFP16]>;
  def VCEQhq : N3VQ_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vceq", "f16",
      v8i16, v8f16, ARMCCeq, 1>,
      Requires<[HasNEON, HasFullFP16]>;

  // Compare-against-zero form ("$Vd, $Vm, #0").
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
        "$Vd, $Vm, #0", ARMCCeq>;
  }
  // VCGE : Vector Compare Greater Than or Equal
  // Signed integer and floating-point forms use ARMCCge; the unsigned integer
  // form uses ARMCChs.
  defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vcge", "s", ARMCCge, 0>;
  defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vcge", "u", ARMCChs, 0>;
  def VCGEfd : N3VD_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBIND, "vcge", "f32",
      v2i32, v2f32, ARMCCge, 0>;
  def VCGEfq : N3VQ_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vcge", "f32",
      v4i32, v4f32, ARMCCge, 0>;
  def VCGEhd : N3VD_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBIND, "vcge", "f16",
      v4i16, v4f16, ARMCCge, 0>,
      Requires<[HasNEON, HasFullFP16]>;
  def VCGEhq : N3VQ_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vcge", "f16",
      v8i16, v8f16, ARMCCge, 0>,
      Requires<[HasNEON, HasFullFP16]>;

  // Compare-against-zero forms: >= 0 (vcge) and <= 0 (vcle).
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
        "$Vd, $Vm, #0", ARMCCge>;
    defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
        "$Vd, $Vm, #0", ARMCCle>;
  }
  // VCGT : Vector Compare Greater Than
  // Signed integer and floating-point forms use ARMCCgt; the unsigned integer
  // form uses ARMCChi.
  defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vcgt", "s", ARMCCgt, 0>;
  defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0,
      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
      "vcgt", "u", ARMCChi, 0>;
  def VCGTfd : N3VD_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBIND, "vcgt", "f32",
      v2i32, v2f32, ARMCCgt, 0>;
  def VCGTfq : N3VQ_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBINQ, "vcgt", "f32",
      v4i32, v4f32, ARMCCgt, 0>;
  def VCGThd : N3VD_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBIND, "vcgt", "f16",
      v4i16, v4f16, ARMCCgt, 0>,
      Requires<[HasNEON, HasFullFP16]>;
  def VCGThq : N3VQ_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBINQ, "vcgt", "f16",
      v8i16, v8f16, ARMCCgt, 0>,
      Requires<[HasNEON, HasFullFP16]>;

  // Compare-against-zero forms: > 0 (vcgt) and < 0 (vclt).
  let TwoOperandAliasConstraint = "$Vm = $Vd" in {
    defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
        "$Vd, $Vm, #0", ARMCCgt>;
    defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
        "$Vd, $Vm, #0", ARMCClt>;
  }
  // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
  def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
      "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
  def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
      "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
  def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
      "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
      Requires<[HasNEON, HasFullFP16]>;
  def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
      "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
      Requires<[HasNEON, HasFullFP16]>;

  // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
  def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
  def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
  def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
      "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
      Requires<[HasNEON, HasFullFP16]>;
  def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
      "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
      Requires<[HasNEON, HasFullFP16]>;
  // VTST : Vector Test Bits
  defm VTST : N3V_QHS<0, 0, 0b1000, 1,
      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
      "vtst", "", NEONvtst, 1>;

  // Three-operand vaclt / vacle assembler aliases. Each maps onto the
  // corresponding VACGT / VACGE instruction with $Vn and $Vm swapped.
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                      (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                      (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                      (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                      (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                        (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                        (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                        (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                        (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  }
  // +fp16fml Floating Point Multiplication Variants
  let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

    // Three-register f16 form built on N3VCP8 (Q field fixed to 1). No
    // selection pattern and no itinerary.
    class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
        : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm),
                 NoItinerary, asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Three-register f16 form built on N3VCP8Q0 (Q field fixed to 0). No
    // selection pattern and no itinerary.
    class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                      RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
        : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm),
                   NoItinerary, asm, "f16", "$Vd, $Vn, $Vm", "", []>;

    // Indexed D-register form. Operands: Vd, Vs, Vs[0-15], Idx[0-1].
    // The index goes in Inst{3}; the Vn and Vm fields are re-packed so that
    // their low bit lands in Inst{7} / Inst{5} respectively.
    class VFMD<string opc, string type, bits<2> S>
        : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
                     (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
                     IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bit idx;
      let Inst{3} = idx;
      let Inst{19-16} = Vn{4-1};
      let Inst{7} = Vn{0};
      let Inst{5} = Vm{0};
      let Inst{2-0} = Vm{3-1};
    }

    // Indexed Q-register form. Operands: Vq, Vd, Vd[0-7], Idx[0-3].
    // The 2-bit index is split across Inst{5} (high bit) and Inst{3} (low bit).
    class VFMQ<string opc, string type, bits<2> S>
        : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
                     (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                     IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
      bits<2> idx;
      let Inst{5} = idx{1};
      let Inst{3} = idx{0};
    }

    //                                                 op1   op2  op3
    def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
    def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
    def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
    def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
    def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
    def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
    def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
    def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
  } // HasNEON, HasFP16FML
  // Two-operand vaclt / vacle assembler aliases. $Vd doubles as the last source
  // operand of the underlying VACGT / VACGE instruction.
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                      (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                      (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                      (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                      (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                        (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                        (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                        (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
    def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                        (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  }
  // Vector Bitwise Operations.

  // Bitwise NOT expressed as XOR with an all-ones vector, for the D-register
  // (ARMimmAllOnesD) and Q-register (ARMimmAllOnesV) cases.
  def vnotd : PatFrag<(ops node:$in), (xor node:$in, ARMimmAllOnesD)>;
  def vnotq : PatFrag<(ops node:$in), (xor node:$in, ARMimmAllOnesV)>;
  // Register-register bitwise instructions. Each is defined once per register
  // width with a v2i32 / v4i32 pattern.

  // VAND : Vector Bitwise AND
  def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
      v2i32, v2i32, and, 1>;
  def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
      v4i32, v4i32, and, 1>;

  // VEOR : Vector Bitwise Exclusive OR
  def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
      v2i32, v2i32, xor, 1>;
  def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
      v4i32, v4i32, xor, 1>;

  // VORR : Vector Bitwise OR
  def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
      v2i32, v2i32, or, 1>;
  def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
      v4i32, v4i32, or, 1>;
  // Emits selection patterns that map a two-operand node onto the
  // instructions named Name#"d" and Name#"q" for additional vector types.
  //   Name    - instruction name prefix; "d"/"q" is appended and the result
  //             is looked up with !cast<Instruction>.
  //   OpNodeD - operator matched for the DPR (v8i8, v4i16, v1i64) patterns.
  //   OpNodeQ - operator matched for the QPR (v16i8, v8i16, v2i64) patterns.
  multiclass BitwisePatterns<string Name, SDPatternOperator OpNodeD,
                             SDPatternOperator OpNodeQ> {
    foreach VT = [v8i8, v4i16, v1i64] in
      def : Pat<(VT (OpNodeD DPR:$LHS, DPR:$RHS)),
                (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;

    foreach VT = [v16i8, v8i16, v2i64] in
      def : Pat<(VT (OpNodeQ QPR:$LHS, QPR:$RHS)),
                (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  }
  // Instantiate the extra-type patterns for VAND, VORR and VEOR. The same
  // generic node serves both the D and the Q flavour.
  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VAND", and, and>;
    defm : BitwisePatterns<"VORR", or,  or>;
    defm : BitwisePatterns<"VEOR", xor, xor>;
  }
  // VORR with a modified-immediate operand.
  // All four forms tie $src to $Vd and select from ARMvorrImm. The trailing
  // `let Inst{...}` copies the listed bits of $SIMM into the same bit
  // positions of the encoding: bit 9 for the i16 forms, bits 10-9 for i32.
  def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                            (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                            (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                            (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                            (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VBIC : Vector Bitwise Bit Clear (AND NOT)
  // The selection pattern is $Vn AND (NOT $Vm), with the NOT expressed via
  // the vnotd / vnotq fragments. Both defs carry the two-operand alias
  // constraint "$Vn = $Vd".
  let TwoOperandAliasConstraint = "$Vn = $Vd" in {
    def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd,
                       (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;
    def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd,
                       (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;
  }

  // Same AND-NOT shape for the remaining integer vector types.
  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VBIC",
                           BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>,
                           BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>;
  }
  // VBIC with a modified-immediate operand.
  // Mirrors the VORR immediate forms: $src is tied to $Vd, selection is from
  // ARMvbicImm, and the listed bits of $SIMM are copied into the same bit
  // positions of the encoding (bit 9 for i16, bits 10-9 for i32).
  def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                            (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                            (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set DPR:$Vd,
                              (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }

  def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                            (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                            (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                            IIC_VMOVImm,
                            "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                            [(set QPR:$Vd,
                              (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
    let Inst{10-9} = SIMM{10-9};
  }
  // VORN : Vector Bitwise OR NOT
  // Selection pattern is $Vn OR (NOT $Vm), with the NOT expressed via the
  // vnotd / vnotq fragments.
  def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set DPR:$Vd,
                     (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;
  def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set QPR:$Vd,
                     (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;

  // Same OR-NOT shape for the remaining integer vector types.
  let Predicates = [HasNEON] in {
    defm : BitwisePatterns<"VORN",
                           BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>,
                           BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>;
  }
  // VMVN : Vector Bitwise NOT (Immediate)
  // All four forms are marked rematerializable and select from ARMvmvnImm.
  // The i16 forms take an nImmSplatI16 operand and copy SIMM bit 9 into the
  // encoding; the i32 forms take an nImmVMOVI32 operand and copy SIMM bits
  // 11-8.
  let isReMaterializable = 1 in {
    def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                             (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i16", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                               (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                             (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i16", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                               (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                             (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i32", "$Vd, $SIMM", "",
                             [(set DPR:$Vd,
                               (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }

    def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                             (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                             IIC_VMOVImm,
                             "vmvn", "i32", "$Vd, $SIMM", "",
                             [(set QPR:$Vd,
                               (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }
  }
  // VMVN : Vector Bitwise NOT
  // Register form; the built-in pattern is on v2i32 / v4i32 and the patterns
  // that follow cover the remaining integer vector types.
  // NOTE(review): VMVNq is given the same itinerary (IIC_VSUBiD) as VMVNd,
  // whereas neighbouring D/Q pairs use separate ...D and ...Q itineraries;
  // confirm whether that is intentional.
  def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
  def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;

  let Predicates = [HasNEON] in {
    foreach VT = [v1i64, v4i16, v8i8] in
      def : Pat<(VT (vnotd DPR:$src)), (VMVNd DPR:$src)>;

    foreach VT = [v2i64, v8i16, v16i8] in
      def : Pat<(VT (vnotq QPR:$src)), (VMVNq QPR:$src)>;
  }
  // The TwoAddress pass will not go looking for equivalent operations
  // with different register constraints; it just inserts copies.
  // That is why the pseudo VBSP is implemented. It is expanded later into
  // VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
  // D-register bitwise-select pseudo. Its own pattern selects NEONvbsp on
  // v2i32; it has an empty assembly string.
  def VBSPd
      : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                    IIC_VBINiD, "",
                    [(set DPR:$Vd,
                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

  let Predicates = [HasNEON] in {
    // The int_arm_neon_vbsl intrinsic, for every 64-bit vector type listed.
    foreach VT = [v8i8, v4i16, v2i32, v2f32, v1i64] in
      def : Pat<(VT (int_arm_neon_vbsl (VT DPR:$src1),
                                       (VT DPR:$Vn), (VT DPR:$Vm))),
                (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

    // The open-coded select: (Vn & Vd) | (Vm & ~Vd), with Vd as the mask.
    foreach VT = [v8i8, v4i16, v2i32, v1i64] in
      def : Pat<(VT (or (and DPR:$Vn, DPR:$Vd),
                        (and DPR:$Vm, (vnotd DPR:$Vd)))),
                (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  }
  // Q-register bitwise-select pseudo. Its own pattern selects NEONvbsp on
  // v4i32; it has an empty assembly string.
  def VBSPq
      : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                    IIC_VBINiQ, "",
                    [(set QPR:$Vd,
                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

  let Predicates = [HasNEON] in {
    // The int_arm_neon_vbsl intrinsic, for every 128-bit vector type listed.
    foreach VT = [v16i8, v8i16, v4i32, v4f32, v2i64] in
      def : Pat<(VT (int_arm_neon_vbsl (VT QPR:$src1),
                                       (VT QPR:$Vn), (VT QPR:$Vm))),
                (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

    // The open-coded select: (Vn & Vd) | (Vm & ~Vd), with Vd as the mask.
    foreach VT = [v16i8, v8i16, v4i32, v2i64] in
      def : Pat<(VT (or (and QPR:$Vn, QPR:$Vd),
                        (and QPR:$Vm, (vnotq QPR:$Vd)))),
                (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  }
  // The three real bitwise-select instructions. All six defs share the same
  // operand list (ins $src1, $Vn, $Vm), the same tied-operand constraint
  // "$src1 = $Vd", and an empty selection-pattern list; per the note on the
  // VBSP pseudo, these are what that pseudo is expanded into.

  // VBSL : Vector Bitwise Select
  def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
  def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

  // VBIF : Vector Bitwise Insert if False
  // NOTE(review): an older comment here described VBIF with different
  // operand names ("vbif $dst, $src3, $src1", "$src2 = $dst"); the operands
  // and constraint actually written below are identical to VBSL's.
  def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
  def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

  // VBIT : Vector Bitwise Insert if True
  // NOTE(review): an older comment here described VBIT with different
  // operand names ("vbit $dst, $src2, $src1", "$src3 = $dst"); the operands
  // and constraint actually written below are identical to VBSL's.
  def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
  def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
  // Vector Absolute Differences.

  // VABD : Vector Absolute Difference
  // Integer forms come from the _QHS multiclass (signed via
  // int_arm_neon_vabds, unsigned via int_arm_neon_vabdu). The f32 and f16
  // forms also select int_arm_neon_vabds; the f16 forms additionally
  // require HasFullFP16.
  defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "s", int_arm_neon_vabds, 1>;
  defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "u", int_arm_neon_vabdu, 1>;

  def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
  def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

  def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VABDL : Vector Absolute Difference Long (Q = | D - D |)
  // Both the signed and the unsigned form are instantiated with zext as the
  // extension node.
  defm VABDLs : N3VLIntExt_QHS<0, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
  defm VABDLu : N3VLIntExt_QHS<1, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

  // abs(zext(a) - zext(b)) selects the unsigned long form directly.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)),
                               (zext (v8i8 DPR:$opB))))),
              (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
    def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)),
                               (zext (v4i16 DPR:$opB))))),
              (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
  }
  // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
  // shift/xor pattern for ABS.
  //
  // abd_shr is the shifted difference: ARMvshrsImm applied to
  // (zext in1 - zext in2) with the given i32 shift amount.
  def abd_shr :
      PatFrag<(ops node:$in1, node:$in2, node:$shift),
              (ARMvshrsImm (sub (zext node:$in1), (zext node:$in2)),
                           (i32 $shift))>;

  // With s = abd_shr(a, b, 63) and d = zext(a) - zext(b), match
  // xor(s, add(d, s)) and select the unsigned v2i64 VABDL.
  let Predicates = [HasNEON] in {
    def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
                   (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                    (zext (v2i32 DPR:$opB))),
                               (abd_shr (v2i32 DPR:$opA),
                                        (v2i32 DPR:$opB), 63)))),
              (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
  }
  // VABA : Vector Absolute Difference and Accumulate
  // Built from the absolute-difference intrinsic combined with `add`.
  defm VABAs : N3VIntOp_QHS<0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "s", int_arm_neon_vabds, add>;
  defm VABAu : N3VIntOp_QHS<1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "u", int_arm_neon_vabdu, add>;

  // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
  // Both signednesses pass zext as the extension node and `add` as the
  // accumulate node.
  defm VABALs : N3VLIntExtOp_QHS<0, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
  defm VABALu : N3VLIntExtOp_QHS<1, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
  // Vector Maximum and Minimum.

  // VMAX : Vector Maximum
  // Integer forms select smax / umax; floating-point forms select fmaximum.
  // The f16 forms additionally require HasFullFP16.
  defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "s", smax, 1>;
  defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "u", umax, 1>;

  def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f32", v2f32, v2f32, fmaximum, 1>;
  def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f32", v4f32, v4f32, fmaximum, 1>;

  def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f16", v4f16, v4f16, fmaximum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f16", v8f16, v8f16, fmaximum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VMAXNM
  // Selects fmaxnum. Every form requires HasV8 and HasNEON (plus
  // HasFullFP16 for f16), uses the NEONThumb2V8PostEncoder post-encoder and
  // lives in the v8NEON decoder namespace.
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                   N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                   v2f32, v2f32, fmaxnum, 1>,
                         Requires<[HasV8, HasNEON]>;
    def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                   N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                   v4f32, v4f32, fmaxnum, 1>,
                         Requires<[HasV8, HasNEON]>;
    def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                   N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                   v4f16, v4f16, fmaxnum, 1>,
                         Requires<[HasV8, HasNEON, HasFullFP16]>;
    def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                   N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                   v8f16, v8f16, fmaxnum, 1>,
                         Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
  // VMIN : Vector Minimum
  // Integer forms select smin / umin; floating-point forms select fminimum.
  // The f16 forms additionally require HasFullFP16.
  defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "s", smin, 1>;
  defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "u", umin, 1>;

  def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f32", v2f32, v2f32, fminimum, 1>;
  def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f32", v4f32, v4f32, fminimum, 1>;

  def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f16", v4f16, v4f16, fminimum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f16", v8f16, v8f16, fminimum, 1>,
               Requires<[HasNEON, HasFullFP16]>;
  // VMINNM
  // Selects fminnum. Every form requires HasV8 and HasNEON (plus
  // HasFullFP16 for f16), uses the NEONThumb2V8PostEncoder post-encoder and
  // lives in the v8NEON decoder namespace.
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                   N3RegFrm, NoItinerary, "vminnm", "f32",
                                   v2f32, v2f32, fminnum, 1>,
                         Requires<[HasV8, HasNEON]>;
    def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                   N3RegFrm, NoItinerary, "vminnm", "f32",
                                   v4f32, v4f32, fminnum, 1>,
                         Requires<[HasV8, HasNEON]>;
    def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                   N3RegFrm, NoItinerary, "vminnm", "f16",
                                   v4f16, v4f16, fminnum, 1>,
                         Requires<[HasV8, HasNEON, HasFullFP16]>;
    def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                   N3RegFrm, NoItinerary, "vminnm", "f16",
                                   v8f16, v8f16, fminnum, 1>,
                         Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
  // Vector Pairwise Operations.

  // VPADD : Vector Pairwise Add
  // D-register only; every element type selects int_arm_neon_vpadd. The f16
  // form additionally requires HasFullFP16.
  def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
  def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
  def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
  def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                         "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
  def VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                         "vpadd", "f16", v4f16, v4f16, int_arm_neon_vpadd, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VPADDL : Vector Pairwise Add Long
  defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
                              "vpaddl", "s", int_arm_neon_vpaddls>;
  defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0,
                              "vpaddl", "u", int_arm_neon_vpaddlu>;

  // VPADAL : Vector Pairwise Add and Accumulate Long
  defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0,
                               "vpadal", "s", int_arm_neon_vpadals>;
  defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0,
                               "vpadal", "u", int_arm_neon_vpadalu>;
  // VPMAX : Vector Pairwise Maximum
  // D-register only. Signed integer and floating-point forms select
  // int_arm_neon_vpmaxs; unsigned forms select int_arm_neon_vpmaxu. The f16
  // form additionally requires HasFullFP16.
  def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
  def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
  def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;

  def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
  def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
  def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                         "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;

  def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
  def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmax", "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VPMIN : Vector Pairwise Minimum
  // D-register only. Signed integer and floating-point forms select
  // int_arm_neon_vpmins; unsigned forms select int_arm_neon_vpminu. The f16
  // form additionally requires HasFullFP16.
  def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
  def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
  def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;

  def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
  def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
  def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                         "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;

  def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
  def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                         "vpmin", "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                 Requires<[HasNEON, HasFullFP16]>;
  // Vector Reciprocal and Reciprocal Square Root Estimate and Step.

  // VRECPE : Vector Reciprocal Estimate
  // u32, f32 and f16 forms, all selecting int_arm_neon_vrecpe; the f16 forms
  // additionally require HasFullFP16.
  def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                         IIC_VUNAD, "vrecpe", "u32",
                         v2i32, v2i32, int_arm_neon_vrecpe>;
  def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                         IIC_VUNAQ, "vrecpe", "u32",
                         v4i32, v4i32, int_arm_neon_vrecpe>;

  def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAD, "vrecpe", "f32",
                         v2f32, v2f32, int_arm_neon_vrecpe>;
  def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAQ, "vrecpe", "f32",
                         v4f32, v4f32, int_arm_neon_vrecpe>;

  def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                         IIC_VUNAD, "vrecpe", "f16",
                         v4f16, v4f16, int_arm_neon_vrecpe>,
                 Requires<[HasNEON, HasFullFP16]>;
  def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                         IIC_VUNAQ, "vrecpe", "f16",
                         v8f16, v8f16, int_arm_neon_vrecpe>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VRECPS : Vector Reciprocal Step
  // f32 and f16 forms, all selecting int_arm_neon_vrecps; the f16 forms
  // additionally require HasFullFP16.
  def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrecps", "f32",
                         v2f32, v2f32, int_arm_neon_vrecps, 1>;
  def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrecps", "f32",
                         v4f32, v4f32, int_arm_neon_vrecps, 1>;

  def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrecps", "f16",
                         v4f16, v4f16, int_arm_neon_vrecps, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
  def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrecps", "f16",
                         v8f16, v8f16, int_arm_neon_vrecps, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
  // VRSQRTE : Vector Reciprocal Square Root Estimate
  // u32, f32 and f16 forms, all selecting int_arm_neon_vrsqrte; the f16
  // forms additionally require HasFullFP16.
  def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                          IIC_VUNAD, "vrsqrte", "u32",
                          v2i32, v2i32, int_arm_neon_vrsqrte>;
  def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                          IIC_VUNAQ, "vrsqrte", "u32",
                          v4i32, v4i32, int_arm_neon_vrsqrte>;

  def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                          IIC_VUNAD, "vrsqrte", "f32",
                          v2f32, v2f32, int_arm_neon_vrsqrte>;
  def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                          IIC_VUNAQ, "vrsqrte", "f32",
                          v4f32, v4f32, int_arm_neon_vrsqrte>;

  def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                          IIC_VUNAD, "vrsqrte", "f16",
                          v4f16, v4f16, int_arm_neon_vrsqrte>,
                  Requires<[HasNEON, HasFullFP16]>;
  def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                          IIC_VUNAQ, "vrsqrte", "f16",
                          v8f16, v8f16, int_arm_neon_vrsqrte>,
                  Requires<[HasNEON, HasFullFP16]>;
  // VRSQRTS : Vector Reciprocal Square Root Step
  // f32 and f16 forms, all selecting int_arm_neon_vrsqrts; the f16 forms
  // additionally require HasFullFP16.
  def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                          IIC_VRECSD, "vrsqrts", "f32",
                          v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
  def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                          IIC_VRECSQ, "vrsqrts", "f32",
                          v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

  def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                          IIC_VRECSD, "vrsqrts", "f16",
                          v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                  Requires<[HasNEON, HasFullFP16]>;
  def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                          IIC_VRECSQ, "vrsqrts", "f16",
                          v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                  Requires<[HasNEON, HasFullFP16]>;
  // Vector Shifts.

  // VSHL : Vector Shift
  // The defms select the int_arm_neon_vshifts / int_arm_neon_vshiftu
  // intrinsics; the patterns afterwards map the ARMvshls / ARMvshlu nodes
  // onto the same instructions, one pattern per vector type.
  defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "s", int_arm_neon_vshifts>;
  defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "u", int_arm_neon_vshiftu>;

  let Predicates = [HasNEON] in {
    // Signed, D registers.
    def : Pat<(v8i8  (ARMvshls (v8i8  DPR:$Dn), (v8i8  DPR:$Dm))),
              (VSHLsv8i8  DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
              (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
              (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
              (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
    // Signed, Q registers.
    def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
              (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
              (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
              (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
              (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

    // Unsigned, D registers.
    def : Pat<(v8i8  (ARMvshlu (v8i8  DPR:$Dn), (v8i8  DPR:$Dm))),
              (VSHLuv8i8  DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
              (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
              (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
    def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
              (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
    // Unsigned, Q registers.
    def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
              (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
              (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
              (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
    def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
              (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
  }
  // VSHL : Vector Shift Left (Immediate)
  defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD,
                           "vshl", "i", ARMvshlImm>;

  // VSHR : Vector Shift Right (Immediate)
  // Signed form selects ARMvshrsImm, unsigned form ARMvshruImm.
  defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD,
                           "vshr", "s", ARMvshrsImm>;
  defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD,
                           "vshr", "u", ARMvshruImm>;
  // VSHLL : Vector Shift Left Long
  // The matched fragment is ARMvshlImm applied to the extended source:
  // sext for the "s" form, zext for the "u" form.
  defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
                           PatFrag<(ops node:$LHS, node:$RHS),
                                   (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
  defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
                           PatFrag<(ops node:$LHS, node:$RHS),
                                   (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
  // VSHLL : Vector Shift Left Long (with maximum shift count)
  // Thin wrapper over N2VLSh: it forwards its arguments with null_frag in
  // the pattern-operator slot (selection is done by separate Pat records),
  // fixes instruction bits 21-16 to op21_16, and decodes through
  // DecodeVSHLMaxInstruction.
  class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                  bit op7, bit op6, bit op4,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Operand ImmTy>
      : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
               ResTy, OpTy, ImmTy, null_frag> {
    let Inst{21-16} = op21_16;
    let DecoderMethod = "DecodeVSHLMaxInstruction";
  }
  // Maximum-shift VSHLL forms, one per source element width.
  def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                           v8i16, v8i8, imm8>;
  def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                           v4i32, v4i16, imm16>;
  def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                           v2i64, v2i32, imm32>;

  // A left shift by exactly the source element width (8/16/32) of an
  // extended value selects the max-shift form, whichever of zext, sext or
  // anyext produced the extension.
  let Predicates = [HasNEON] in {
    foreach Ext = [zext, sext, anyext] in {
      def : Pat<(v8i16 (ARMvshlImm (Ext (v8i8 DPR:$Rn)), (i32 8))),
                (VSHLLi8 DPR:$Rn, 8)>;
      def : Pat<(v4i32 (ARMvshlImm (Ext (v4i16 DPR:$Rn)), (i32 16))),
                (VSHLLi16 DPR:$Rn, 16)>;
      def : Pat<(v2i64 (ARMvshlImm (Ext (v2i32 DPR:$Rn)), (i32 32))),
                (VSHLLi32 DPR:$Rn, 32)>;
    }
  }
  // VSHRN : Vector Shift Right and Narrow
  // The defm matches a truncated ARMvshrsImm shift; the patterns after it
  // route a truncated ARMvshruImm shift to the same instructions.
  defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
                          PatFrag<(ops node:$Rn, node:$amt),
                                  (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

  let Predicates = [HasNEON] in {
    def : Pat<(v8i8  (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
              (VSHRNv8i8  QPR:$Vn, shr_imm8:$amt)>;
    def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
              (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
    def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
              (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
  }
  // VRSHL : Vector Rounding Shift
  defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts>;
  defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu>;

  // VRSHR : Vector Rounding Shift Right
  defm VRSHRs : N2VShR_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D,
                            "vrshr", "s", NEONvrshrsImm>;
  defm VRSHRu : N2VShR_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D,
                            "vrshr", "u", NEONvrshruImm>;

  // VRSHRN : Vector Rounding Shift Right and Narrow
  defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                           "vrshrn", "i", NEONvrshrnImm>;
  // VQSHL : Vector Saturating Shift
  defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "s", int_arm_neon_vqshifts>;
  defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D,
                              IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "u", int_arm_neon_vqshiftu>;

  // VQSHL : Vector Saturating Shift Left (Immediate)
  defm VQSHLsi : N2VShL_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D,
                             "vqshl", "s", NEONvqshlsImm>;
  defm VQSHLui : N2VShL_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D,
                             "vqshl", "u", NEONvqshluImm>;

  // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
  defm VQSHLsu : N2VShL_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D,
                             "vqshlu", "s", NEONvqshlsuImm>;
  // VQSHRN : Vector Saturating Shift Right and Narrow
  defm VQSHRNs
      : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                   NEONvqshrnsImm>;
  defm VQSHRNu
      : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                   NEONvqshrnuImm>;

  // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
  defm VQSHRUN
      : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                   NEONvqshrnsuImm>;
  // VQRSHL : Vector Saturating Rounding Shift
  //   Selected from the int_arm_neon_vqrshifts / int_arm_neon_vqrshiftu
  //   intrinsics.
  defm VQRSHLs
      : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                      IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                      "vqrshl", "s", int_arm_neon_vqrshifts>;
  defm VQRSHLu
      : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                      IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                      "vqrshl", "u", int_arm_neon_vqrshiftu>;
  // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
  defm VQRSHRNs
      : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                   NEONvqrshrnsImm>;
  defm VQRSHRNu
      : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                   NEONvqrshrnuImm>;

  // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
  defm VQRSHRUN
      : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                   NEONvqrshrnsuImm>;
  // VSRA : Vector Shift Right and Accumulate
  //   Built on the ARMvshrsImm / ARMvshruImm shift nodes.
  defm VSRAs  : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
  defm VSRAu  : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;

  // VRSRA : Vector Rounding Shift Right and Accumulate
  //   Built on the NEONvrshrsImm / NEONvrshruImm shift nodes.
  defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
  defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

  // VSLI : Vector Shift Left and Insert
  defm VSLI   : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

  // VSRI : Vector Shift Right and Insert
  defm VSRI   : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
  // Vector Absolute and Saturating Absolute.

  // VABS : Vector Absolute Value
  //   The integer forms come from the N2VInt_QHS multiclass and are selected
  //   from `abs`; the f32 and f16 forms are defined one by one and selected
  //   from `fabs`. The f16 forms additionally require HasFullFP16.
  defm VABS
      : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                   IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
  def VABSfd
      : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
             "vabs", "f32", v2f32, v2f32, fabs>;
  def VABSfq
      : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
             "vabs", "f32", v4f32, v4f32, fabs>;
  def VABShd
      : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
             "vabs", "f16", v4f16, v4f16, fabs>,
        Requires<[HasNEON, HasFullFP16]>;
  def VABShq
      : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
             "vabs", "f16", v8f16, v8f16, fabs>,
        Requires<[HasNEON, HasFullFP16]>;
  // VQABS : Vector Saturating Absolute Value
  defm VQABS
      : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                   IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                   int_arm_neon_vqabs>;
  // Vector Negate.

  // Integer negation is matched as "all-zeros vector minus operand"; one
  // fragment for the D-register zero node and one for the Q-register one.
  def vnegd : PatFrag<(ops node:$in), (sub ARMimmAllZerosD, node:$in)>;
  def vnegq : PatFrag<(ops node:$in), (sub ARMimmAllZerosV, node:$in)>;

  // Integer VNEG on a D register; `size` fills the element-size argument of
  // N2V and `Ty` is the vector type used in the selection pattern.
  class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
      : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
            (outs DPR:$Vd), (ins DPR:$Vm),
            IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
            [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;

  // Integer VNEG on a Q register; same as VNEGD except for the register
  // class, the itinerary and the sixth N2V argument (1 instead of 0).
  class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
      : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
            (outs QPR:$Vd), (ins QPR:$Vm),
            IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
            [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
  // VNEG : Vector Negate (integer)
  //   D-register forms.
  def VNEGs8d  : VNEGD<0b00, "vneg", "s8",  v8i8>;
  def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
  def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
  //   Q-register forms.
  def VNEGs8q  : VNEGQ<0b00, "vneg", "s8",  v16i8>;
  def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
  def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
  // VNEG : Vector Negate (floating-point)
  //   Selected from `fneg`. The f16 forms additionally require HasFullFP16.
  def VNEGfd
      : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
            (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
            "vneg", "f32", "$Vd, $Vm", "",
            [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
  def VNEGf32q
      : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
            (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
            "vneg", "f32", "$Vd, $Vm", "",
            [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
  def VNEGhd
      : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
            (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
            "vneg", "f16", "$Vd, $Vm", "",
            [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
        Requires<[HasNEON, HasFullFP16]>;
  def VNEGhq
      : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
            (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
            "vneg", "f16", "$Vd, $Vm", "",
            [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
        Requires<[HasNEON, HasFullFP16]>;
  // Stand-alone selection patterns mapping the vnegd / vnegq fragments to the
  // integer VNEG instructions, one per vector type.
  let Predicates = [HasNEON] in {
    def : Pat<(v8i8  (vnegd DPR:$Vm)), (VNEGs8d  DPR:$Vm)>;
    def : Pat<(v4i16 (vnegd DPR:$Vm)), (VNEGs16d DPR:$Vm)>;
    def : Pat<(v2i32 (vnegd DPR:$Vm)), (VNEGs32d DPR:$Vm)>;
    def : Pat<(v16i8 (vnegq QPR:$Vm)), (VNEGs8q  QPR:$Vm)>;
    def : Pat<(v8i16 (vnegq QPR:$Vm)), (VNEGs16q QPR:$Vm)>;
    def : Pat<(v4i32 (vnegq QPR:$Vm)), (VNEGs32q QPR:$Vm)>;
  }
  // VQNEG : Vector Saturating Negate
  defm VQNEG
      : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                   IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                   int_arm_neon_vqneg>;
  // Vector Bit Counting Operations.

  // VCLS : Vector Count Leading Sign Bits
  defm VCLS
      : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                   IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                   int_arm_neon_vcls>;

  // VCLZ : Vector Count Leading Zeros (selected from `ctlz`)
  defm VCLZ
      : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                   IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                   ctlz>;

  // VCNT : Vector Count One Bits (selected from `ctpop`; only byte-element
  //        vector types are defined here)
  def VCNTd
      : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                IIC_VCNTiD, "vcnt", "8",
                v8i8, v8i8, ctpop>;
  def VCNTq
      : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                IIC_VCNTiQ, "vcnt", "8",
                v16i8, v16i8, ctpop>;
  // Vector Swap
  //   Both registers are outputs, each tied to the matching input through the
  //   "$in1 = $Vd, $in2 = $Vm" constraint. The pattern list is empty, so
  //   these defs carry no selection pattern of their own.
  def VSWPd
      : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
             (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
             NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
             []>;
  def VSWPq
      : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
             (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
             NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
             []>;
  // Vector Move Operations.

  // VMOV : Vector Move (Register)
  //   "vmov Vd, Vm" is an assembler alias for VORR with Vm as both sources.
  def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                      (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                      (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  // VMOV : Vector Move (Immediate)
  //
  // Strictly speaking VMOVs are not cheap, but they cost as much as the
  // register copy (VORR) that would otherwise be used, so rematerialization
  // is preferred over splitting wherever it applies.
  let isReMaterializable = 1, isAsCheapAsAMove = 1 in {

    // 8-bit elements.
    def VMOVv8i8
        : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                   (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                   "vmov", "i8", "$Vd, $SIMM", "",
                   [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
    def VMOVv16i8
        : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                   (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                   "vmov", "i8", "$Vd, $SIMM", "",
                   [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

    // 16-bit elements: the one unspecified bit of the third argument
    // (Inst{9}) is taken from the immediate operand.
    def VMOVv4i16
        : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                   (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                   "vmov", "i16", "$Vd, $SIMM", "",
                   [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }
    def VMOVv8i16
        : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                   (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                   "vmov", "i16", "$Vd, $SIMM", "",
                   [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{9} = SIMM{9};
    }

    // 32-bit elements: all four bits of the third argument (Inst{11-8}) are
    // taken from the immediate operand.
    def VMOVv2i32
        : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                   (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                   "vmov", "i32", "$Vd, $SIMM", "",
                   [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }
    def VMOVv4i32
        : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                   (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                   "vmov", "i32", "$Vd, $SIMM", "",
                   [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
      let Inst{11-8} = SIMM{11-8};
    }

    // 64-bit elements.
    def VMOVv1i64
        : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                   (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                   "vmov", "i64", "$Vd, $SIMM", "",
                   [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
    def VMOVv2i64
        : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                   (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                   "vmov", "i64", "$Vd, $SIMM", "",
                   [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

    // f32 elements (selected from ARMvmovFPImm instead of ARMvmovImm).
    def VMOVv2f32
        : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                   (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                   "vmov", "f32", "$Vd, $SIMM", "",
                   [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
    def VMOVv4f32
        : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                   (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                   "vmov", "f32", "$Vd, $SIMM", "",
                   [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;

  } // isReMaterializable, isAsCheapAsAMove
  // Support for the byte-replication feature, for GAS compatibility.
  multiclass NEONImmReplicateI8InstAlias<ValueType To> {
    // Instructions such as
    //   "vmov.i32 d0, #0xffffffff"
    //   "vmov.i32 d0, #0xabababab"
    //   "vmov.i16 d0, #0xabab"
    // are incorrect as written, but they can still be handled. For the last
    // two, for example, the instruction emitted is
    //   "vmov.i8 d0, #0xab"
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv8i8 DPR:$Vd,
                                  nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv16i8 QPR:$Vd,
                                   nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;

    // The same support is provided for VMVN, so that
    //   "vmvn.i32 d0, #0xabababab"
    // actually means
    //   "vmov.i8 d0, #0x54"
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv8i8 DPR:$Vd,
                                  nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (VMOVv16i8 QPR:$Vd,
                                   nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  }

  // Instantiate the byte-replication aliases for every wider element size.
  defm : NEONImmReplicateI8InstAlias<i16>;
  defm : NEONImmReplicateI8InstAlias<i32>;
  defm : NEONImmReplicateI8InstAlias<i64>;
  // Same idea as the byte-replication aliases, for element types other than
  // i8, e.g.:
  //   "vmov.i32 d0, #0xab00ab00"         -> "vmov.i16 d0, #0xab00"
  //   "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
  // Here VMVN is not canonicalized to VMOV: the vmvn aliases map to the
  // NV8 / NV16 instructions passed in by the instantiation.
  multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                       NeonI NV8, NeonI NV16, ValueType To> {
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (V8 DPR:$Vd,
                            nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                        (V16 QPR:$Vd,
                             nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (NV8 DPR:$Vd,
                             nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
    def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                        (NV16 QPR:$Vd,
                              nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  }

  // i16 pattern written as i32 / i64, and i32 pattern written as i64.
  defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                   VMVNv4i16, VMVNv8i16, i32>;
  defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                   VMVNv4i16, VMVNv8i16, i64>;
  defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                   VMVNv2i32, VMVNv4i32, i64>;

  // TODO: add "VMOV <-> VMVN" conversion for cases like
  //   "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
  //   "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
  // On some CPUs "vmov.i32 dD, #0" and "vmov.i32 qD, #0" take zero cycles to
  // execute, so they should be used wherever possible to set a register to
  // zero.
  //
  // Even without these pseudo-instructions the right instruction would
  // probably still be chosen, but the general VMOV-immediate definitions
  // could not be marked "isAsCheapAsAMove" because they are sometimes rather
  // expensive (in general).
  //
  // Each pseudo expands to the matching VMOV.i32 with immediate 0 and the
  // predicate operands (14, zero_reg), and is only available under HasZCZ.
  let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
    def VMOVD0
        : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                          [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                          (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
          Requires<[HasZCZ]>;
    def VMOVQ0
        : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                          [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                          (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
          Requires<[HasZCZ]>;
  }
  // VMOV : Vector Get Lane (move scalar to ARM core register)
  //
  // The lane index is spread over the encoding bits left unspecified ('?')
  // in the template arguments; each def wires them up in its body.

  // Signed 8-bit lane: lane{2} -> Inst{21}, lane{1-0} -> Inst{6-5}.
  def VGETLNs8
      : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                  (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                  IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                  [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                              imm:$lane))]> {
    let Inst{21} = lane{2};
    let Inst{6-5} = lane{1-0};
  }

  // Signed 16-bit lane: lane{1} -> Inst{21}, lane{0} -> Inst{6}.
  def VGETLNs16
      : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                  (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                  IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                  [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                              imm:$lane))]> {
    let Inst{21} = lane{1};
    let Inst{6} = lane{0};
  }

  // Unsigned 8-bit lane: same lane placement as VGETLNs8.
  def VGETLNu8
      : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                  (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                  IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                  [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                              imm:$lane))]> {
    let Inst{21} = lane{2};
    let Inst{6-5} = lane{1-0};
  }

  // Unsigned 16-bit lane: same lane placement as VGETLNs16.
  def VGETLNu16
      : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                  (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                  IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                  [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                              imm:$lane))]> {
    let Inst{21} = lane{1};
    let Inst{6} = lane{0};
  }

  // 32-bit lane: selected from a plain `extractelt`; lane{0} -> Inst{21}.
  // Only available under HasFPRegs and HasFastVGETLNi32.
  def VGETLNi32
      : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                  (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                  IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                  [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                            imm:$lane))]>,
        Requires<[HasFPRegs, HasFastVGETLNi32]> {
    let Inst{21} = lane{0};
  }
  // Lane reads from Q registers and from f16/bf16 vectors.
  // For a Q-register source, the D subregister is picked with DSubReg_*_reg
  // and the lane index is rewritten with SubReg_*_lane before the D-register
  // VGETLN instruction is used.
  let Predicates = [HasNEON] in {
    // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

    // Signed reads.
    def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
              (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i8_reg imm:$lane))),
                        (SubReg_i8_lane imm:$lane))>;
    def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
              (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;

    // Unsigned reads.
    def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
              (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i8_reg imm:$lane))),
                        (SubReg_i8_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;

    // Unsigned reads of f16 vectors use the 16-bit lane instruction.
    def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
              (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;

    // Likewise for bf16 vectors.
    def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
              (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                                 (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane))>;
    def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
  }
  // 32-bit integer lane extraction.
  // Under HasFastVGETLNi32 a v4i32 extract uses VGETLNi32 on the selected D
  // subregister.
  def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
            (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane))>,
        Requires<[HasNEON, HasFastVGETLNi32]>;

  // Under HasSlowVGETLNi32 the S subregister holding the lane is extracted
  // and copied to the GPR class instead.
  def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
            (COPY_TO_REGCLASS
              (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))),
              GPR)>,
        Requires<[HasNEON, HasSlowVGETLNi32]>;
  def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
            (COPY_TO_REGCLASS
              (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))),
              GPR)>,
        Requires<[HasNEON, HasSlowVGETLNi32]>;
  // Floating-point lane extraction is a plain subregister extract.
  // For f32 the vector is first copied to the *_VFP2 register class and the
  // S subregister chosen by SSubReg_f32_reg is taken; for f64 the D
  // subregister chosen by DSubReg_f64_reg is taken directly.
  let Predicates = [HasNEON] in {
    def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
                (SSubReg_f32_reg imm:$src2))>;
    def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
              (EXTRACT_SUBREG
                (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
                (SSubReg_f32_reg imm:$src2))>;
    // (The v2i64 pattern is disabled in the original source.)
    //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
    //          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
    def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
              (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
  }
  // Even-lane extraction from 16-bit floating-point vectors.
  //   VT4 is the D-register vector type, VT8 the Q-register one. The vector
  //   is copied to the *_VFP2 class and the S subregister chosen by
  //   SSubReg_f16_reg for the (even) lane is extracted.
  multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
    def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                (SSubReg_f16_reg imm_even:$lane))>;
    def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                (SSubReg_f16_reg imm_even:$lane))>;
  }
  // Odd-lane extraction from 16-bit floating-point vectors using VMOVH.
  //   The S subregister chosen by SSubReg_f16_reg is extracted as in the
  //   even-lane case, passed through VMOVH, and the result is copied to the
  //   HPR class.
  multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
    def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VMOVH
                  (EXTRACT_SUBREG
                    (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                    (SSubReg_f16_reg imm_odd:$lane))),
                HPR)>;
    def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VMOVH
                  (EXTRACT_SUBREG
                    (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                    (SSubReg_f16_reg imm_odd:$lane))),
                HPR)>;
  }
  // f16 vectors: both even- and odd-lane patterns under HasNEON.
  let Predicates = [HasNEON] in {
    defm : ExtractEltEvenF16<v4f16, v8f16>;
    defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
  }

  // bf16 vectors, odd lanes: if VMOVH (vmovx.f16) is available, use it.
  // AddedComplexity = 1 is set on these patterns; the GPR-based odd-lane
  // patterns further down leave it unset.
  let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
    defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
  }

  let Predicates = [HasBF16, HasNEON] in {
    defm : ExtractEltEvenF16<v4bf16, v8bf16>;

    // Otherwise, when VMOVH is not available, extract the odd lane into a
    // GPR with VGETLNu16 and then move it to HPR.
    def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
                HPR)>;
    def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
              (COPY_TO_REGCLASS
                (VGETLNu16
                  (v4i16 (EXTRACT_SUBREG QPR:$src,
                                         (DSubReg_i16_reg imm:$lane))),
                  (SubReg_i16_lane imm:$lane)),
                HPR)>;
  }
  // VMOV : Vector Set Lane (move ARM core register to scalar)
  //
  // The destination is tied to the incoming vector ($src1 = $V). The lane
  // index fills the encoding bits left unspecified ('?') in the template
  // arguments.
  let Constraints = "$src1 = $V" in {

    // 8-bit lane: lane{2} -> Inst{21}, lane{1-0} -> Inst{6-5}.
    def VSETLNi8
        : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                    (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                    IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                    [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                 GPR:$R, imm:$lane))]> {
      let Inst{21} = lane{2};
      let Inst{6-5} = lane{1-0};
    }

    // 16-bit lane: lane{1} -> Inst{21}, lane{0} -> Inst{6}.
    def VSETLNi16
        : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                    (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                    IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                    [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                 GPR:$R, imm:$lane))]> {
      let Inst{21} = lane{1};
      let Inst{6} = lane{0};
    }

    // 32-bit lane: lane{0} -> Inst{21}. Requires HasVFP2.
    def VSETLNi32
        : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                    (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                    IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                    [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                             GPR:$R, imm:$lane))]>,
          Requires<[HasVFP2]> {
      let Inst{21} = lane{0};
      // This instruction is equivalent to
      //   $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
      let isInsertSubreg = 1;
    }
  }
  // Insertion of a 16-bit floating-point scalar (held in HPR) into a vector.
  //   The scalar is copied to the GPR class and written with VSETLNi16. For
  //   the Q-register type, the D subregister holding the lane is extracted,
  //   updated, and re-inserted.
  // TODO: for odd lanes this could be optimized a bit by using the VINS
  // FullFP16 instruction when it is available.
  multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
    def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
              (VT4 (VSETLNi16 DPR:$src1,
                              (COPY_TO_REGCLASS HPR:$src2, GPR),
                              imm:$lane))>;
    def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
              (VT8 (INSERT_SUBREG
                     QPR:$src1,
                     (v4i16 (VSETLNi16
                              (v4i16 (EXTRACT_SUBREG
                                       QPR:$src1,
                                       (DSubReg_i16_reg imm:$lane))),
                              (COPY_TO_REGCLASS HPR:$src2, GPR),
                              (SubReg_i16_lane imm:$lane))),
                     (DSubReg_i16_reg imm:$lane)))>;
  }
  // Lane insertion into Q registers, floating-point lane insertion, and
  // scalar_to_vector, all under HasNEON.
  let Predicates = [HasNEON] in {

    // Integer lane insertion into a Q register: extract the D subregister
    // holding the lane, update it with VSETLN, and insert it back.
    def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
              (v16i8 (INSERT_SUBREG
                       QPR:$src1,
                       (v8i8 (VSETLNi8
                               (v8i8 (EXTRACT_SUBREG
                                       QPR:$src1,
                                       (DSubReg_i8_reg imm:$lane))),
                               GPR:$src2, (SubReg_i8_lane imm:$lane))),
                       (DSubReg_i8_reg imm:$lane)))>;
    def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
              (v8i16 (INSERT_SUBREG
                       QPR:$src1,
                       (v4i16 (VSETLNi16
                                (v4i16 (EXTRACT_SUBREG
                                         QPR:$src1,
                                         (DSubReg_i16_reg imm:$lane))),
                                GPR:$src2, (SubReg_i16_lane imm:$lane))),
                       (DSubReg_i16_reg imm:$lane)))>;
    def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
              (v4i32 (INSERT_SUBREG
                       QPR:$src1,
                       (v2i32 (VSETLNi32
                                (v2i32 (EXTRACT_SUBREG
                                         QPR:$src1,
                                         (DSubReg_i32_reg imm:$lane))),
                                GPR:$src2, (SubReg_i32_lane imm:$lane))),
                       (DSubReg_i32_reg imm:$lane)))>;

    // f32 lane insertion is a subregister insert on the vector copied to
    // the *_VFP2 register class.
    def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
              (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                             SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
    def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
              (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                             SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

    // f16 lane insertion.
    defm : InsertEltF16<f16, v4f16, v8f16>;

    // f64 lane insertion is a D-subregister insert.
    def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
              (INSERT_SUBREG QPR:$src1, DPR:$src2,
                             (DSubReg_f64_reg imm:$src3))>;

    // scalar_to_vector from floating-point scalars: insert into the first
    // subregister of an IMPLICIT_DEF vector.
    def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
              (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
    def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
              (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
    def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
    def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
              (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
    def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
              (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

    // scalar_to_vector from a GPR, D-register results: VSETLN into lane 0
    // of an IMPLICIT_DEF vector.
    def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
              (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
    def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
              (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
    def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
              (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

    // scalar_to_vector from a GPR, Q-register results: build the D-register
    // value as above and insert it as dsub_0 of an IMPLICIT_DEF Q vector.
    def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG
                (v16i8 (IMPLICIT_DEF)),
                (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                dsub_0)>;
    def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG
                (v8i16 (IMPLICIT_DEF)),
                (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                dsub_0)>;
    def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
              (INSERT_SUBREG
                (v4i32 (IMPLICIT_DEF)),
                (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                dsub_0)>;
  }
  // bf16 lane insertion reuses the InsertEltF16 patterns, gated on HasBF16.
  let Predicates = [HasNEON, HasBF16] in {
    defm : InsertEltF16<bf16, v4bf16, v8bf16>;
  }
  // VDUP : Vector Duplicate (from ARM core register to all elements)
  //   Both classes select from ARMvdup of an i32 GPR; they differ in the
  //   destination register class (DPR for VDUPD, QPR for VDUPQ).
  class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
      : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
              IIC_VMOVIS, "vdup", Dt, "$V, $R",
              [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
  class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
      : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
              IIC_VMOVIS, "vdup", Dt, "$V, $R",
              [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

  // D-register forms. VDUP32d is only available under HasFastVDUP32.
  def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
  def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
  def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;

  // Q-register forms.
  def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
  def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
  def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
  // ARMvdup patterns for uarchs with fast VDUP.32.
  //   An f32 splat whose scalar is a bitconvert of a GPR uses VDUP.32
  //   directly on that GPR. The Q-register form is gated on HasNEON only.
  def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))),
            (VDUP32d GPR:$R)>,
        Requires<[HasNEON,HasFastVDUP32]>;
  def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))),
            (VDUP32q GPR:$R)>,
        Requires<[HasNEON]>;

  // ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead,
  // passing the same GPR as both halves of the D register.
  def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))),
            (VMOVDRR GPR:$R, GPR:$R)>,
        Requires<[HasNEON,HasSlowVDUP32]>;
  def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))),
            (VMOVDRR GPR:$R, GPR:$R)>,
        Requires<[HasNEON,HasSlowVDUP32]>;
  // VDUP : Vector Duplicate Lane (from scalar to all elements)

  // D-register result: source and result share the vector type Ty; IdxTy is
  // the operand class of the lane index.
  class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
                ValueType Ty, Operand IdxTy>
      : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
                  IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
                  [(set DPR:$Vd,
                        (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

  // Q-register result: the source is still a D register (type OpTy) and the
  // result has type ResTy.
  // NOTE(review): the selection pattern names the lane operand as
  // VectorIndex32 regardless of IdxTy - confirm this is intentional.
  class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Operand IdxTy>
      : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
                  IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
                  [(set QPR:$Vd,
                        (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                            VectorIndex32:$lane)))]>;
  // Inst{19-16} is partially specified depending on the element size; the
  // unspecified high bits carry the lane index (3 bits for 8-bit elements,
  // 2 bits for 16-bit elements, 1 bit for 32-bit elements).

  // D-register results.
  def VDUPLN8d
      : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
    bits<3> lane;
    let Inst{19-17} = lane{2-0};
  }
  def VDUPLN16d
      : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
    bits<2> lane;
    let Inst{19-18} = lane{1-0};
  }
  def VDUPLN32d
      : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
    bits<1> lane;
    let Inst{19} = lane{0};
  }

  // Q-register results.
  def VDUPLN8q
      : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
    bits<3> lane;
    let Inst{19-17} = lane{2-0};
  }
  def VDUPLN16q
      : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
    bits<2> lane;
    let Inst{19-18} = lane{1-0};
  }
  def VDUPLN32q
      : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
    bits<1> lane;
    let Inst{19} = lane{0};
  }
  // Lane-duplicate and scalar-splat patterns not covered by the VDUPLN
  // instruction definitions themselves (floating-point types and Q-register
  // sources), all under HasNEON.
  let Predicates = [HasNEON] in {

    // D-register source, floating-point element types.
    //
    // v4f16 has 16-bit elements (lanes 0..3), so it must use the 16-bit
    // VDUPLN16d. The 32-bit VDUPLN32d has a 1-bit lane field and would
    // replicate a pair of f16 lanes instead of one. This now matches the
    // v8f16 pattern below and the v4bf16 pattern, which use the 16-bit form.
    def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
              (VDUPLN16d DPR:$Vm, imm:$lane)>;
    def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
              (VDUPLN32d DPR:$Vm, imm:$lane)>;
    def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
              (VDUPLN32q DPR:$Vm, imm:$lane)>;

    // Q-register source: pick the D subregister holding the lane with
    // DSubReg_*_reg and rewrite the lane index with SubReg_*_lane.
    def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
              (v16i8 (VDUPLN8q
                       (v8i8 (EXTRACT_SUBREG QPR:$src,
                                             (DSubReg_i8_reg imm:$lane))),
                       (SubReg_i8_lane imm:$lane)))>;
    def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
              (v8i16 (VDUPLN16q
                       (v4i16 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i16_reg imm:$lane))),
                       (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
              (v8f16 (VDUPLN16q
                       (v4f16 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i16_reg imm:$lane))),
                       (SubReg_i16_lane imm:$lane)))>;
    def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
              (v4i32 (VDUPLN32q
                       (v2i32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane)))>;
    def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
              (v4f32 (VDUPLN32q
                       (v2f32 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_i32_reg imm:$lane))),
                       (SubReg_i32_lane imm:$lane)))>;

    // Splat of a floating-point scalar register: place the scalar in ssub_0
    // of an IMPLICIT_DEF D register and duplicate lane 0.
    def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
              (v4f16 (VDUPLN16d
                       (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                      (f16 HPR:$src), ssub_0),
                       (i32 0)))>;
    def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
              (v2f32 (VDUPLN32d
                       (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                      SPR:$src, ssub_0),
                       (i32 0)))>;
    def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
              (v4f32 (VDUPLN32q
                       (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                      SPR:$src, ssub_0),
                       (i32 0)))>;
    def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
              (v8f16 (VDUPLN16q
                       (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                      (f16 HPR:$src), ssub_0),
                       (i32 0)))>;
  }
  // bf16 lane-duplicate and scalar-splat patterns; all use the 16-bit
  // VDUPLN16d / VDUPLN16q instructions and are gated on HasBF16.
  let Predicates = [HasNEON, HasBF16] in {
    // Lane duplicate from a D register.
    def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
              (VDUPLN16d DPR:$Vm, imm:$lane)>;

    // Lane duplicate from a Q register, via the D subregister that holds
    // the lane.
    def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
              (v8bf16 (VDUPLN16q
                        (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                        (SubReg_i16_lane imm:$lane)))>;

    // Splat of a bf16 scalar: place it in ssub_0 of an IMPLICIT_DEF D
    // register and duplicate lane 0.
    def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
              (v4bf16 (VDUPLN16d
                        (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                       (bf16 HPR:$src), ssub_0),
                        (i32 0)))>;
    def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
              (v8bf16 (VDUPLN16q
                        (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                       (bf16 HPR:$src), ssub_0),
                        (i32 0)))>;
  }
  // VMOVN : Vector Narrowing Move (selected from `trunc`)
  defm VMOVN
      : N2VN_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVN,
                 "vmovn", "i", trunc>;

  // VQMOVN : Vector Saturating Narrowing Move
  //   The "su" variant is printed with the "vqmovun" mnemonic.
  defm VQMOVNs
      : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
                    "vqmovn", "s", int_arm_neon_vqmovns>;
  defm VQMOVNu
      : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
                    "vqmovn", "u", int_arm_neon_vqmovnu>;
  defm VQMOVNsu
      : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
                    "vqmovun", "s", int_arm_neon_vqmovnsu>;
  //   VMOVL    : Vector Lengthening Move
  //   The signed form is selected from `sext`, the unsigned form from `zext`.
  defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
  defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

  // `anyext` has no dedicated instruction here; it is selected to the
  // unsigned VMOVLu variants.
  let Predicates = [HasNEON] in {
  def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))),  (VMOVLuv8i16 DPR:$Vm)>;
  def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
  def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
  }
  // Vector Conversions.

  //   VCVT     : Vector Convert Between Floating-Point and Integers
  //   32-bit element forms. Naming: f2s/f2u convert float to signed/unsigned
  //   integer (fp_to_sint / fp_to_uint), s2f/u2f convert signed/unsigned
  //   integer to float (sint_to_fp / uint_to_fp). The trailing d/q selects the
  //   N2VD (v2xx) or N2VQ (v4xx) parent class.
  def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                       v2i32, v2f32, fp_to_sint>;
  def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                       v2i32, v2f32, fp_to_uint>;
  def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                       v2f32, v2i32, sint_to_fp>;
  def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                       v2f32, v2i32, uint_to_fp>;

  def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                       v4i32, v4f32, fp_to_sint>;
  def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                       v4i32, v4f32, fp_to_uint>;
  def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                       v4f32, v4i32, sint_to_fp>;
  def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                       v4f32, v4i32, uint_to_fp>;
  // Half-precision <-> 16-bit integer conversions. Same operator mapping as
  // the 32-bit VCVT definitions, but every record additionally requires
  // HasFullFP16.
  def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                       v4i16, v4f16, fp_to_sint>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                       v4i16, v4f16, fp_to_uint>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                       v4f16, v4i16, sint_to_fp>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                       v4f16, v4i16, uint_to_fp>,
                  Requires<[HasNEON, HasFullFP16]>;

  def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                       v8i16, v8f16, fp_to_sint>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                       v8i16, v8f16, fp_to_uint>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                       v8f16, v8i16, sint_to_fp>,
                  Requires<[HasNEON, HasFullFP16]>;
  def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                       v8f16, v8i16, uint_to_fp>,
                  Requires<[HasNEON, HasFullFP16]>;
  // VCVT{A, N, P, M}
  //
  // Multiclass producing the eight "vcvt<op>" records for one mode:
  //   op     - suffix appended to "vcvt" to form the mnemonic.
  //   op10_8 - 3-bit value forwarded to the N2V{D,Q}Intnp parent class.
  //   IntS   - pattern operator used for the signed ("s32.f32"/"s16.f16") defs.
  //   IntU   - pattern operator used for the unsigned ("u32.f32"/"u16.f16") defs.
  // Record suffixes: S/U = signed/unsigned, D/Q = N2VDIntnp/N2VQIntnp,
  // f/h = f32/f16 element type. The f16 records also require HasFullFP16.
  multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                      SDPatternOperator IntU> {
    let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
      def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                         "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
      def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                         "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
      def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                         "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
      def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                         "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
      def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                         "s16.f16", v4i16, v4f16, IntS>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
      def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                         "s16.f16", v8i16, v8f16, IntS>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
      def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                         "u16.f16", v4i16, v4f16, IntU>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
      def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                         "u16.f16", v8i16, v8f16, IntU>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
    }
  }

  // One instantiation per mode; op10_8 takes the values 0b000..0b011.
  defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
  defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
  defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
  defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
  //   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
  //   D-register forms. All of them are decoded through DecodeVCVTD and are
  //   selected from the int_arm_neon_vcvt{fp2fx,fx2fp}{s,u} intrinsics.
  let DecoderMethod = "DecodeVCVTD" in {
  def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                          v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
  def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                          v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
  def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                          v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
  def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                          v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
  // Half-precision variants additionally need full FP16 support.
  let Predicates = [HasNEON, HasFullFP16] in {
  def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                          v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
  def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                          v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
  def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                          v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
  def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                          v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
  } // Predicates = [HasNEON, HasFullFP16]
  }
  // Q-register forms of the floating-point <-> fixed-point VCVT. They use the
  // same intrinsics as the D-register forms but are decoded through
  // DecodeVCVTQ.
  let DecoderMethod = "DecodeVCVTQ" in {
  def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                          v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
  def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                          v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
  def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                          v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
  def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                          v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
  // Half-precision variants additionally need full FP16 support.
  let Predicates = [HasNEON, HasFullFP16] in {
  def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                          v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
  def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                          v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
  def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                          v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
  def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                          v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
  } // Predicates = [HasNEON, HasFullFP16]
  }
  // Assembly aliases: the "vcvt ... , #0" spelling (explicit immediate of
  // zero) is accepted and mapped onto the plain float <-> integer VCVT
  // instructions, which take no immediate operand.

  // 32-bit elements, D registers.
  def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                      (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                      (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                      (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                      (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

  // 32-bit elements, Q registers.
  def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                      (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                      (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                      (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                      (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

  // 16-bit elements, D registers.
  def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                      (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                      (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                      (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                      (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

  // 16-bit elements, Q registers.
  def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                      (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                      (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                      (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                      (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
  //   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
  //   The half-precision side is typed as v4i16 in both records. VCVTf2h uses
  //   the narrowing parent class N2VNInt (v4f32 -> v4i16), VCVTh2f the
  //   lengthening parent class N2VLInt (v4i16 -> v4f32). Both require HasFP16.
  def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                          IIC_VUNAQ, "vcvt", "f16.f32",
                          v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                  Requires<[HasNEON, HasFP16]>;
  def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                          IIC_VUNAQ, "vcvt", "f32.f16",
                          v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                  Requires<[HasNEON, HasFP16]>;
  // Vector Reverse.

  //   VREV64   : Vector Reverse elements within 64-bit doublewords
  //
  // VREV64D / VREV64Q are the D- and Q-register instruction classes; op19_18
  // is forwarded to N2V and Ty is the vector type matched by ARMvrev64.
  class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
          (ins DPR:$Vm), IIC_VMOVD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
  class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
          (ins QPR:$Vm), IIC_VMOVQ,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;

  def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
  def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
  def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
  // v2f32 reuses the 32-bit integer instruction.
  let Predicates = [HasNEON] in {
  def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
  }

  def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
  def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
  def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;

  // Floating-point vector types reuse the integer instruction of the same
  // element width.
  let Predicates = [HasNEON] in {
    def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
              (VREV64q32 QPR:$Vm)>;
    def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
              (VREV64q16 QPR:$Vm)>;
    def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
              (VREV64d16 DPR:$Vm)>;
  }
  //   VREV32   : Vector Reverse elements within 32-bit words
  //
  // Same layout as the VREV64 classes, but matching ARMvrev32 and passing
  // 0b00001 where VREV64 passes 0b00000 to N2V.
  class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
          (ins DPR:$Vm), IIC_VMOVD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
  class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
          (ins QPR:$Vm), IIC_VMOVQ,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;

  def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
  def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

  def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
  def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

  // f16 vectors reuse the 16-bit integer instructions.
  let Predicates = [HasNEON] in {
    def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
              (VREV32q16 QPR:$Vm)>;
    def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
              (VREV32d16 DPR:$Vm)>;
  }
  //   VREV16   : Vector Reverse elements within 16-bit halfwords
  //
  // Same layout as the VREV64/VREV32 classes, matching ARMvrev16 and passing
  // 0b00010 to N2V. Only the 8-bit element forms are instantiated.
  class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
          (ins DPR:$Vm), IIC_VMOVD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
  class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
          (ins QPR:$Vm), IIC_VMOVQ,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;

  def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
  def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
  // Other Vector Shuffles.

  //  Aligned extractions: really just dropping registers
  //
  // Selects vector_extract_subvec of a Q-register value to a plain
  // EXTRACT_SUBREG. LaneCVT converts the start index into the subregister
  // index operand.
  class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
        : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
               (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
          Requires<[HasNEON]>;

  def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

  def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

  def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

  def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

  def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

  def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
  //   VEXT     : Vector Extract

  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd" in {
  // D-register VEXT. `index` is a 3-bit field: by default Inst{11} is fixed to
  // zero and Inst{10-8} holds the index. Individual defs may re-assign those
  // bits.
  class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
    : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
          IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
          [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                       (Ty DPR:$Vm), imm:$index)))]> {
    bits<3> index;
    let Inst{11} = 0b0;
    let Inst{10-8} = index{2-0};
  }

  // Q-register VEXT. `index` is a 4-bit field placed in Inst{11-8} by
  // default.
  class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
    : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
          IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
          [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                       (Ty QPR:$Vm), imm:$index)))]> {
    bits<4> index;
    let Inst{11-8} = index{3-0};
  }
  }
  // VEXT instantiations. For element sizes wider than 8 bits the index is
  // written to the upper bits of the field and the remaining low bits are
  // fixed to zero. Floating-point vector types are selected to the integer
  // instruction of the same element width.
  def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
    let Inst{10-8} = index{2-0};
  }
  def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
    let Inst{10-9} = index{1-0};
    let Inst{8}    = 0b0;
  }
  let Predicates = [HasNEON] in {
  def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
            (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
  }

  def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
    let Inst{10}     = index{0};
    let Inst{9-8}    = 0b00;
  }
  let Predicates = [HasNEON] in {
  def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
            (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
  }

  def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
    let Inst{11-8} = index{3-0};
  }
  def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
    let Inst{11-9} = index{2-0};
    let Inst{8}    = 0b0;
  }
  let Predicates = [HasNEON] in {
  def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
            (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
  }

  def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
    let Inst{11-10} = index{1-0};
    let Inst{9-8}    = 0b00;
  }
  def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
    let Inst{11} = index{0};
    let Inst{10-8}    = 0b000;
  }
  let Predicates = [HasNEON] in {
  def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
            (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
  }
  //   VTRN     : Vector Transpose
  //   D-register forms use N2VDShuffle; Q-register forms use N2VQShuffle with
  //   the IIC_VPERMQ itinerary.
  def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
  def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
  def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

  def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
  def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
  def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
  //   VUZP     : Vector Unzip (Deinterleave)
  //   There is no VUZPd32 record; the D-register ".32" spelling is provided
  //   only as an assembler alias of VTRNd32.
  def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
  def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
  // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                      (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

  def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
  def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
  def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
  //   VZIP     : Vector Zip (Interleave)
  //   There is no VZIPd32 record; the D-register ".32" spelling is provided
  //   only as an assembler alias of VTRNd32.
  def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
  def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
  // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
  def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                      (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

  def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
  def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
  def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
  // Vector Table Lookup and Table Extension.

  //   VTBL     : Vector Table Lookup
  //
  // Every record in this group is decoded through DecodeTBLInstruction. Only
  // the one-register forms (VTBL1 / VTBX1) carry a selection pattern on the
  // instruction itself; the multi-register forms have empty pattern lists and
  // are marked hasExtraSrcRegAllocReq. The *Pseudo records take the table as
  // a single QQPR operand.
  let DecoderMethod = "DecodeTBLInstruction" in {
  def  VTBL1
    : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
          (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
          "vtbl", "8", "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

  let hasExtraSrcRegAllocReq = 1 in {
  def  VTBL2
    : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
          (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
          "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  def  VTBL3
    : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
          (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
          "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  def  VTBL4
    : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
          (ins VecListFourD:$Vn, DPR:$Vm),
          NVTBLFrm, IIC_VTB4,
          "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
  } // hasExtraSrcRegAllocReq = 1

  def  VTBL3Pseudo
    : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
  def  VTBL4Pseudo
    : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

  //   VTBX     : Vector Table Extension
  //   Unlike VTBL, these take the previous destination value as $orig, tied
  //   to the output register by the "$orig = $Vd" constraint.
  def  VTBX1
    : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
          (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
          "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
          [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                                 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
  let hasExtraSrcRegAllocReq = 1 in {
  def  VTBX2
    : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
          (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
          "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
  def  VTBX3
    : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
          (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
          NVTBLFrm, IIC_VTBX3,
          "vtbx", "8", "$Vd, $Vn, $Vm",
          "$orig = $Vd", []>;
  def  VTBX4
    : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
          (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
          "vtbx", "8", "$Vd, $Vn, $Vm",
          "$orig = $Vd", []>;
  } // hasExtraSrcRegAllocReq = 1

  def  VTBX3Pseudo
    : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                  IIC_VTBX3, "$orig = $dst", []>;
  def  VTBX4Pseudo
    : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                  IIC_VTBX4, "$orig = $dst", []>;
  } // DecoderMethod = "DecodeTBLInstruction"
  // Selection patterns for the multi-register table lookups. The individual
  // v8i8 table registers are combined with REG_SEQUENCE:
  //   - two-register tables form a DPair and select VTBL2 / VTBX2 directly;
  //   - three- and four-register tables form a QQPR and select the *Pseudo
  //     records. For the three-register case the unused dsub_3 slot is filled
  //     with IMPLICIT_DEF.
  let Predicates = [HasNEON] in {
  def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
            (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                              v8i8:$Vn1, dsub_1),
                         v8i8:$Vm))>;
  def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                      v8i8:$Vm)),
            (v8i8 (VTBX2 v8i8:$orig,
                         (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                              v8i8:$Vn1, dsub_1),
                         v8i8:$Vm))>;

  def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                      v8i8:$Vn2, v8i8:$Vm)),
            (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                   v8i8:$Vn1, dsub_1,
                                                   v8i8:$Vn2, dsub_2,
                                                   (v8i8 (IMPLICIT_DEF)), dsub_3),
                               v8i8:$Vm))>;
  def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                      v8i8:$Vn2, v8i8:$Vm)),
            (v8i8 (VTBX3Pseudo v8i8:$orig,
                               (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                   v8i8:$Vn1, dsub_1,
                                                   v8i8:$Vn2, dsub_2,
                                                   (v8i8 (IMPLICIT_DEF)), dsub_3),
                               v8i8:$Vm))>;

  def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                      v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
            (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                   v8i8:$Vn1, dsub_1,
                                                   v8i8:$Vn2, dsub_2,
                                                   v8i8:$Vn3, dsub_3),
                               v8i8:$Vm))>;
  def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                      v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
            (v8i8 (VTBX4Pseudo v8i8:$orig,
                               (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                   v8i8:$Vn1, dsub_1,
                                                   v8i8:$Vn2, dsub_2,
                                                   v8i8:$Vn3, dsub_3),
                               v8i8:$Vm))>;
  }
  // VRINT : Vector Rounding
  //
  // Multiclass producing the four "vrint<op>" records (D/Q register, f32/f16
  // element) plus assembler aliases for one rounding variant:
  //   op    - suffix appended to "vrint" to form the mnemonic.
  //   op9_7 - 3-bit value written to Inst{9-7} of every record.
  //   Int   - pattern operator the records are selected from.
  // The aliases accept the two-type spelling (".f32.f32" / ".f16.f16") and map
  // it to the corresponding record.
  multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
    let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
      def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                        !strconcat("vrint", op), "f32",
                        v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
        let Inst{9-7} = op9_7;
      }
      def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                        !strconcat("vrint", op), "f32",
                        v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
        let Inst{9-7} = op9_7;
      }
      def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                        !strconcat("vrint", op), "f16",
                        v4f16, v4f16, Int>,
               Requires<[HasV8, HasNEON, HasFullFP16]> {
        let Inst{9-7} = op9_7;
      }
      def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                        !strconcat("vrint", op), "f16",
                        v8f16, v8f16, Int>,
               Requires<[HasV8, HasNEON, HasFullFP16]> {
        let Inst{9-7} = op9_7;
      }
    }

    def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                    (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
    def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                    (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
    let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                    (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
    def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                    (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
    }
  }

  // Note that the op9_7 values are not contiguous: 0b100 and 0b110 are unused.
  defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
  defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
  defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
  defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
  defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
  defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
  // Cryptography instructions
  //
  // Helper classes shared by the AES and SHA records. All of them use the
  // NEONThumb2DataIPostEncoder post-encoder, live in the "v8Crypto" decoder
  // namespace and are declared free of side effects.
  //   AES / AES2Op     - "aes<op>" on v16i8, via N2VQIntXnp / N2VQIntX2np.
  //   N2SHA / N2SHA2Op - "sha<op>" on v4i32, via N2VQIntXnp / N2VQIntX2np.
  //   N3SHA3Op         - "sha<op>" on v4i32, via N3VQInt3np.
  let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
      DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
    class AES<string op, bit op7, bit op6, SDPatternOperator Int>
      : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                   !strconcat("aes", op), "8", v16i8, v16i8, Int>;
    class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
      : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                   !strconcat("aes", op), "8", v16i8, v16i8, Int>;
    class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                SDPatternOperator Int>
      : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                   !strconcat("sha", op), "32", v4i32, v4i32, Int>;
    class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                SDPatternOperator Int>
      : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                   !strconcat("sha", op), "32", v4i32, v4i32, Int>;
    class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
      : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  }
  // AES instructions; available only with both HasV8 and HasAES.
  // AESD/AESE use the AES2Op class, AESIMC/AESMC use the AES class.
  let Predicates = [HasV8, HasAES] in {
  def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
  def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
  def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
  def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
  }
  // SHA instructions; available only with both HasV8 and HasSHA2.
  // SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag, i.e. without a
  // selection pattern on the instruction record itself.
  let Predicates = [HasV8, HasSHA2] in {
  def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
  def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
  def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
  def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
  def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
  def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
  def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
  def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
  def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
  def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
  }
  // Selection patterns for the SHA1 intrinsics that take or return a scalar
  // i32. The i32 value is copied into the SPR class and placed at ssub_0 of
  // the vector operand with SUBREG_TO_REG; for sha1h the result is read back
  // from ssub_0 and copied to a GPR.
  // NOTE(review): these patterns are predicated on HasNEON only, whereas the
  // SHA1H/SHA1C/SHA1M/SHA1P records they select require [HasV8, HasSHA2] -
  // confirm whether that difference is intentional.
  let Predicates = [HasNEON] in {
  def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
            (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
                (SHA1H (SUBREG_TO_REG (i64 0),
                                      (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                      ssub_0)),
                ssub_0)), GPR)>;

  def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
            (SHA1C v4i32:$hash_abcd,
                   (SUBREG_TO_REG (i64 0),
                                  (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                  ssub_0),
                   v4i32:$wk)>;

  def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
            (SHA1M v4i32:$hash_abcd,
                   (SUBREG_TO_REG (i64 0),
                                  (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                  ssub_0),
                   v4i32:$wk)>;

  def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
            (SHA1P v4i32:$hash_abcd,
                   (SUBREG_TO_REG (i64 0),
                                  (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                  ssub_0),
                   v4i32:$wk)>;
  }
  //===----------------------------------------------------------------------===//
  // NEON instructions for single-precision FP math
  //===----------------------------------------------------------------------===//

  // Selects a one-operand scalar f32 operation to a v2f32 NEON instruction:
  // the operand is inserted at ssub_0 of an undefined v2f32 value constrained
  // to DPR_VFP2, the instruction is applied, and the result is read back from
  // ssub_0.
  class N2VSPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$a)),
                (EXTRACT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (Inst
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
  // Two-operand counterpart of the one-operand scalar f32 pattern: each
  // operand is inserted at ssub_0 of its own undefined v2f32 value
  // (constrained to DPR_VFP2) and the result is read back from ssub_0.
  class N3VSPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
                (EXTRACT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (Inst
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // Two-operand scalar f16 pattern: HPR operands are inserted at ssub_0 of
  // undefined v4f16 values (constrained to DPR_VFP2), the v4f16 instruction is
  // applied, and the result is read back from ssub_0.
  class N3VSPatFP16<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
                (EXTRACT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (Inst
                  (INSERT_SUBREG
                   (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                   HPR:$a, ssub_0),
                  (INSERT_SUBREG
                   (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                   HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // Scalar f32 multiply-accumulate pattern: matches
  //   OpNode(acc, MulNode(a, b))
  // and selects a three-operand v2f32 instruction whose operands are, in
  // order, acc, a and b, each inserted at ssub_0 of an undefined v2f32 value
  // constrained to DPR_VFP2. The result is read back from ssub_0.
  class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
                (EXTRACT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (Inst
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$acc, ssub_0),
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                  (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  // Integer-to-float scalar conversion pattern: the GPR operand is copied to
  // the SPR class, inserted at ssub_0 of an undefined v2f32 value, converted
  // with the vector instruction, and the f32 result is read from ssub_0.
  class NVCVTIFPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode GPR:$a)),
                (f32 (EXTRACT_SUBREG
                       (v2f32 (Inst
                         (INSERT_SUBREG
                           (v2f32 (IMPLICIT_DEF)),
                           (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                       ssub_0))>;
  // Float-to-integer scalar conversion pattern: the SPR operand is inserted at
  // ssub_0 of an undefined v2f32 value, converted with the vector instruction,
  // and the i32 result is read from ssub_0.
  class NVCVTFIPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(i32 (OpNode SPR:$a)),
                (i32 (EXTRACT_SUBREG
                       (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$a, ssub_0))),
                       ssub_0))>;
  // Scalar f32 arithmetic selected to the D-register NEON instructions.
  def : N3VSPat<fadd, VADDfd>;
  def : N3VSPat<fsub, VSUBfd>;
  def : N3VSPat<fmul, VMULfd>;
  // Multiply-accumulate / multiply-subtract. The explicit Requires lists pick
  // between the VMLA/VMLS forms (UseFPVMLx) and the fused VFMA/VFMS forms
  // (HasVFP4 + UseFusedMAC).
  def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
        Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
        Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
  def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
        Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
        Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
  def : N2VSPat<fabs, VABSfd>;
  def : N2VSPat<fneg, VNEGfd>;
  // Scalar fmaximum / fminimum selected to the NEON VMAX / VMIN instructions.
  // The explicit Requires list supplies the predicates for these patterns
  // (presumably replacing whatever default NEONFPPat sets - NEONFPPat is
  // defined outside this chunk, confirm).
  // The f16 forms select NEON instructions, so they list HasNEON alongside
  // HasFullFP16, matching the f32 forms below and the predicates used on the
  // f16 NEON instruction records elsewhere in this file.
  def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasNEON, HasFullFP16]>;
  def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasNEON, HasFullFP16]>;
  def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
  def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
  // Scalar f32 <-> i32 conversions selected to the D-register VCVT
  // instructions.
  def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
  def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
  def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
  def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
  // NEON doesn't have any f64 conversions, so provide patterns to make
  // sure the VFP conversions match when extracting from a vector.
  // The selected lane is read as an S subregister (SSubReg_f32_reg converts
  // the lane immediate into the subregister index) and fed to VSITOD/VUITOD.
  def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
               (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
               (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
               (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
               (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
  // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
  // The GPR value is passed as both VMOVDRR operands and the f32 result is
  // read from ssub_0. Both patterns apply only under DontUseVMOVSR.
  def : Pat<(f32 (bitconvert GPR:$a)),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
          Requires<[HasNEON, DontUseVMOVSR]>;
  def : Pat<(arm_vmovsr GPR:$a),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
          Requires<[HasNEON, DontUseVMOVSR]>;
  6772. //===----------------------------------------------------------------------===//
  // Non-Instruction Patterns or Endianness - Revert Patterns
  6774. //===----------------------------------------------------------------------===//
  // bit_convert
  //
  // Bitconverts between types with the same lane width. These are predicated
  // on HasNEON only (no endianness predicate), and the output pattern is just
  // the source register under its new type, so no instruction is emitted.
  let Predicates = [HasNEON] in {
    // 64 bit conversions
    def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
    def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

    def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
    def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

    // v4i16 <-> each 16-bit floating point vector type.
    foreach HalfVT = [ v4f16, v4bf16 ] in {
      def : Pat<(v4i16  (bitconvert (HalfVT DPR:$src))), (v4i16  DPR:$src)>;
      def : Pat<(HalfVT (bitconvert (v4i16  DPR:$src))), (HalfVT DPR:$src)>;
    }

    // 128 bit conversions
    def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
    def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

    def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
    def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

    // v8i16 <-> each 16-bit floating point vector type.
    foreach HalfVT = [ v8f16, v8bf16 ] in {
      def : Pat<(v8i16  (bitconvert (HalfVT QPR:$src))), (v8i16  QPR:$src)>;
      def : Pat<(HalfVT (bitconvert (v8i16  QPR:$src))), (HalfVT QPR:$src)>;
    }
  }
  // Little-endian bitconverts between types whose lane widths differ. The
  // output pattern is the source register under the destination type, so no
  // instruction is emitted.
  //
  // Types are grouped by lane width. For each destination type, one pattern
  // is generated for every source type that belongs to a different group.
  let Predicates = [IsLE,HasNEON] in {
    // 64 bit conversions
    // Destination has 64-bit lanes.
    foreach Dst = [ f64, v1i64 ] in
      foreach Src = [ v2f32, v2i32, v4f16, v4bf16, v4i16, v8i8 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (Dst DPR:$src)>;
    // Destination has 32-bit lanes.
    foreach Dst = [ v2f32, v2i32 ] in
      foreach Src = [ f64, v1i64, v4f16, v4bf16, v4i16, v8i8 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (Dst DPR:$src)>;
    // Destination has 16-bit lanes.
    foreach Dst = [ v4f16, v4bf16, v4i16 ] in
      foreach Src = [ f64, v1i64, v2f32, v2i32, v8i8 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (Dst DPR:$src)>;
    // Destination has 8-bit lanes.
    foreach Src = [ f64, v1i64, v2f32, v2i32, v4f16, v4bf16, v4i16 ] in
      def : Pat<(v8i8 (bitconvert (Src DPR:$src))), (v8i8 DPR:$src)>;

    // 128 bit conversions
    // Destination has 64-bit lanes.
    foreach Dst = [ v2f64, v2i64 ] in
      foreach Src = [ v4f32, v4i32, v8f16, v8bf16, v8i16, v16i8 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (Dst QPR:$src)>;
    // Destination has 32-bit lanes.
    foreach Dst = [ v4f32, v4i32 ] in
      foreach Src = [ v2f64, v2i64, v8f16, v8bf16, v8i16, v16i8 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (Dst QPR:$src)>;
    // Destination has 16-bit lanes.
    foreach Dst = [ v8f16, v8bf16, v8i16 ] in
      foreach Src = [ v2f64, v2i64, v4f32, v4i32, v16i8 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (Dst QPR:$src)>;
    // Destination has 8-bit lanes.
    foreach Src = [ v2f64, v2i64, v4f32, v4i32, v8f16, v8bf16, v8i16 ] in
      def : Pat<(v16i8 (bitconvert (Src QPR:$src))), (v16i8 QPR:$src)>;
  }
  // Big-endian bitconverts between types whose lane widths differ. Each one
  // is selected as a VREV instruction. In every pattern below the instruction
  // is VREV<W>{d,q}<N>, where W is the wider and N the narrower of the two
  // lane widths involved (d for 64-bit registers, q for 128-bit registers).
  let Predicates = [IsBE,HasNEON] in {
    // 64 bit conversions
    // Destination has 64-bit lanes.
    foreach Dst = [ f64, v1i64 ] in {
      foreach Src = [ v2f32, v2i32 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV64d32 DPR:$src)>;
      foreach Src = [ v4f16, v4bf16, v4i16 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV64d16 DPR:$src)>;
      def : Pat<(Dst (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
    }
    // Destination has 32-bit lanes.
    foreach Dst = [ v2f32, v2i32 ] in {
      foreach Src = [ f64, v1i64 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV64d32 DPR:$src)>;
      foreach Src = [ v4f16, v4bf16, v4i16 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV32d16 DPR:$src)>;
      def : Pat<(Dst (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
    }
    // Destination has 16-bit lanes.
    foreach Dst = [ v4f16, v4bf16, v4i16 ] in {
      foreach Src = [ f64, v1i64 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV64d16 DPR:$src)>;
      foreach Src = [ v2f32, v2i32 ] in
        def : Pat<(Dst (bitconvert (Src DPR:$src))), (VREV32d16 DPR:$src)>;
      def : Pat<(Dst (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
    }
    // Destination has 8-bit lanes.
    foreach Src = [ f64, v1i64 ] in
      def : Pat<(v8i8 (bitconvert (Src DPR:$src))), (VREV64d8 DPR:$src)>;
    foreach Src = [ v2f32, v2i32 ] in
      def : Pat<(v8i8 (bitconvert (Src DPR:$src))), (VREV32d8 DPR:$src)>;
    foreach Src = [ v4f16, v4bf16, v4i16 ] in
      def : Pat<(v8i8 (bitconvert (Src DPR:$src))), (VREV16d8 DPR:$src)>;

    // 128 bit conversions
    // Destination has 64-bit lanes.
    foreach Dst = [ v2f64, v2i64 ] in {
      foreach Src = [ v4f32, v4i32 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV64q32 QPR:$src)>;
      foreach Src = [ v8f16, v8bf16, v8i16 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV64q16 QPR:$src)>;
      def : Pat<(Dst (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
    }
    // Destination has 32-bit lanes.
    foreach Dst = [ v4f32, v4i32 ] in {
      foreach Src = [ v2f64, v2i64 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV64q32 QPR:$src)>;
      foreach Src = [ v8f16, v8bf16, v8i16 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV32q16 QPR:$src)>;
      def : Pat<(Dst (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
    }
    // Destination has 16-bit lanes.
    foreach Dst = [ v8f16, v8bf16, v8i16 ] in {
      foreach Src = [ v2f64, v2i64 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV64q16 QPR:$src)>;
      foreach Src = [ v4f32, v4i32 ] in
        def : Pat<(Dst (bitconvert (Src QPR:$src))), (VREV32q16 QPR:$src)>;
      def : Pat<(Dst (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
    }
    // Destination has 8-bit lanes.
    foreach Src = [ v2f64, v2i64 ] in
      def : Pat<(v16i8 (bitconvert (Src QPR:$src))), (VREV64q8 QPR:$src)>;
    foreach Src = [ v4f32, v4i32 ] in
      def : Pat<(v16i8 (bitconvert (Src QPR:$src))), (VREV32q8 QPR:$src)>;
    foreach Src = [ v8f16, v8bf16, v8i16 ] in
      def : Pat<(v16i8 (bitconvert (Src QPR:$src))), (VREV16q8 QPR:$src)>;
  }
  // Register casts: every (destination, source) combination of same-sized
  // vector types, including identical types, is selected as the source
  // register under the destination type.
  let Predicates = [HasNEON] in {
    // These patterns deliberately match the specific SDNode type
    // 'ARMVectorRegCastImpl' and not the more general 'ARMVectorRegCast',
    // which would also match some bitconverts. With the general node, a
    // pattern whose input and output types are the same has its bitconvert
    // elided, leaving a nonsense match of nothing.

    // 128-bit types, held in Q registers.
    foreach DstVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
      foreach SrcVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
        def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT QPR:$src))),
                  (DstVT QPR:$src)>;

    // 64-bit types, held in D registers.
    foreach DstVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
      foreach SrcVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
        def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT DPR:$src))),
                  (DstVT DPR:$src)>;
  }
  // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian.
  // Loads reverse after the VLD1; stores reverse the value before the VST1.
  let Predicates = [IsBE,HasNEON] in {
    // Byte-aligned accesses: 8-bit element load/store with VREV64q8.
    def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
              (VREV64q8 (VLD1q8 addrmode6:$addr))>;
    def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;

    // Halfword-aligned accesses: 16-bit element load/store with VREV64q16.
    def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
              (VREV64q16 (VLD1q16 addrmode6:$addr))>;
    def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
              (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
  }
  // Fold extracting an element out of a v2i32 into a vfp register: the
  // extract plus i32 -> f32 bitconvert becomes a read of the S sub-register
  // chosen by the lane number.
  def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
            (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
        Requires<[HasNEON]>;
  // Vector lengthening move with load, matching extending loads.
  // Provides extload, zextload and sextload patterns for a standard
  // lengthening load. Example, any-extending case:
  //   Lengthen_Single<"8", "i16", "8"> =
  //     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
  //         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
  //
  //   DestLanes, DestTy - lane count and element type of the result type;
  //                       also select the VMOVL variant.
  //   SrcTy             - source element width in bits; selects the
  //                       extloadvi* fragment and the VLD1d* instruction.
  multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
    defvar DstVT    = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar LoadInst = !cast<Instruction>("VLD1d" # SrcTy);
    defvar UExtInst = !cast<Instruction>("VMOVLuv" # DestLanes # DestTy);
    defvar SExtInst = !cast<Instruction>("VMOVLsv" # DestLanes # DestTy);

    let AddedComplexity = 10 in {
      // Any-extending load: uses the unsigned lengthening move.
      def _Any : Pat<(DstVT (!cast<PatFrag>("extloadvi" # SrcTy)
                               addrmode6:$addr)),
                     (UExtInst (LoadInst addrmode6:$addr))>,
                 Requires<[HasNEON]>;

      // Zero-extending load.
      def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadvi" # SrcTy)
                             addrmode6:$addr)),
                   (UExtInst (LoadInst addrmode6:$addr))>,
               Requires<[HasNEON]>;

      // Sign-extending load.
      def _S : Pat<(DstVT (!cast<PatFrag>("sextloadvi" # SrcTy)
                             addrmode6:$addr)),
                   (SExtInst (LoadInst addrmode6:$addr))>,
               Requires<[HasNEON]>;
    }
  }
  // extload, zextload and sextload for a lengthening load which only uses
  // half the lanes available. Example, any-extending case:
  //   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
  //     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
  //         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                        (f64 (IMPLICIT_DEF)), (i32 0))),
  //                         dsub_0)>;
  //
  //   DestLanes, DestTy - lane count and element type of the result type.
  //   SrcTy             - source element type; selects the extloadv* fragment.
  //   InsnLanes, InsnTy - select the VMOVL variant that does the widening.
  multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                                 string InsnLanes, string InsnTy> {
    defvar DstVT    = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar SExtInst = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);

    // Any-extending load: uses the unsigned lengthening move.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                     (UExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (SExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfSingle. It takes one extra
  // parameter, "RevLanes", which selects the VREV32dXX instruction applied
  // to the VLD1LN result before widening. This converts the data loaded by
  // VLD1LN into proper vector format in big endian mode.
  multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string InsnLanes,
                                            string InsnTy, string RevLanes> {
    defvar DstVT    = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
    defvar SExtInst = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
    defvar RevInst  = !cast<Instruction>("VREV32d" # RevLanes);

    // Any-extending load: uses the unsigned lengthening move.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (EXTRACT_SUBREG
                     (UExtInst
                       (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst
                     (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (SExtInst
                     (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // extload, zextload and sextload for a lengthening load followed by another
  // lengthening move, to quadruple the initial length. Example,
  // any-extending case:
  //   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
  //     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
  //         (VMOVLuv4i32
  //           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
  //                                          (f64 (IMPLICIT_DEF)),
  //                                          (i32 0))),
  //                           dsub_0))>;
  //
  //   DestLanes, DestTy   - lane count and element type of the result type.
  //   SrcTy               - source element type; selects the extloadv*
  //                         fragment.
  //   Insn1Lanes, Insn1Ty - select the first (inner) VMOVL variant.
  //   Insn2Lanes, Insn2Ty - select the second (outer) VMOVL variant.
  multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                             string Insn1Lanes, string Insn1Ty,
                             string Insn2Lanes, string Insn2Ty> {
    defvar DstVT     = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar UExtInst2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SExtInst1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SExtInst2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    // Any-extending load: uses the unsigned lengthening moves.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (UExtInst2
                     (EXTRACT_SUBREG
                       (UExtInst1 (VLD1LNd32 addrmode6oneL32:$addr,
                                             (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0))>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UExtInst2
                   (EXTRACT_SUBREG
                     (UExtInst1 (VLD1LNd32 addrmode6oneL32:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (SExtInst2
                   (EXTRACT_SUBREG
                     (SExtInst1 (VLD1LNd32 addrmode6oneL32:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_Double. It takes one extra parameter,
  // "RevLanes", which selects the VREV32dXX instruction applied to the
  // VLD1LN result before the first widening step. This converts the data
  // loaded by VLD1LN into proper vector format in big endian mode.
  multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                        string SrcTy, string Insn1Lanes,
                                        string Insn1Ty, string Insn2Lanes,
                                        string Insn2Ty, string RevLanes> {
    defvar DstVT     = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar UExtInst2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SExtInst1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SExtInst2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
    defvar RevInst   = !cast<Instruction>("VREV32d" # RevLanes);

    // Any-extending load: uses the unsigned lengthening moves.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy)
                             addrmode6oneL32:$addr)),
                   (UExtInst2
                     (EXTRACT_SUBREG
                       (UExtInst1
                         (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                             (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0))>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UExtInst2
                   (EXTRACT_SUBREG
                     (UExtInst1
                       (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (SExtInst2
                   (EXTRACT_SUBREG
                     (SExtInst1
                       (RevInst (VLD1LNd32 addrmode6oneL32:$addr,
                                           (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  }
  // extload, zextload and sextload for a lengthening load followed by another
  // lengthening move, to quadruple the initial length, but which ends up only
  // requiring half the available lanes (a 64-bit outcome instead of a
  // 128-bit). Example, any-extending case:
  //   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
  //     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
  //         (EXTRACT_SUBREG
  //           (VMOVLuv4i32
  //             (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
  //                                            (f64 (IMPLICIT_DEF)), (i32 0))),
  //                             dsub_0)),
  //           dsub_0)>;
  multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                                 string Insn1Lanes, string Insn1Ty,
                                 string Insn2Lanes, string Insn2Ty> {
    defvar DstVT     = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar UExtInst2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SExtInst1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SExtInst2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    // Any-extending load: uses the unsigned lengthening moves.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                   (EXTRACT_SUBREG
                     (UExtInst2
                       (EXTRACT_SUBREG
                         (UExtInst1 (VLD1LNd16 addrmode6:$addr,
                                               (f64 (IMPLICIT_DEF)), (i32 0))),
                         dsub_0)),
                     dsub_0)>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst2
                     (EXTRACT_SUBREG
                       (UExtInst1 (VLD1LNd16 addrmode6:$addr,
                                             (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (SExtInst2
                     (EXTRACT_SUBREG
                       (SExtInst1 (VLD1LNd16 addrmode6:$addr,
                                             (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Big-endian counterpart of Lengthen_HalfDouble. A VREV16d8 is applied to
  // the VLD1LN result before the first widening step, to convert the data
  // loaded by VLD1LN into proper vector format in big endian mode.
  multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                            string SrcTy, string Insn1Lanes,
                                            string Insn1Ty, string Insn2Lanes,
                                            string Insn2Ty> {
    defvar DstVT     = !cast<ValueType>("v" # DestLanes # DestTy);
    defvar UExtInst1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
    defvar UExtInst2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
    defvar SExtInst1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
    defvar SExtInst2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

    // Any-extending load: uses the unsigned lengthening moves.
    def _Any : Pat<(DstVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                   (EXTRACT_SUBREG
                     (UExtInst2
                       (EXTRACT_SUBREG
                         (UExtInst1
                           (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                (f64 (IMPLICIT_DEF)), (i32 0)))),
                         dsub_0)),
                     dsub_0)>,
               Requires<[HasNEON]>;

    // Zero-extending load.
    def _Z : Pat<(DstVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst2
                     (EXTRACT_SUBREG
                       (UExtInst1
                         (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

    // Sign-extending load.
    def _S : Pat<(DstVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (SExtInst2
                     (EXTRACT_SUBREG
                       (SExtInst1
                         (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  }
  // Single lengthening from a full 64-bit load. Instantiated outside any
  // endianness predicate block.
  defm : Lengthen_Single<"8", "i16", "8">;   // v8i8  -> v8i16
  defm : Lengthen_Single<"4", "i32", "16">;  // v4i16 -> v4i32
  defm : Lengthen_Single<"2", "i64", "32">;  // v2i32 -> v2i64

  // Little-endian forms of the partial-width lengthening loads.
  let Predicates = [HasNEON,IsLE] in {
    // v4i8 -> v4i16
    defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;
    // v2i16 -> v2i32
    defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">;

    // Double lengthening - v4i8 -> v4i16 -> v4i32
    defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
    // v2i8 -> v2i16 -> v2i32
    defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
    // v2i16 -> v2i32 -> v2i64
    defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
  }

  // Big-endian forms. The trailing argument of the HalfSingle and Double
  // variants is RevLanes, which selects the VREV32d<RevLanes> instruction.
  let Predicates = [HasNEON,IsBE] in {
    // v4i8 -> v4i16
    defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;
    // v2i16 -> v2i32
    defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">;

    // Double lengthening - v4i8 -> v4i16 -> v4i32
    defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32",
                                      "8">;
    // v2i8 -> v2i16 -> v2i32
    defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4",
                                          "i32">;
    // v2i16 -> v2i32 -> v2i64
    defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64",
                                      "16">;
  }
  // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
  // Little-endian: a VLD1LNd16 lane load is widened by three chained VMOVLs,
  // with the low D register taken between steps.
  let Predicates = [HasNEON,IsLE] in {
    // Any-extending load: uses the unsigned lengthening moves.
    def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)),
                  dsub_0))>;

    // Zero-extending load.
    def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)),
                  dsub_0))>;

    // Sign-extending load.
    def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
              (VMOVLsv2i64
                (EXTRACT_SUBREG
                  (VMOVLsv4i32
                    (EXTRACT_SUBREG
                      (VMOVLsv8i16
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)),
                  dsub_0))>;
  }
  // Big-endian versions of the triple-lengthening patterns.  They match the
  // little-endian ones except that the VLD1LNd16 result is first passed
  // through VREV16d8, which converts the loaded data into the proper vector
  // format in big endian mode, before the first VMOVL.
  let Predicates = [HasNEON, IsBE] in {
    // extloadvi8 uses the same unsigned (VMOVLu) chain as zextloadvi8.
    def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VREV16d8
                          (VLD1LNd16 addrmode6:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)))),
                      dsub_0)),
                  dsub_0))>;

    def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VREV16d8
                          (VLD1LNd16 addrmode6:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)))),
                      dsub_0)),
                  dsub_0))>;

    // sextloadvi8 uses the signed (VMOVLs) chain.
    def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
              (VMOVLsv2i64
                (EXTRACT_SUBREG
                  (VMOVLsv4i32
                    (EXTRACT_SUBREG
                      (VMOVLsv8i16
                        (VREV16d8
                          (VLD1LNd16 addrmode6:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)))),
                      dsub_0)),
                  dsub_0))>;
  }
  // concat_vectors of two DPR operands is selected to a REG_SEQUENCE that
  // places $Dn in dsub_0 and $Dm in dsub_1 of a QPR register.  The result is
  // the same for every listed value type, so the patterns are generated from
  // one template (in the listed order).
  let Predicates = [HasNEON] in {
    foreach VT = [v2i64, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16] in
      def : Pat<(VT (concat_vectors DPR:$Dn, DPR:$Dm)),
                (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  }
  //===----------------------------------------------------------------------===//
  // Assembler aliases
  //

  // fmdhr / fmdlr are accepted as aliases of VSETLNi32 with the immediate
  // operand fixed to 1 and 0 respectively (presumably the lane index --
  // confirm against the VSETLNi32 definition).
  def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                      (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
  def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                      (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
  // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
  // Three-operand forms; each mnemonic gets a D-register (DPR) and a
  // Q-register (QPR) alias.

  // vand
  defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                            (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                            (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

  // vbic
  defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                            (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                            (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

  // veor
  defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                            (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                            (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

  // vorr
  defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                            (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                            (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  // ... two-operand aliases
  // The assembly names only $Vdn and $Vm; $Vdn is passed to the underlying
  // instruction as both the destination and the first source operand.

  // vand
  defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                            (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                            (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

  // veor
  defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                            (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                            (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

  // vorr
  defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                            (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                            (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
  // ... immediates
  // "vand" with an immediate is accepted as an alias of the VBIC-immediate
  // instructions, using the nImmSplatNotI16 / nImmSplatNotI32 operand classes
  // (the names suggest the complemented splat immediate -- confirm in the
  // operand definitions).

  // D-register forms.
  def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                      (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                      (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;

  // Q-register forms.
  def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                      (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
  def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                      (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
  // VLD1 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.

  // Plain forms: "$list, $addr".
  def VLD1LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD1LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VLD1LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD1LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD1LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VLD1LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD1LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD1LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
  def VLD1LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  // VST1 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.

  // Plain forms: "$list, $addr".
  def VST1LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST1LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VST1LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VST1LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST1LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VST1LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VST1LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
      (ins VecListOneDByteIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VST1LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
      (ins VecListOneDHWordIndexed:$list,
           addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
  def VST1LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
      (ins VecListOneDWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  // VLD2 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.  The "d" records use
  // the VecListTwoD*Indexed list operands, the "q" records the
  // VecListTwoQ*Indexed ones (there is no ".8" q record).

  // Plain forms: "$list, $addr".
  def VLD2LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VLD2LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VLD2LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;
  def VLD2LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VLD2LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD2LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VLD2LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VLD2LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;
  def VLD2LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VLD2LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD2LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
  def VLD2LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  def VLD2LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD2LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  def VLD2LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  // VST2 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.  The "d" records use
  // the VecListTwoD*Indexed list operands, the "q" records the
  // VecListTwoQ*Indexed ones (there is no ".8" q record).

  // Plain forms: "$list, $addr".
  def VST2LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VST2LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VST2LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;
  def VST2LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VST2LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VST2LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, pred:$p)>;
  def VST2LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VST2LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;
  def VST2LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, pred:$p)>;
  def VST2LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VST2LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list,
           addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
  def VST2LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  def VST2LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list,
           addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST2LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list,
           addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
  def VST2LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list,
           addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  // VLD3 all-lanes pseudo-instructions.  These need special handling that an
  // InstAlias can't handle, so we use these instead.  Every record takes a
  // VecListThree{D,Q}AllLanes list and an addrmode6dupalignNone address.
  // NOTE(review): the previous comment attributed the special handling to
  // "the lane index", which reads like wording carried over from the
  // single-lane sections -- confirm the actual reason.

  // Plain forms: "$list, $addr".
  def VLD3DUPdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD3DUPdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;
  def VLD3DUPqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD3DUPdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3DUPdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3DUPdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3DUPqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list,
           addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
  // VLD3 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.  All of them use the
  // addrmode6alignNone address operand (there is no ".8" q record).

  // Plain forms: "$list, $addr".
  def VLD3LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD3LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VLD3LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD3LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  // VLD3 multiple structure pseudo-instructions.  These need special handling
  // for the vector operands that the normal instructions don't yet model.
  // FIXME: Remove these when the register classes and instructions are updated.
  // Every record takes a VecListThree{D,Q} list and an addrmode6align64
  // address.

  // Plain forms: "$list, $addr".
  def VLD3dAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD3dWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3dWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VLD3qWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD3dWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3dWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3dWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VLD3qWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  // VST3 single-lane pseudo-instructions.  The lane index needs special
  // handling that an InstAlias can't provide, so every accepted syntax gets
  // its own NEONDataTypeAsmPseudoInst record instead.  All of them use the
  // addrmode6alignNone address operand (there is no ".8" q record).

  // Plain forms: "$list, $addr".
  def VST3LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VST3LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;
  def VST3LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VST3LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VST3LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VST3LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VST3LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  def VST3LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list,
           addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
  // VST3 multiple structure pseudo-instructions.  These need special handling
  // for the vector operands that the normal instructions don't yet model.
  // FIXME: Remove these when the register classes and instructions are updated.
  // Every record takes a VecListThree{D,Q} list and an addrmode6align64
  // address.

  // Plain forms: "$list, $addr".
  def VST3dAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VST3dWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3dWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
  def VST3qWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VST3dWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST3dWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST3dWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  def VST3qWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
  // VLD4 all-lanes pseudo-instructions.  These need special handling that an
  // InstAlias can't handle, so we use these instead.  The address operand
  // class depends on the size suffix: addrmode6dupalign32 for ".8",
  // addrmode6dupalign64 for ".16" and addrmode6dupalign64or128 for ".32".
  // NOTE(review): the previous comment attributed the special handling to
  // "the lane index", which reads like wording carried over from the
  // single-lane sections -- confirm the actual reason.

  // Plain forms: "$list, $addr".
  def VLD4DUPdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign64or128:$addr, pred:$p)>;
  def VLD4DUPqAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign64or128:$addr, pred:$p)>;

  // "_fixed" writeback forms: "$list, $addr!".
  def VLD4DUPdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign64or128:$addr, pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign32:$addr, pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign64:$addr, pred:$p)>;
  def VLD4DUPqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQAllLanes:$list,
           addrmode6dupalign64or128:$addr, pred:$p)>;

  // "_register" writeback forms: "$list, $addr, $Rm".
  def VLD4DUPdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list,
           addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>;
  7886. def VLD4DUPdWB_register_Asm_16 :
  7887. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7888. (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
  7889. rGPR:$Rm, pred:$p)>;
  7890. def VLD4DUPdWB_register_Asm_32 :
  7891. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7892. (ins VecListFourDAllLanes:$list,
  7893. addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
  7894. def VLD4DUPqWB_register_Asm_8 :
  7895. NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
  7896. (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
  7897. rGPR:$Rm, pred:$p)>;
  7898. def VLD4DUPqWB_register_Asm_16 :
  7899. NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
  7900. (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
  7901. rGPR:$Rm, pred:$p)>;
  7902. def VLD4DUPqWB_register_Asm_32 :
  7903. NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
  7904. (ins VecListFourQAllLanes:$list,
  7905. addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
  // VLD4 single-lane pseudo-instructions. The lane index needs handling that
  // an InstAlias cannot provide, so assembler pseudos are used instead.
  //
  // The list operand carries the lane index (Byte/HWord/Word indexed) and the
  // address operand depends on the element size:
  //   .8  -> addrmode6align32
  //   .16 -> addrmode6align64
  //   .32 -> addrmode6align64or128
  // Only the d flavour has a .8 form in this list.

  // No writeback: "$list, $addr".
  def VLD4LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
  def VLD4LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VLD4LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
  def VLD4LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VLD4LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

  // Fixed writeback: "$list, $addr!".
  def VLD4LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
  def VLD4LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VLD4LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
  def VLD4LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VLD4LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

  // Register writeback: "$list, $addr, $Rm".
  def VLD4LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
  def VLD4LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
  def VLD4LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;
  def VLD4LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
  def VLD4LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;
  // VLD4 multiple structure pseudo-instructions. The real instructions do not
  // yet model the vector-list operands, so the assembler goes through these.
  // FIXME: Remove these when the register classes and instructions are updated.
  //
  // All forms share the addrmode6align64or128or256 address operand and exist
  // for each of the .8/.16/.32 suffixes, so each family is generated with a
  // foreach over the suffix. Families are emitted in the order:
  // d, q (no writeback); d, q (fixed writeback); d, q (register writeback).

  // No writeback: "$list, $addr".
  foreach sz = ["8", "16", "32"] in {
    def VLD4dAsm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VLD4qAsm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }

  // Fixed writeback: "$list, $addr!".
  foreach sz = ["8", "16", "32"] in {
    def VLD4dWB_fixed_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VLD4qWB_fixed_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }

  // Register writeback: "$list, $addr, $Rm".
  foreach sz = ["8", "16", "32"] in {
    def VLD4dWB_register_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               rGPR:$Rm, pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VLD4qWB_register_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               rGPR:$Rm, pred:$p)>;
  }
  // VST4 single-lane pseudo-instructions. The lane index needs handling that
  // an InstAlias cannot provide, so assembler pseudos are used instead.
  //
  // The list operand carries the lane index (Byte/HWord/Word indexed) and the
  // address operand depends on the element size:
  //   .8  -> addrmode6align32
  //   .16 -> addrmode6align64
  //   .32 -> addrmode6align64or128
  // Only the d flavour has a .8 form in this list.

  // No writeback: "$list, $addr".
  def VST4LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
  def VST4LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VST4LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
  def VST4LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VST4LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

  // Fixed writeback: "$list, $addr!".
  def VST4LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
  def VST4LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VST4LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
  def VST4LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
  def VST4LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

  // Register writeback: "$list, $addr, $Rm".
  def VST4LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
  def VST4LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
  def VST4LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;
  def VST4LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
  def VST4LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;
  // VST4 multiple structure pseudo-instructions. The real instructions do not
  // yet model the vector-list operands, so the assembler goes through these.
  // FIXME: Remove these when the register classes and instructions are updated.
  //
  // All forms share the addrmode6align64or128or256 address operand and exist
  // for each of the .8/.16/.32 suffixes, so each family is generated with a
  // foreach over the suffix. Families are emitted in the order:
  // d, q (no writeback); d, q (fixed writeback); d, q (register writeback).

  // No writeback: "$list, $addr".
  foreach sz = ["8", "16", "32"] in {
    def VST4dAsm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VST4qAsm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }

  // Fixed writeback: "$list, $addr!".
  foreach sz = ["8", "16", "32"] in {
    def VST4dWB_fixed_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VST4qWB_fixed_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               pred:$p)>;
  }

  // Register writeback: "$list, $addr, $Rm".
  foreach sz = ["8", "16", "32"] in {
    def VST4dWB_register_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
          (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
               rGPR:$Rm, pred:$p)>;
  }
  foreach sz = ["8", "16", "32"] in {
    def VST4qWB_register_Asm_#sz :
      NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
          (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
               rGPR:$Rm, pred:$p)>;
  }
  // VMOV/VMVN (register) accept an optional datatype suffix.
  // "vmov Vd, Vm" is expressed as VORR with $Vm supplied for both source
  // operands; "vmvn Vd, Vm" maps directly onto VMVN.
  defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                            (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                            (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                            (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                            (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
  // VCLE (register) is an assembler alias for VCGE w/ the operands reversed:
  // "vcle Vd, Vn, Vm" is emitted as the matching VCGE with $Vm and $Vn swapped.
  // The .f16 forms additionally require HasFullFP16.
  // D-register versions.
  def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                      (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                      (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                      (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                      (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                      (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                      (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                      (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                      (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // Q-register versions.
  def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                      (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                      (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                      (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                      (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                      (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                      (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                      (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                      (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // VCLT (register) is an assembler alias for VCGT w/ the operands reversed:
  // "vclt Vd, Vn, Vm" is emitted as the matching VCGT with $Vm and $Vn swapped.
  // The .f16 forms additionally require HasFullFP16.
  // D-register versions.
  def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                      (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                      (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                      (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                      (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                      (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                      (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                      (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                      (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
  // Q-register versions.
  def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                      (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                      (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                      (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                      (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                      (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                      (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                      (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  let Predicates = [HasNEON, HasFullFP16] in
  def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                      (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
  // VSWP may be written with or without a type suffix; both the D- and
  // Q-register forms get the any-datatype alias.
  defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                            (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                            (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
  // VBIF, VBIT, and VBSL may be written with or without a type suffix.
  // D-register forms.
  defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                            (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                            (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                            (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
  // Q-register forms.
  defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                            (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                            (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                            (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
  // "vmov.i32 Vd, #imm" whose immediate matches nImmVMOVI32Neg is handled via
  // VMVN, and conversely "vmvn.i32 Vd, #imm" with such an immediate is handled
  // via VMOV. Both D- and Q-register destinations are covered.
  def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                      (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                      (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                      (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                      (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
  // 'gas' compatibility aliases for quad-word instructions: a trailing "q" on
  // the mnemonic is accepted and mapped to the plain mnemonic. Strictly
  // speaking these should be limited to the Q register variants, but the
  // register classes already disambiguate, so a simple MnemonicAlias is used.
  foreach op = ["vbic", "vand", "veor", "vorr", "vmov", "vmvn"] in
    def : NEONMnemonicAlias<op # "q", op>;

  // Explicit versions for floating point so that the FPImm variants get
  // handled early. The parser gets confused otherwise.
  def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
  def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

  foreach op = ["vadd", "vsub", "vmin", "vmax", "vmul", "vabs", "vshl",
                "vshr", "vcvt", "vcle", "vceq", "vzip", "vswp"] in
    def : NEONMnemonicAlias<op # "q", op>;

  // vrecpeq is only aliased together with its .f32 / .u32 suffix.
  def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
  def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
  // Floating-point immediates that the vmov.f32 encoding cannot represent, but
  // whose bit pattern fits the .i32 encoding (nImmVMOVI32), are loaded through
  // the integer VMOV instructions. The Q-register alias is listed first.
  def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                      (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                      (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
  // ARMv8.6a BFloat16 instructions.
  // Everything in this block is predicated on [HasBF16, HasNEON].
  let Predicates = [HasBF16, HasNEON] in {

  // Common base for the BF16 VDOT encodings. Forwards the opcode fields to
  // N3Vnp, passes empty strings for its two string parameters (users set
  // AsmString themselves) and selects the "VFPV8" decoder namespace.
  class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
                 dag oops, dag iops, list<dag> pattern>
    : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
            N3RegFrm, IIC_VDOTPROD, "", "", pattern> {
    let DecoderNamespace = "VFPV8";
  }

  // Vector-by-vector VDOT.bf16. Selected from int_arm_neon_bfdot; the
  // accumulator input $Vd is tied to the result $dst.
  class BF16VDOTS<bit Q, RegisterClass RegTy, string opc,
                  ValueType AccumTy, ValueType InputTy>
    : BF16VDOT<0b11000, 0b00, Q,
               (outs RegTy:$dst),
               (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
               [(set (AccumTy RegTy:$dst),
                     (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                         (InputTy RegTy:$Vn),
                                         (InputTy RegTy:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
    let DecoderNamespace = "VFPV8";
  }

  // Vector-by-element VDOT.bf16. The instruction itself carries no pattern;
  // the accompanying Pat matches int_arm_neon_bfdot whose last operand is a
  // bitconvert of an ARMvduplane and passes RHS as the $Vm operand.
  multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc,
                       ValueType AccumTy, ValueType InputTy, dag RHS> {
    def "" : BF16VDOT<0b11100, 0b00, Q,
                      (outs RegTy:$dst),
                      (ins RegTy:$Vd, RegTy:$Vn,
                           DPR_VFP2:$Vm, VectorIndex32:$lane),
                      []> {
      // The one-bit lane index is encoded in Inst{5}.
      bit lane;
      let Inst{5} = lane;
      let Constraints = "$dst = $Vd";
      let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
      let DecoderNamespace = "VFPV8";
    }

    def : Pat<
      (AccumTy (int_arm_neon_bfdot
                  (AccumTy RegTy:$Vd),
                  (InputTy RegTy:$Vn),
                  (InputTy (bitconvert
                             (AccumTy (ARMvduplane (AccumTy RegTy:$Vm),
                                                   VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                VectorIndex32:$lane)>;
  }

  def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
  def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

  defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16,
                                   (v2f32 DPR_VFP2:$Vm)>;
  defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16,
                                   (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

  // BF16 matrix multiply-accumulate, selected from int_arm_neon_bfmmla.
  // NOTE: the selection pattern names QPR / v4f32 / v8bf16 directly rather
  // than RegTy; the only instantiation below (VMMLA) passes QPR.
  class BF16MM<bit Q, RegisterClass RegTy, string opc>
    : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
            (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
            N3RegFrm, IIC_VDOTPROD, "", "",
            [(set (v4f32 QPR:$dst),
                  (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                       (v8bf16 QPR:$Vn),
                                       (v8bf16 QPR:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
    let DecoderNamespace = "VFPV8";
  }

  def VMMLA : BF16MM<1, QPR, "vmmla">;

  // Vector-by-vector VFMA<suffix>.bf16 on Q registers. T selects between the
  // two instantiations below ("t" with T = 1, "b" with T = 0); OpNode is the
  // operator the instruction is selected from.
  class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
    : N3VCP8<0b00, 0b11, T, 1,
             (outs QPR:$dst),
             (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
             NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
             [(set (v4f32 QPR:$dst),
                   (OpNode (v4f32 QPR:$Vd),
                           (v8bf16 QPR:$Vn),
                           (v8bf16 QPR:$Vm)))]> {
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
  def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

  // Vector-by-element VFMA<suffix>.bf16. The instruction carries no pattern;
  // the accompanying Pat matches OpNode applied to an ARMvduplane of a Q
  // register and rewrites the Q register / lane pair through
  // DSubReg_i16_reg and SubReg_i16_lane.
  multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
    def "" : N3VLaneCP8<0, 0b11, T, 1,
                        (outs QPR:$dst),
                        (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                        IIC_VMACD, "vfma" # suffix, "bf16",
                        "$Vd, $Vn, $Vm$idx", "", []> {
      // The two-bit index is split across Inst{5} (high) and Inst{3} (low).
      bits<2> idx;
      let Inst{5} = idx{1};
      let Inst{3} = idx{0};
      let Constraints = "$dst = $Vd";
      let DecoderNamespace = "VFPV8";
    }

    def : Pat<
      (v4f32 (OpNode (v4f32 QPR:$Vd),
                     (v8bf16 QPR:$Vn),
                     (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                          VectorIndex16:$lane)))),
      (!cast<Instruction>(NAME) QPR:$Vd,
                                QPR:$Vn,
                                (EXTRACT_SUBREG QPR:$Vm,
                                   (DSubReg_i16_reg VectorIndex16:$lane)),
                                (SubReg_i16_lane VectorIndex16:$lane))>;
  }

  defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
  defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

  // VCVT.bf16.f32 from a Q register into a D register. Defined with an empty
  // pattern list.
  def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                      (outs DPR:$Vd), (ins QPR:$Vm),
                      NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;

  }
  // End of BFloat16 instructions