12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
7777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023 |
- //===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file describes the X86 SSE instruction set, defining the instructions,
- // and properties of the instructions which are needed for code generation,
- // machine code emission, and analysis.
- //
- //===----------------------------------------------------------------------===//
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 Instructions Classes
- //===----------------------------------------------------------------------===//
/// sse12_fp_scalar - Scalar floating-point instruction pair for SSE 1 & 2.
///
/// Instantiates two records, both flagged isCodeGenOnly:
///   rr - register/register form (MRMSrcReg); additionally flagged
///        isCommutable.
///   rm - register/memory form (MRMSrcMem); $src2 is an x86memop operand and
///        the selection pattern reads it through `load addr:$src2`.
///
/// Is2Addr selects the assembly string: when set (the default) only $src2 and
/// $dst are printed, otherwise $src1 is printed as well.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode, RegisterClass RC,
                           X86MemOperand x86memop, Domain d,
                           X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // Register form: both sources come from RC.
  let isCodeGenOnly = 1, isCommutable = 1 in
  def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !strconcat(OpcodeStr,
                         !if(Is2Addr,
                             "\t{$src2, $dst|$dst, $src2}",
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
           Sched<[sched]>;

  // Memory form: second source is loaded from memory, so it is scheduled with
  // the folded-load variants of `sched` and is not marked commutable.
  let isCodeGenOnly = 1 in
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr,
                         !if(Is2Addr,
                             "\t{$src2, $dst|$dst, $src2}",
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
- /// sse12_fp_scalar_int - Intrinsic flavour of the SSE 1 & 2 scalar FP pair.
- ///
- /// Defines `rr_Int` and `rm_Int` on top of SI_Int. Both have
- /// hasSideEffects = 0; the memory form additionally sets mayLoad = 1 and
- /// matches its second source through `mem_frags`. The result is typed as VT.
- /// Is2Addr selects the two-operand assembly string over the three-operand one.
- multiclass sse12_fp_scalar_int<bits<8> opc, SDPatternOperator OpNode,
-                                RegisterClass RC, ValueType VT, string asm,
-                                Operand memopr, PatFrags mem_frags, Domain d,
-                                X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-   // Register-register form.
-   let hasSideEffects = 0 in
-   def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst),
-                       (ins RC:$src1, RC:$src2),
-                       !strconcat(asm,
-                                  !if(Is2Addr,
-                                      "\t{$src2, $dst|$dst, $src2}",
-                                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
-                Sched<[sched]>;
-   // Register-memory form.
-   let hasSideEffects = 0, mayLoad = 1 in
-   def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst),
-                       (ins RC:$src1, memopr:$src2),
-                       !strconcat(asm,
-                                  !if(Is2Addr,
-                                      "\t{$src2, $dst|$dst, $src2}",
-                                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                       [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
-                Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- /// sse12_fp_packed - Packed FP instruction pair (rr / rm) for SSE 1 & 2.
- ///
- /// `rr` is commutable and its pattern result is typed as `vt`; `rm` sets
- /// mayLoad = 1 and reads its second source through `mem_frag`. Is2Addr
- /// selects the two-operand assembly string over the three-operand one.
- multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr,
-                            SDPatternOperator OpNode, RegisterClass RC,
-                            ValueType vt, X86MemOperand x86memop,
-                            PatFrag mem_frag, Domain d,
-                            X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-   // Register-register form.
-   let isCommutable = 1 in {
-     def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                 !strconcat(OpcodeStr,
-                            !if(Is2Addr,
-                                "\t{$src2, $dst|$dst, $src2}",
-                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
-              Sched<[sched]>;
-   }
-   // Register-memory form.
-   let mayLoad = 1 in {
-     def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
-                 (ins RC:$src1, x86memop:$src2),
-                 !strconcat(OpcodeStr,
-                            !if(Is2Addr,
-                                "\t{$src2, $dst|$dst, $src2}",
-                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>,
-              Sched<[sched.Folded, sched.ReadAfterFold]>;
-   }
- }
- /// sse12_fp_packed_logical_rm - Packed rr / rm pair whose selection patterns
- /// are supplied by the caller.
- ///
- /// `pat_rr` and `pat_rm` are passed through verbatim as the pattern lists of
- /// the two definitions. Both forms have hasSideEffects = 0; `rr` is
- /// commutable and `rm` sets mayLoad = 1. Is2Addr selects the two-operand
- /// assembly string over the three-operand one.
- multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
-                                       string OpcodeStr, X86MemOperand x86memop,
-                                       X86FoldableSchedWrite sched,
-                                       list<dag> pat_rr, list<dag> pat_rm,
-                                       bit Is2Addr = 1> {
-   // Register-register form.
-   let hasSideEffects = 0, isCommutable = 1 in {
-     def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                 !strconcat(OpcodeStr,
-                            !if(Is2Addr,
-                                "\t{$src2, $dst|$dst, $src2}",
-                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                 pat_rr, d>,
-              Sched<[sched]>;
-   }
-   // Register-memory form.
-   let mayLoad = 1, hasSideEffects = 0 in {
-     def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
-                 (ins RC:$src1, x86memop:$src2),
-                 !strconcat(OpcodeStr,
-                            !if(Is2Addr,
-                                "\t{$src2, $dst|$dst, $src2}",
-                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                 pat_rm, d>,
-              Sched<[sched.Folded, sched.ReadAfterFold]>;
-   }
- }
- // Pseudo instructions that materialize a floating-point zero: fld0 is mapped
- // to xorps for SSE or vxorps for AVX. ExpandPostRAPseudos expands them.
- // One pseudo each for FR32 (f32), FR64 (f64) and VR128 (f128) zero.
- let isPseudo = 1, canFoldAsLoad = 1, isReMaterializable = 1,
-     isAsCheapAsAMove = 1, SchedRW = [WriteZero] in {
-   def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
-                    [(set FR32:$dst, fp32imm0)]>,
-                  Requires<[HasSSE1, NoAVX512]>;
-   def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
-                    [(set FR64:$dst, fp64imm0)]>,
-                  Requires<[HasSSE2, NoAVX512]>;
-   def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
-                      [(set VR128:$dst, fp128imm0)]>,
-                    Requires<[HasSSE1, NoAVX512]>;
- }
- //===----------------------------------------------------------------------===//
- // AVX & SSE - Zero/One Vectors
- //===----------------------------------------------------------------------===//
- // V_SET0 - pseudo that maps a 128-bit zero vector to pxor / xorp* for SSE.
- // ExpandPostRAPseudos expands it to an xorps / vxorps, which
- // ExecutionDomainFix may then swizzle to pxor.
- // canFoldAsLoad is set because the pseudo can be turned into a constant-pool
- // load of an all-zeros value when folding it would be beneficial.
- let isPseudo = 1, canFoldAsLoad = 1, isReMaterializable = 1,
-     isAsCheapAsAMove = 1, Predicates = [NoAVX512],
-     SchedRW = [WriteZero] in {
-   def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
-                  [(set VR128:$dst, (v4f32 immAllZerosV))]>;
- }
- // The pseudo itself matches v4f32; route the remaining 128-bit all-zeros
- // vector types to it as well.
- let Predicates = [NoAVX512] in {
-   def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-   def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-   def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
-   def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-   def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
- }
- // AVX_SET0 - the 256-bit counterpart of V_SET0. The 256-bit AVX1 ISA doesn't
- // support PI, and doesn't need it: on sandy bridge the register is set to
- // zero at the rename stage without using any execution unit, so SET0PSY and
- // SET0PDY can be used for vector int instructions without penalty.
- let isPseudo = 1, canFoldAsLoad = 1, isReMaterializable = 1,
-     isAsCheapAsAMove = 1, Predicates = [NoAVX512],
-     SchedRW = [WriteZero] in {
-   def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
-                    [(set VR256:$dst, (v8i32 immAllZerosV))]>;
- }
- // The pseudo itself matches v8i32; route the remaining 256-bit all-zeros
- // vector types to it as well.
- let Predicates = [NoAVX512] in {
-   def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
-   def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
-   def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
-   def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
-   def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
- }
- // All-ones vector pseudos. canFoldAsLoad is set because each can be turned
- // into a constant-pool load of an all-ones value when folding it would be
- // beneficial.
- let isPseudo = 1, canFoldAsLoad = 1, isReMaterializable = 1,
-     isAsCheapAsAMove = 1, SchedRW = [WriteZero] in {
-   // 128-bit all-ones (no extra predicate attached here).
-   def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
-                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-   // 256-bit all-ones, AVX1-only targets when optimizing for minimum size.
-   let Predicates = [HasAVX1Only, OptForMinSize] in
-   def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
-                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
-   // 256-bit all-ones when AVX2 is available.
-   let Predicates = [HasAVX2] in {
-     def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
-                             [(set VR256:$dst, (v8i32 immAllOnesV))]>;
-   }
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Move FP Scalar Instructions
- //
- // Move Instructions. Register-to-register movss/movsd is not used for FR32/64
- // register copies because it's a partial register update; Register-to-register
- // movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
- // that the insert be implementable in terms of a copy, and, as just mentioned,
- // we don't use movss/movsd for copies.
- //===----------------------------------------------------------------------===//
- /// sse12_move_rr - Register-to-register scalar move plus its reversed
- /// encoding.
- ///
- /// `rr` (opcode 0x10, MRMSrcReg) carries the selection pattern and is marked
- /// commutable. `rr_REV` (opcode 0x11, MRMDestReg) has no pattern and exists
- /// for the disassembler; FoldGenData links it back to Name#rr. The assembly
- /// string is base_opc followed by asm_opr.
- multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
-                          string asm_opr, Domain d, string Name> {
-   let isCommutable = 1 in {
-     def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
-                 (ins VR128:$src1, VR128:$src2),
-                 base_opc#asm_opr,
-                 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
-              Sched<[SchedWriteFShuffle.XMM]>;
-   }
-   // For the disassembler
-   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
-     def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
-                     (ins VR128:$src1, VR128:$src2),
-                     base_opc#asm_opr, []>,
-                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
-   }
- }
- /// sse12_move - Scalar move family: AVX and SSE register-register forms,
- /// the matching store forms, and the ".s" assembler aliases that map to the
- /// reversed (rr_REV) encodings.
- multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
-                       X86MemOperand x86memop, string OpcodeStr,
-                       Domain d, string Name, Predicate pred> {
-   // AVX: three-operand register-register forms.
-   let Predicates = [UseAVX, OptForSize] in {
-     defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
-                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
-                                 "V"#Name>,
-                   VEX_4V, VEX_LIG, VEX_WIG;
-   }
-   // AVX: store RC:$src to memory.
-   def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
-                      OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                      [(store RC:$src, addr:$dst)], d>,
-                   VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
-   // SSE1 & 2: two-operand register-register forms, $src1 tied to $dst.
-   let Constraints = "$src1 = $dst",
-       Predicates = [pred, NoSSE41_Or_OptForSize] in {
-     defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
-                               "\t{$src2, $dst|$dst, $src2}", d, Name>;
-   }
-   // SSE1 & 2: store RC:$src to memory.
-   def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
-                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                    [(store RC:$src, addr:$dst)], d>,
-                 Sched<[WriteFStore]>;
-   // ".s"-suffixed mnemonics select the reversed encodings.
-   def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   (!cast<Instruction>("V"#NAME#"rr_REV")
-                    VR128:$dst, VR128:$src1, VR128:$src2), 0>;
-   def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
-                   (!cast<Instruction>(NAME#"rr_REV")
-                    VR128:$dst, VR128:$src2), 0>;
- }
- /// sse12_move_rm - Scalar loads from memory; loading automatically zeroes the
- /// upper bits.
- ///
- /// The `rm` forms write VR128 and match `vzloadfrag`; the `rm_alt` forms are
- /// isCodeGenOnly, write RC (the FR32/FR64 register class) and match
- /// `mem_pat`. Each form is defined in a VEX-encoded and a plain variant.
- multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
-                          PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
-                          Domain d> {
-   // VR128 destination, VEX-encoded.
-   def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
-                      OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
-                   VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
-   // VR128 destination, legacy encoding.
-   def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
-                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
-                 Sched<[WriteFLoad]>;
-   // _alt version uses FR32/FR64 register class.
-   let isCodeGenOnly = 1 in {
-     def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst),
-                            (ins x86memop:$src),
-                            OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                            [(set RC:$dst, (mem_pat addr:$src))], d>,
-                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
-     def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst),
-                          (ins x86memop:$src),
-                          OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                          [(set RC:$dst, (mem_pat addr:$src))], d>,
-                       Sched<[WriteFLoad]>;
-   }
- }
- // Instantiate the scalar single/double move families: register-register and
- // store forms first, then the load forms.
- defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
-                         SSEPackedSingle, "MOVSS", UseSSE1>,
-              XS;
- defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
-                         SSEPackedDouble, "MOVSD", UseSSE2>,
-              XD;
- // The load forms may be folded as loads and rematerialized.
- let isReMaterializable = 1, canFoldAsLoad = 1 in {
-   defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32,
-                              "movss", SSEPackedSingle>,
-                XS;
-   defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64,
-                              "movsd", SSEPackedDouble>,
-                XD;
- }
- // Patterns: AVX scalar loads.
- let Predicates = [UseAVX] in {
-   // scalar_to_vector of a scalar load selects the vector-register load form.
-   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
-             (VMOVSSrm addr:$src)>;
-   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-             (VMOVSDrm addr:$src)>;
-   // Represent the same patterns above but in the form they appear for
-   // 256-bit types
-   def : Pat<(v8f32 (X86vzload32 addr:$src)),
-             (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
-   def : Pat<(v4f64 (X86vzload64 addr:$src)),
-             (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
- }
- // AVX, optimizing for size: implement X86vzmovl with VMOVSSrr.
- let Predicates = [UseAVX, OptForSize] in {
-   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
-   // MOVSS to the lower bits.
-   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-             (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
-   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-             (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
-   // Move low f32 and clear high bits.
-   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
-             (SUBREG_TO_REG
-               (i32 0),
-               (v4f32 (VMOVSSrr
-                        (v4f32 (V_SET0)),
-                        (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))),
-               sub_xmm)>;
-   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
-             (SUBREG_TO_REG
-               (i32 0),
-               (v4i32 (VMOVSSrr
-                        (v4i32 (V_SET0)),
-                        (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))),
-               sub_xmm)>;
- }
- // SSE1 (without SSE4.1, or when optimizing for size): implement X86vzmovl
- // with MOVSSrr.
- let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
-   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
-   // MOVSS to the lower bits.
-   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-             (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
-   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-             (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
- }
- // SSE: scalar_to_vector of a scalar load selects the vector-register load
- // form (f64 needs SSE2, f32 needs SSE1).
- let Predicates = [UseSSE2] in {
-   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-             (MOVSDrm addr:$src)>;
- }
- let Predicates = [UseSSE1] in {
-   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
-             (MOVSSrm addr:$src)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
- //===----------------------------------------------------------------------===//
- /// sse12_mov_packed - Packed FP move pair.
- ///
- /// `rr` is a pattern-less register move (isMoveReg = 1, hasSideEffects = 0).
- /// `rm` loads through `ld_frag` and may be folded as a load or
- /// rematerialized. Scheduling comes from the RR / RM members of `sched`.
- multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
-                             X86MemOperand x86memop, PatFrag ld_frag,
-                             string asm, Domain d,
-                             X86SchedWriteMoveLS sched> {
-   let isMoveReg = 1, hasSideEffects = 0 in {
-     def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                 asm#"\t{$src, $dst|$dst, $src}", [], d>,
-              Sched<[sched.RR]>;
-   }
-   let isReMaterializable = 1, canFoldAsLoad = 1 in {
-     def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                 asm#"\t{$src, $dst|$dst, $src}",
-                 [(set RC:$dst, (ld_frag addr:$src))], d>,
-              Sched<[sched.RM]>;
-   }
- }
- // VEX-encoded aligned (opcode 0x28) and unaligned (opcode 0x10) packed FP
- // moves, 128-bit followed by 256-bit (VEX_L) variants.
- let Predicates = [HasAVX, NoVLX] in {
-   // 128-bit
-   defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
-                                   "movaps", SSEPackedSingle,
-                                   SchedWriteFMoveLS.XMM>,
-                  PS, VEX, VEX_WIG;
-   defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
-                                   "movapd", SSEPackedDouble,
-                                   SchedWriteFMoveLS.XMM>,
-                  PD, VEX, VEX_WIG;
-   defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
-                                   "movups", SSEPackedSingle,
-                                   SchedWriteFMoveLS.XMM>,
-                  PS, VEX, VEX_WIG;
-   defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
-                                   "movupd", SSEPackedDouble,
-                                   SchedWriteFMoveLS.XMM>,
-                  PD, VEX, VEX_WIG;
-   // 256-bit
-   defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
-                                    "movaps", SSEPackedSingle,
-                                    SchedWriteFMoveLS.YMM>,
-                   PS, VEX, VEX_L, VEX_WIG;
-   defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
-                                    "movapd", SSEPackedDouble,
-                                    SchedWriteFMoveLS.YMM>,
-                   PD, VEX, VEX_L, VEX_WIG;
-   defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
-                                    "movups", SSEPackedSingle,
-                                    SchedWriteFMoveLS.YMM>,
-                   PS, VEX, VEX_L, VEX_WIG;
-   defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
-                                    "movupd", SSEPackedDouble,
-                                    SchedWriteFMoveLS.YMM>,
-                   PD, VEX, VEX_L, VEX_WIG;
- }
- // Legacy-encoded packed FP moves: single precision under SSE1, double
- // precision under SSE2.
- let Predicates = [UseSSE1] in {
-   defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
-                                  "movaps", SSEPackedSingle,
-                                  SchedWriteFMoveLS.XMM>,
-                 PS;
-   defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
-                                  "movups", SSEPackedSingle,
-                                  SchedWriteFMoveLS.XMM>,
-                 PS;
- }
- let Predicates = [UseSSE2] in {
-   defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
-                                  "movapd", SSEPackedDouble,
-                                  SchedWriteFMoveLS.XMM>,
-                 PD;
-   defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
-                                  "movupd", SSEPackedDouble,
-                                  SchedWriteFMoveLS.XMM>,
-                 PD;
- }
- // VEX-encoded packed FP stores. Aligned forms (opcode 0x29) match
- // alignedstore, unaligned forms (opcode 0x11) match store.
- let Predicates = [HasAVX, NoVLX] in {
-   // 128-bit stores.
-   let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
-     def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs),
-                          (ins f128mem:$dst, VR128:$src),
-                          "movaps\t{$src, $dst|$dst, $src}",
-                          [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
-                     VEX, VEX_WIG;
-     def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs),
-                          (ins f128mem:$dst, VR128:$src),
-                          "movapd\t{$src, $dst|$dst, $src}",
-                          [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
-                     VEX, VEX_WIG;
-     def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs),
-                          (ins f128mem:$dst, VR128:$src),
-                          "movups\t{$src, $dst|$dst, $src}",
-                          [(store (v4f32 VR128:$src), addr:$dst)]>,
-                     VEX, VEX_WIG;
-     def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs),
-                          (ins f128mem:$dst, VR128:$src),
-                          "movupd\t{$src, $dst|$dst, $src}",
-                          [(store (v2f64 VR128:$src), addr:$dst)]>,
-                     VEX, VEX_WIG;
-   } // SchedRW (XMM)
-   // 256-bit stores.
-   let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
-     def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs),
-                           (ins f256mem:$dst, VR256:$src),
-                           "movaps\t{$src, $dst|$dst, $src}",
-                           [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
-                      VEX, VEX_L, VEX_WIG;
-     def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs),
-                           (ins f256mem:$dst, VR256:$src),
-                           "movapd\t{$src, $dst|$dst, $src}",
-                           [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
-                      VEX, VEX_L, VEX_WIG;
-     def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs),
-                           (ins f256mem:$dst, VR256:$src),
-                           "movups\t{$src, $dst|$dst, $src}",
-                           [(store (v8f32 VR256:$src), addr:$dst)]>,
-                      VEX, VEX_L, VEX_WIG;
-     def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs),
-                           (ins f256mem:$dst, VR256:$src),
-                           "movupd\t{$src, $dst|$dst, $src}",
-                           [(store (v4f64 VR256:$src), addr:$dst)]>,
-                      VEX, VEX_L, VEX_WIG;
-   } // SchedRW (YMM)
- } // Predicates
- // For disassembler: reversed (MRMDestReg) encodings of the VEX packed FP
- // register moves. They carry no patterns; FoldGenData names the
- // corresponding forward-encoded instruction.
- let hasSideEffects = 0, isMoveReg = 1, isCodeGenOnly = 1,
-     ForceDisassemble = 1 in {
-   // 128-bit
-   let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
-     def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
-                              (ins VR128:$src),
-                              "movaps\t{$src, $dst|$dst, $src}", []>,
-                         VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
-     def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
-                              (ins VR128:$src),
-                              "movapd\t{$src, $dst|$dst, $src}", []>,
-                         VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
-     def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
-                              (ins VR128:$src),
-                              "movups\t{$src, $dst|$dst, $src}", []>,
-                         VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
-     def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
-                              (ins VR128:$src),
-                              "movupd\t{$src, $dst|$dst, $src}", []>,
-                         VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
-   } // SchedRW (XMM)
-   // 256-bit
-   let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
-     def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
-                               (ins VR256:$src),
-                               "movaps\t{$src, $dst|$dst, $src}", []>,
-                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
-     def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
-                               (ins VR256:$src),
-                               "movapd\t{$src, $dst|$dst, $src}", []>,
-                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
-     def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
-                               (ins VR256:$src),
-                               "movups\t{$src, $dst|$dst, $src}", []>,
-                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
-     def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
-                               (ins VR256:$src),
-                               "movupd\t{$src, $dst|$dst, $src}", []>,
-                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
-   } // SchedRW (YMM)
- } // isCodeGenOnly, ForceDisassemble, hasSideEffects, isMoveReg
- // Reversed version with ".s" suffix for GAS compatibility.
- // 128-bit operands.
- def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
-                 (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
-                 (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
-                 (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
-                 (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
- // 256-bit operands.
- def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
-                 (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
- def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
-                 (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
- def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
-                 (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
- def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
-                 (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;
- // Legacy-encoded packed FP stores. Aligned forms (opcode 0x29) match
- // alignedstore, unaligned forms (opcode 0x11) match store.
- let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
-   def MOVAPSmr : PSI<0x29, MRMDestMem, (outs),
-                      (ins f128mem:$dst, VR128:$src),
-                      "movaps\t{$src, $dst|$dst, $src}",
-                      [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
-   def MOVAPDmr : PDI<0x29, MRMDestMem, (outs),
-                      (ins f128mem:$dst, VR128:$src),
-                      "movapd\t{$src, $dst|$dst, $src}",
-                      [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
-   def MOVUPSmr : PSI<0x11, MRMDestMem, (outs),
-                      (ins f128mem:$dst, VR128:$src),
-                      "movups\t{$src, $dst|$dst, $src}",
-                      [(store (v4f32 VR128:$src), addr:$dst)]>;
-   def MOVUPDmr : PDI<0x11, MRMDestMem, (outs),
-                      (ins f128mem:$dst, VR128:$src),
-                      "movupd\t{$src, $dst|$dst, $src}",
-                      [(store (v2f64 VR128:$src), addr:$dst)]>;
- } // SchedRW
- // For disassembler: reversed (MRMDestReg) encodings of the legacy packed FP
- // register moves. They carry no patterns; FoldGenData names the
- // corresponding forward-encoded instruction.
- let hasSideEffects = 0, isMoveReg = 1, isCodeGenOnly = 1,
-     ForceDisassemble = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
-   def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movaps\t{$src, $dst|$dst, $src}", []>,
-                      FoldGenData<"MOVAPSrr">;
-   def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movapd\t{$src, $dst|$dst, $src}", []>,
-                      FoldGenData<"MOVAPDrr">;
-   def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movups\t{$src, $dst|$dst, $src}", []>,
-                      FoldGenData<"MOVUPSrr">;
-   def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movupd\t{$src, $dst|$dst, $src}", []>,
-                      FoldGenData<"MOVUPDrr">;
- }
- // Reversed version with ".s" suffix for GAS compatibility.
- // Each alias maps the suffixed mnemonic to the matching rr_REV instruction.
- def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
-                 (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
-                 (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
-                 (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
-                 (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
- let Predicates = [HasAVX, NoVLX] in {
-   // 256-bit load/store need to use floating point load/store in case we don't
-   // have AVX2. Execution domain fixing will convert to integer if AVX2 is
-   // available and changing the domain is beneficial.
-   // Aligned integer loads.
-   def : Pat<(alignedloadv4i64 addr:$src), (VMOVAPSYrm addr:$src)>;
-   def : Pat<(alignedloadv8i32 addr:$src), (VMOVAPSYrm addr:$src)>;
-   def : Pat<(alignedloadv16i16 addr:$src), (VMOVAPSYrm addr:$src)>;
-   def : Pat<(alignedloadv32i8 addr:$src), (VMOVAPSYrm addr:$src)>;
-   // Unaligned integer loads.
-   def : Pat<(loadv4i64 addr:$src), (VMOVUPSYrm addr:$src)>;
-   def : Pat<(loadv8i32 addr:$src), (VMOVUPSYrm addr:$src)>;
-   def : Pat<(loadv16i16 addr:$src), (VMOVUPSYrm addr:$src)>;
-   def : Pat<(loadv32i8 addr:$src), (VMOVUPSYrm addr:$src)>;
-   // Aligned integer stores.
-   def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
-             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
-             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
-             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
-             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-   // Unaligned integer stores.
-   def : Pat<(store (v4i64 VR256:$src), addr:$dst),
-             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(store (v8i32 VR256:$src), addr:$dst),
-             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(store (v16i16 VR256:$src), addr:$dst),
-             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-   def : Pat<(store (v32i8 VR256:$src), addr:$dst),
-             (VMOVUPSYmr addr:$dst, VR256:$src)>;
- }
- // Use movaps / movups for SSE integer load / store (one byte shorter).
- // The instructions selected below are then converted to MOVDQA/MOVDQU
- // during the SSE domain pass.
- let Predicates = [UseSSE1] in {
-   // Aligned integer loads.
-   def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>;
-   def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>;
-   def : Pat<(alignedloadv8i16 addr:$src), (MOVAPSrm addr:$src)>;
-   def : Pat<(alignedloadv16i8 addr:$src), (MOVAPSrm addr:$src)>;
-   // Unaligned integer loads.
-   def : Pat<(loadv2i64 addr:$src), (MOVUPSrm addr:$src)>;
-   def : Pat<(loadv4i32 addr:$src), (MOVUPSrm addr:$src)>;
-   def : Pat<(loadv8i16 addr:$src), (MOVUPSrm addr:$src)>;
-   def : Pat<(loadv16i8 addr:$src), (MOVUPSrm addr:$src)>;
-   // Aligned integer stores.
-   def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
-             (MOVAPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
-             (MOVAPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-             (MOVAPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-             (MOVAPSmr addr:$dst, VR128:$src)>;
-   // Unaligned integer stores.
-   def : Pat<(store (v2i64 VR128:$src), addr:$dst),
-             (MOVUPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(store (v4i32 VR128:$src), addr:$dst),
-             (MOVUPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-             (MOVUPSmr addr:$dst, VR128:$src)>;
-   def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-             (MOVUPSmr addr:$dst, VR128:$src)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Move Low packed FP Instructions
- //===----------------------------------------------------------------------===//
- /// sse12_mov_hilo_packed_base - 64-bit memory loads into half of a VR128.
- ///
- /// `PSrm` (mnemonic base_opc + "s") is pattern-less, so it is explicitly
- /// marked mayLoad with no side effects. `PDrm` (mnemonic base_opc + "d")
- /// matches `pdnode` applied to $src1 and a scalar_to_vector of a loaded f64.
- multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
-                                       string base_opc, string asm_opr> {
-   // No pattern here: the single-precision form needs to be special-cased
-   // between the high and low variants.
-   let mayLoad = 1, hasSideEffects = 0 in {
-     def PSrm : PI<opc, MRMSrcMem,
-                   (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                   base_opc#"s"#asm_opr,
-                   [], SSEPackedSingle>,
-                PS,
-                Sched<[SchedWriteFShuffle.XMM.Folded,
-                       SchedWriteFShuffle.XMM.ReadAfterFold]>;
-   }
-   def PDrm : PI<opc, MRMSrcMem,
-                 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                 base_opc#"d"#asm_opr,
-                 [(set VR128:$dst,
-                   (v2f64 (pdnode VR128:$src1,
-                                  (scalar_to_vector (loadf64 addr:$src2)))))],
-                 SSEPackedDouble>,
-              PD,
-              Sched<[SchedWriteFShuffle.XMM.Folded,
-                     SchedWriteFShuffle.XMM.ReadAfterFold]>;
- }
- /// sse12_mov_hilo_packed - Instantiates sse12_mov_hilo_packed_base twice:
- /// a VEX-encoded three-operand variant (V#NAME, under UseAVX) and a legacy
- /// two-operand variant (NAME) with $src1 tied to $dst.
- multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
-                                  string base_opc> {
-   let Predicates = [UseAVX] in {
-     defm V#NAME : sse12_mov_hilo_packed_base<
-                       opc, pdnode, base_opc,
-                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
-                   VEX_4V, VEX_WIG;
-   }
-   let Constraints = "$src1 = $dst" in {
-     defm NAME : sse12_mov_hilo_packed_base<
-                     opc, pdnode, base_opc,
-                     "\t{$src2, $dst|$dst, $src2}">;
-   }
- }
- // Low-half loads (opcode 0x12), built on X86Movsd.
- defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
- // Low-half stores (opcode 0x13), VEX-encoded. The PS form has no pattern,
- // so mayStore is set by hand; the PD form stores element 0 of a v2f64.
- let SchedRW = [WriteFStore], Predicates = [UseAVX] in {
-   let hasSideEffects = 0, mayStore = 1 in {
-     def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs),
-                          (ins f64mem:$dst, VR128:$src),
-                          "movlps\t{$src, $dst|$dst, $src}", []>,
-                     VEX, VEX_WIG;
-   }
-   def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs),
-                        (ins f64mem:$dst, VR128:$src),
-                        "movlpd\t{$src, $dst|$dst, $src}",
-                        [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
-                                addr:$dst)]>,
-                   VEX, VEX_WIG;
- } // SchedRW, UseAVX
- // Low-half stores (opcode 0x13), legacy encoding.
- let SchedRW = [WriteFStore] in {
-   let hasSideEffects = 0, mayStore = 1 in {
-     def MOVLPSmr : PSI<0x13, MRMDestMem, (outs),
-                        (ins f64mem:$dst, VR128:$src),
-                        "movlps\t{$src, $dst|$dst, $src}", []>;
-   }
-   def MOVLPDmr : PDI<0x13, MRMDestMem, (outs),
-                      (ins f64mem:$dst, VR128:$src),
-                      "movlpd\t{$src, $dst|$dst, $src}",
-                      [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
-                              addr:$dst)]>;
- } // SchedRW
- // MOVLPS selection patterns.
- let Predicates = [UseSSE1] in {
-   // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
-   // end up with a movsd or blend instead of shufp.
-   // No need for aligned load, we're only loading 64-bits.
-   def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1, (i8 -28)),
-             (MOVLPSrm VR128:$src1, addr:$src2)>;
-   def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1,
-                       (i8 -28)),
-             (MOVLPSrm VR128:$src1, addr:$src2)>;
-   // Zero-extending 64-bit load: MOVLPS into a zeroed register.
-   def : Pat<(v4f32 (X86vzload64 addr:$src)),
-             (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
-   // 64-bit extract-and-store selects the MOVLPS store form.
-   def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
-             (MOVLPSmr addr:$dst, VR128:$src)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Move Hi packed FP Instructions
- //===----------------------------------------------------------------------===//
- // High-half loads (opcode 0x16), built on X86Unpckl.
- defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
- // High-half stores (opcode 0x17), VEX-encoded.
- // v2f64 extract element 1 is always custom lowered to unpack high to low
- // and extract element 0 so the non-store version isn't too horrible.
- let SchedRW = [WriteFStore], Predicates = [UseAVX] in {
-   let hasSideEffects = 0, mayStore = 1 in {
-     def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs),
-                          (ins f64mem:$dst, VR128:$src),
-                          "movhps\t{$src, $dst|$dst, $src}", []>,
-                     VEX, VEX_WIG;
-   }
-   def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs),
-                        (ins f64mem:$dst, VR128:$src),
-                        "movhpd\t{$src, $dst|$dst, $src}",
-                        [(store (f64 (extractelt
-                                       (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
-                                       (iPTR 0))),
-                                addr:$dst)]>,
-                   VEX, VEX_WIG;
- } // SchedRW, UseAVX
- // High-half stores (opcode 0x17), legacy encoding.
- let SchedRW = [WriteFStore] in {
-   let hasSideEffects = 0, mayStore = 1 in {
-     def MOVHPSmr : PSI<0x17, MRMDestMem, (outs),
-                        (ins f64mem:$dst, VR128:$src),
-                        "movhps\t{$src, $dst|$dst, $src}", []>;
-   }
-   def MOVHPDmr : PDI<0x17, MRMDestMem, (outs),
-                      (ins f64mem:$dst, VR128:$src),
-                      "movhpd\t{$src, $dst|$dst, $src}",
-                      [(store (f64 (extractelt
-                                     (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
-                                     (iPTR 0))),
-                              addr:$dst)]>;
- } // SchedRW
- // AVX selection patterns for the half-register double-precision moves.
- let Predicates = [UseAVX] in {
-   // MOVHPD patterns
-   def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
-             (VMOVHPDrm VR128:$src1, addr:$src2)>;
-   def : Pat<(store (f64 (extractelt (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
-                                     (iPTR 0))),
-                    addr:$dst),
-             (VMOVHPDmr addr:$dst, VR128:$src)>;
-   // MOVLPD patterns
-   def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
-             (VMOVLPDrm VR128:$src1, addr:$src2)>;
- }
- // MOVHPS selection patterns.
- let Predicates = [UseSSE1] in {
-   // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
-   // end up with a movsd or blend instead of shufp.
-   // No need for aligned load, we're only loading 64-bits.
-   def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
-             (MOVHPSrm VR128:$src1, addr:$src2)>;
-   def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
-             (MOVHPSrm VR128:$src1, addr:$src2)>;
-   // Storing the 64 bits produced by X86Movhlps of a register with itself
-   // selects the MOVHPS store form on that register.
-   def : Pat<(X86vextractstore64
-               (v4f32 (X86Movhlps VR128:$src, VR128:$src)), addr:$dst),
-             (MOVHPSmr addr:$dst, VR128:$src)>;
- }
- // SSE2 selection patterns for the half-register double-precision moves.
- let Predicates = [UseSSE2] in {
-   // MOVHPD patterns
-   def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
-             (MOVHPDrm VR128:$src1, addr:$src2)>;
-   def : Pat<(store (f64 (extractelt
-                           (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
-                           (iPTR 0))),
-                    addr:$dst),
-             (MOVHPDmr addr:$dst, VR128:$src)>;
-   // MOVLPD patterns
-   def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
-             (MOVLPDrm VR128:$src1, addr:$src2)>;
- }
- let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
-   // Use MOVLPD to load into the low bits from a full vector unless we can use
-   // BLENDPD.
-   def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
-             (MOVLPDrm VR128:$src1, addr:$src2)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
- //===----------------------------------------------------------------------===//
- // VEX-encoded three-operand MOVLHPS / MOVHLPS. Only the MOVHLPS form is
- // marked commutable, and it is also flagged NotMemoryFoldable.
- let Predicates = [UseAVX] in {
-   def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
-                         (ins VR128:$src1, VR128:$src2),
-                         "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         [(set VR128:$dst,
-                           (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
-                    VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
-   let isCommutable = 1 in {
-     def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
-                           (ins VR128:$src1, VR128:$src2),
-                           "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set VR128:$dst,
-                             (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
-                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
-                      NotMemoryFoldable;
-   }
- }
- // Legacy two-operand MOVLHPS / MOVHLPS with $src1 tied to $dst. Only the
- // MOVHLPS form is marked commutable, and it is also flagged
- // NotMemoryFoldable.
- let Constraints = "$src1 = $dst" in {
-   def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
-                       (ins VR128:$src1, VR128:$src2),
-                       "movlhps\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
-                   Sched<[SchedWriteFShuffle.XMM]>;
-   let isCommutable = 1 in {
-     def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
-                         (ins VR128:$src1, VR128:$src2),
-                         "movhlps\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
-                     Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
-   }
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Conversion Instructions
- //===----------------------------------------------------------------------===//
- // Scalar conversion taking a single source operand.  Produces two records:
- //   rr - register source (SrcRC); pattern is (OpNode SrcRC:$src).
- //   rm - memory source (x86memop); pattern is (OpNode (ld_frag addr:$src)).
- // `asm` is the mnemonic printed for the rr form and `mem` the mnemonic printed
- // for the rm form.  `Int2Fpu` is an additional SchedRead attached to the rr
- // form only; it defaults to ReadDefault.  Both records use ExeDomain `d`.
- multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                        SDPatternOperator OpNode, X86MemOperand x86memop,
-                        PatFrag ld_frag, string asm, string mem,
-                        X86FoldableSchedWrite sched, Domain d,
-                        SchedRead Int2Fpu = ReadDefault> {
-   let ExeDomain = d in {
-     def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
-                 asm#"\t{$src, $dst|$dst, $src}",
-                 [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
-              Sched<[sched, Int2Fpu]>;
-     def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
-                 !strconcat(mem, "\t{$src, $dst|$dst, $src}"),
-                 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
-              Sched<[sched.Folded]>;
-   }
- }
- // Packed signed-integer -> floating-point conversion (any_sint_to_fp) from
- // SrcTy to DstTy within a single register class RC.  Produces:
- //   rr - register source.
- //   rm - memory source loaded through ld_frag (marked mayLoad).
- // `asm` is the complete assembly string, operands included.  Both records use
- // MXCSR, may raise FP exceptions, and are flagged hasSideEffects = 0.
- multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
- ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
- string asm, Domain d, X86FoldableSchedWrite sched> {
- let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
- [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
- Sched<[sched]>;
- let mayLoad = 1 in
- def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
- [(set RC:$dst, (DstTy (any_sint_to_fp
- (SrcTy (ld_frag addr:$src)))))], d>,
- Sched<[sched.Folded]>;
- }
- }
- // Three-operand (AVX-style) scalar conversion with no selection patterns.
- // Produces:
- //   rr - (ins DstRC:$src1, SrcRC:$src); mnemonic is `asm`.
- //   rm - (ins DstRC:$src1, x86memop:$src), marked mayLoad; mnemonic is `asm`
- //        followed by `mem` wrapped in {...}.
- // Both records are predicated on UseAVX, use ExeDomain `d`, and are flagged
- // hasSideEffects = 0.
- multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC,
-                           RegisterClass DstRC, X86MemOperand x86memop,
-                           string asm, string mem,
-                           X86FoldableSchedWrite sched, Domain d> {
-   let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
-     def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst),
-                 (ins DstRC:$src1, SrcRC:$src),
-                 asm#"\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
-              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
-     let mayLoad = 1 in
-     def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
-                 (ins DstRC:$src1, x86memop:$src),
-                 !strconcat(asm, "{", mem,
-                            "}\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
-              Sched<[sched.Folded, sched.ReadAfterFold]>;
-   } // hasSideEffects = 0
- }
- // VEX-encoded, codegen-only scalar FP -> integer conversions on FR32/FR64.
- // The cvtt* records (opcode 0x2C) match any_fp_to_sint; the cvt* records
- // (opcode 0x2D) match lrint for GR32 results and llrint for GR64 results.
- // The GR64-result variants add VEX_W.
- let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
- "cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG;
- defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
- "cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_W, VEX_LIG;
- defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
- "cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG;
- defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
- "cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_W, VEX_LIG;
- defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
- "cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG;
- defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
- "cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_W, VEX_LIG;
- defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
- "cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG;
- defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
- "cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_W, VEX_LIG;
- }
- // The assembler can recognize rr 64-bit instructions by seeing a rxx
- // register, but the same isn't true when only memory operands are used.
- // Provide explicit "l" and "q" assembly forms to disambiguate the memory
- // variants where appropriate.
- //
- // Codegen-only VEX integer -> scalar FP conversions on FR32/FR64.
- // NOTE(review): VCVTSI2SD (GR32 -> FR64) is the only record here without
- // SIMD_EXC; presumably because an i32 -> f64 conversion is always exact.
- // Confirm before changing.
- let isCodeGenOnly = 1 in {
- defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
- WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
- VEX_LIG, SIMD_EXC;
- defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
- WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
- VEX_W, VEX_LIG, SIMD_EXC;
- defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
- WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
- VEX_LIG;
- defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
- WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
- VEX_W, VEX_LIG, SIMD_EXC;
- } // isCodeGenOnly = 1
- // Selection patterns for the pattern-less VCVTSI2* records and for i64 lrint.
- let Predicates = [UseAVX] in {
- // Integer -> FP from memory.  The extra $src1 operand of the three-operand
- // record is supplied as IMPLICIT_DEF.
- def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
- (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
- (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
- (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
- (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- // Integer -> FP from a general-purpose register.
- def : Pat<(f32 (any_sint_to_fp GR32:$src)),
- (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f32 (any_sint_to_fp GR64:$src)),
- (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
- def : Pat<(f64 (any_sint_to_fp GR32:$src)),
- (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f64 (any_sint_to_fp GR64:$src)),
- (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
- // lrint producing an i64 result maps onto the 64-bit conversion records.
- def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
- def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;
- def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
- def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
- }
- // Non-VEX, codegen-only scalar conversions on FR32/FR64, all instantiated
- // from sse12_cvt_s:
- //   0x2C cvtt*  - FP -> int matching any_fp_to_sint.
- //   0x2D cvt*   - FP -> int matching lrint (GR32) / llrint (GR64).
- //   0x2A cvtsi2* - int -> FP matching any_sint_to_fp; the memory mnemonic
- //                  carries an {l}/{q} suffix and the rr form reads ReadInt2Fpu.
- // The 64-bit integer variants add REX_W.
- // NOTE(review): CVTSI2SD (GR32 -> FR64) is the only record here without
- // SIMD_EXC; presumably because an i32 -> f64 conversion is always exact.
- // Confirm before changing.
- let isCodeGenOnly = 1 in {
- defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
- "cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
- defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
- "cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
- defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
- "cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
- defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
- "cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
- defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
- "cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
- defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
- "cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
- defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
- "cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
- defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
- "cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
- defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
- "cvtsi2ss", "cvtsi2ss{l}",
- WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
- defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
- "cvtsi2ss", "cvtsi2ss{q}",
- WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
- defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
- "cvtsi2sd", "cvtsi2sd{l}",
- WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
- defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
- "cvtsi2sd", "cvtsi2sd{q}",
- WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
- } // isCodeGenOnly = 1
- // lrint producing an i64 result maps onto the 64-bit conversion records:
- // the f32 source forms under UseSSE1, the f64 source forms under UseSSE2.
- let Predicates = [UseSSE1] in {
- def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
- def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
- }
- let Predicates = [UseSSE2] in {
- def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
- def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
- }
- // Conversion instruction intrinsics: match intrinsics that expect MM and/or
- // XMM operand(s).
- //
- // Produces two "_Int" records that apply OpNode to a SrcVT value and yield a
- // DstVT result in DstRC:
- //   rr_Int - source in SrcRC.
- //   rm_Int - source in memory (`memop`), matched through `mem_frags`.
- // Both use ExeDomain `d`; `asm` is the bare mnemonic.
- multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC,
-                           RegisterClass DstRC, ValueType DstVT,
-                           ValueType SrcVT, SDNode OpNode, Operand memop,
-                           PatFrags mem_frags, string asm,
-                           X86FoldableSchedWrite sched, Domain d> {
-   let ExeDomain = d in {
-     def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
-                     asm#"\t{$src, $dst|$dst, $src}",
-                     [(set DstRC:$dst,
-                           (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
-                  Sched<[sched]>;
-     def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
-                     asm#"\t{$src, $dst|$dst, $src}",
-                     [(set DstRC:$dst,
-                           (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
-                  Sched<[sched.Folded]>;
-   }
- }
- // Pattern-less "_Int" conversion records with operands
- // (ins DstRC:$src1, SrcRC/x86memop:$src2).
- //   Is2Addr = 1 (default): asm string prints only $src2 and $dst.
- //   Is2Addr = 0:           asm string prints $src2, $src1 and $dst.
- // The rm_Int mnemonic is `asm` followed by `mem` wrapped in {...}, and the
- // record is marked mayLoad.  Both records use ExeDomain `d` and are flagged
- // hasSideEffects = 0.
- multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
-                                 RegisterClass DstRC, X86MemOperand x86memop,
-                                 string asm, string mem,
-                                 X86FoldableSchedWrite sched, Domain d,
-                                 bit Is2Addr = 1> {
-   let hasSideEffects = 0, ExeDomain = d in {
-     def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst),
-                     (ins DstRC:$src1, SrcRC:$src2),
-                     !strconcat(asm,
-                                !if(Is2Addr,
-                                    "\t{$src2, $dst|$dst, $src2}",
-                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                     []>,
-                  Sched<[sched, ReadDefault, ReadInt2Fpu]>;
-     let mayLoad = 1 in
-     def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
-                     (ins DstRC:$src1, x86memop:$src2),
-                     !strconcat(asm, "{", mem, "}",
-                                !if(Is2Addr,
-                                    "\t{$src2, $dst|$dst, $src2}",
-                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                     []>,
-                  Sched<[sched.Folded, sched.ReadAfterFold]>;
-   }
- }
- // cvtsd2si intrinsic forms (X86cvts2si on a v2f64 in VR128).  The VEX variants
- // are predicated on UseAVX; the 64-bit results add VEX_W / REX_W.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- let Predicates = [UseAVX] in {
- defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
- X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
- defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
- X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
- }
- defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
- SSEPackedDouble>, XD;
- defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
- SSEPackedDouble>, XD, REX_W;
- }
- // VEX cvtsi2ss/cvtsi2sd "_Int" records on VR128.  Is2Addr is passed as 0, so
- // the asm strings print all three operands.  VCVTSI2SD (GR32 source) is the
- // only one without SIMD_EXC.
- let Predicates = [UseAVX] in {
- defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
- XS, VEX_4V, VEX_LIG, SIMD_EXC;
- defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
- XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
- defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
- XD, VEX_4V, VEX_LIG;
- defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
- XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
- }
- // Non-VEX cvtsi2ss/cvtsi2sd "_Int" records on VR128.  $src1 is tied to $dst
- // and Is2Addr keeps its default of 1, so the asm strings print two operands.
- // CVTSI2SD (GR32 source) is the only one without SIMD_EXC.
- let Constraints = "$src1 = $dst" in {
- defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
- XS, SIMD_EXC;
- defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
- XS, REX_W, SIMD_EXC;
- defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
- XD;
- defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
- XD, REX_W, SIMD_EXC;
- }
- // AT&T-only ("att") assembler aliases for the (v)cvtsi2ss / (v)cvtsi2sd _Int
- // records: register forms accept an explicit {l}/{q} suffix, and the
- // unsuffixed memory forms map to the i32mem records.
- // VEX register forms.
- def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
- def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
- def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
- def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
- // VEX unsuffixed memory forms.
- def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
- def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
- // Non-VEX register forms.
- def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
- (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
- def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
- def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
- (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
- def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;
- // Non-VEX unsuffixed memory forms.
- def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
- (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
- def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
- (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;
- /// SSE 1 & 2
- // Truncating scalar FP -> integer conversions, intrinsic ("_Int") forms that
- // take the source as a full XMM value (X86cvtts2Int).  VEX-encoded variants,
- // predicated on UseAVX; the GR64-result variants add VEX_W.
- //
- // The double-precision records use WriteCvtSD2I, matching both the non-VEX
- // CVTTSD2SI/CVTTSD2SI64 intrinsic records and the FR64 VCVTTSD2SI records
- // defined elsewhere in this file.
- let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
-   defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
-                                    ssmem, sse_load_f32, "cvttss2si",
-                                    WriteCvtSS2I, SSEPackedSingle>,
-                     XS, VEX, VEX_LIG;
-   defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
-                                      X86cvtts2Int, ssmem, sse_load_f32,
-                                      "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
-                       XS, VEX, VEX_LIG, VEX_W;
-   defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
-                                    sdmem, sse_load_f64, "cvttsd2si",
-                                    WriteCvtSD2I, SSEPackedDouble>,
-                     XD, VEX, VEX_LIG;
-   defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
-                                      X86cvtts2Int, sdmem, sse_load_f64,
-                                      "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
-                       XD, VEX, VEX_LIG, VEX_W;
- }
- // Non-VEX truncating scalar FP -> integer conversions, intrinsic ("_Int")
- // forms matching X86cvtts2Int on a full XMM value.  The GR64-result variants
- // add REX_W.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
- ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS;
- defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
- X86cvtts2Int, ssmem, sse_load_f32,
- "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
- XS, REX_W;
- defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
- sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD;
- defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
- X86cvtts2Int, sdmem, sse_load_f64,
- "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
- XD, REX_W;
- }
- // AT&T-only ("att") aliases that accept an explicit {l}/{q} suffix on
- // (v)cvttss2si / (v)cvttsd2si and map it to the matching _Int record.
- // NOTE(review): the memory aliases here name f32mem/f64mem, whereas the
- // records were instantiated with ssmem/sdmem and the (v)cvtss2si aliases
- // later in the file use ssmem/sdmem - confirm this difference is intended.
- // VEX forms.
- def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
- def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
- def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
- // Non-VEX forms.
- def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
- def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
- def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
- // VEX cvtss2si intrinsic ("_Int") forms: X86cvts2si on a v4f32 in VR128,
- // producing GR32 or (with VEX_W) GR64.
- let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
- ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
- defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
- ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
- }
- // Two groups share this MXCSR-using, FP-exception-raising scope:
- //   * Non-VEX cvtss2si intrinsic ("_Int") forms (X86cvts2si on v4f32).
- //   * Packed int32 -> fp32 conversions (cvtdq2ps): 128-bit and 256-bit VEX
- //     forms under [HasAVX, NoVLX], and the non-VEX form under UseSSE2.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
- ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS;
- defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
- ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;
- // The VEX forms load through `load`; the non-VEX form loads through `memop`.
- defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, WriteCvtI2PS>,
- PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
- defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, WriteCvtI2PSY>,
- PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
- defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, WriteCvtI2PS>,
- PS, Requires<[UseSSE2]>;
- }
- // AVX aliases: AT&T-only ("att") forms accepting an explicit {l}/{q} suffix
- // on vcvtss2si / vcvtsd2si, mapped to the matching _Int record.
- def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
- def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
- def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
- def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
- // SSE aliases: the same suffix handling for the non-VEX records.
- def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
- def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
- def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
- def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
- /// SSE 2 Only
- // Convert scalar double to scalar single
- // VEX, codegen-only FR32/FR64 forms with no patterns attached.
- // NOTE(review): the rr record derives from VSDI with the string "cvtsd2ss",
- // while the rm record derives from I with "vcvtsd2ss" plus an explicit XD;
- // presumably VSDI supplies the "v" prefix and XD itself - confirm against the
- // class definition.
- let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
- ExeDomain = SSEPackedSingle in {
- def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR64:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS]>, SIMD_EXC;
- let mayLoad = 1 in
- def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f64mem:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
- }
- // Under AVX, select f64 -> f32 rounding to VCVTSD2SSrr; the extra first source
- // operand is supplied as IMPLICIT_DEF.
- let Predicates = [UseAVX] in
- def : Pat<(f32 (any_fpround FR64:$src)),
-           (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>;
- // Non-VEX, codegen-only f64 -> f32 rounding on FR registers (any_fpround).
- // The load-folding rm form additionally requires OptForSize.
- let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
- def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
- "cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (any_fpround FR64:$src))]>,
- Sched<[WriteCvtSD2SS]>, SIMD_EXC;
- def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
- "cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
- XD, Requires<[UseSSE2, OptForSize]>,
- Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
- }
- // cvtsd2ss intrinsic ("_Int") forms on VR128, matching X86frounds with $src1
- // as the first operand and the v2f64 source as the second.  The VEX records
- // require UseAVX; the non-VEX records require UseSSE2 and tie $src1 to $dst.
- let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
- def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
- XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
- Sched<[WriteCvtSD2SS]>;
- def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
- XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
- Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
- let Constraints = "$src1 = $dst" in {
- def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
- XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
- def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
- XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
- }
- }
- // Convert scalar single to scalar double
- // SSE2 instructions with XS prefix
- // VEX, codegen-only FR64/FR32 forms with no patterns attached.  The rm form
- // additionally requires OptForSize.
- let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
- def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR32:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
- let mayLoad = 1 in
- def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f32mem:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
- Requires<[UseAVX, OptForSize]>, SIMD_EXC;
- } // isCodeGenOnly = 1, hasSideEffects = 0
- // Under AVX, select f32 -> f64 extension to VCVTSS2SD; the extra first source
- // operand is supplied as IMPLICIT_DEF.  The load-folding form is used only
- // when OptForSize also holds.
- let Predicates = [UseAVX] in
- def : Pat<(f64 (any_fpextend FR32:$src)),
-           (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>;
- let Predicates = [UseAVX, OptForSize] in
- def : Pat<(any_fpextend (loadf32 addr:$src)),
-           (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- // Non-VEX, codegen-only f32 -> f64 extension on FR registers (any_fpextend).
- // The rr form requires UseSSE2; the load-folding rm form also requires
- // OptForSize.
- let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
- def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
- "cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (any_fpextend FR32:$src))]>,
- XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
- def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
- "cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
- XS, Requires<[UseSSE2, OptForSize]>,
- Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
- } // isCodeGenOnly = 1
- // cvtss2sd intrinsic ("_Int") forms on VR128.  These records carry no
- // patterns of their own (empty pattern list).  The VEX records require
- // HasAVX; the non-VEX records require UseSSE2 and tie $src1 to $dst.
- let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
- ExeDomain = SSEPackedSingle in {
- def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
- Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
- let mayLoad = 1 in
- def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
- let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
- def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- []>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD]>;
- let mayLoad = 1 in
- def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- []>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
- }
- } // hasSideEffects = 0
- // Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
- // (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
- // vmovs{s,d} instructions
- //
- // Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of
- // the converted scalar and selects the corresponding VEX _Int record, passing
- // $dst as the first source operand.
- let Predicates = [UseAVX] in {
- // f64 -> f32 and f32 -> f64 on element 0 of a vector register.
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector
- (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
- (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector
- (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
- (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
- // Integer (register or load) -> f32.
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
- (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
- (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
- (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
- (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
- // Integer (register or load) -> f64.
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
- (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
- (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
- (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
- (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
- } // Predicates = [UseAVX]
- // UseSSE2 counterparts of the movs{s,d}-folding patterns: f64 <-> f32 on
- // element 0, and integer -> f64.  They select the non-VEX _Int records with
- // $dst as the first (tied) source operand.
- let Predicates = [UseSSE2] in {
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector
- (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
- (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector
- (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
- (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
- (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
- (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
- (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
- def : Pat<(v2f64 (X86Movsd
- (v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
- (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
- } // Predicates = [UseSSE2]
- // UseSSE1 counterparts of the movss-folding patterns: integer (register or
- // load) -> f32, selecting the non-VEX CVTSI2SS / CVTSI642SS _Int records.
- let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
- (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
- (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
- (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
- def : Pat<(v4f32 (X86Movss
- (v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
- (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
- } // Predicates = [UseSSE1]
- // VEX packed fp32 -> int32 conversions (X86cvtp2Int), 128-bit and 256-bit
- // (VEX_L) forms, used when AVX is available without VLX.
- // NOTE(review): the asm strings read "cvtps2dq" without a leading "v";
- // presumably the VPDI class prepends it - confirm against the class definition.
- let Predicates = [HasAVX, NoVLX] in {
- // Convert packed single/double fp to doubleword
- def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC;
- def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
- VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC;
- def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC;
- def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC;
- }
- // Non-VEX packed fp32 -> int32 conversion (X86cvtp2Int).  The memory form
- // loads through memopv4f32, whereas the VEX forms use loadv4f32.
- def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
- Sched<[WriteCvtPS2I]>, SIMD_EXC;
- def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
- Sched<[WriteCvtPS2ILd]>, SIMD_EXC;
- // Convert Packed Double FP to Packed DW Integers
- // VEX forms; both the 128-bit and the 256-bit source variants write a VR128
- // result.
- let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- // The assembler can recognize rr 256-bit instructions by seeing a ymm
- // register, but the same isn't true when using memory operands instead.
- // Provide other assembly rr and rm forms to address this explicitly.
- // The memory forms therefore carry an {x} or {y} mnemonic suffix.
- def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
- // XMM only
- def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
- Sched<[WriteCvtPD2ILd]>, VEX_WIG;
- // YMM only
- def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
- def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
- }
- // AT&T-only ("att") aliases that accept the explicit x/y suffix on the
- // register forms of vcvtpd2dq as well.
- def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
- // Non-VEX packed fp64 -> int32 conversion (X86cvtp2Int on v2f64, v4i32
- // result).  The memory form loads through memopv2f64.
- def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
- Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
- def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- Sched<[WriteCvtPD2I]>, SIMD_EXC;
- // Convert with truncation packed single/double fp to doubleword
- // SSE2 packed instructions with XS prefix
- // Opcode 0x5B. Every def selects X86any_cvttp2si: v4f32 -> v4i32 on VR128, or
- // v8f32 -> v8i32 on VR256 for the VEX_L "Y" variants. The outer let marks all
- // six defs (VEX and non-VEX) as using MXCSR and able to raise FP exceptions.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- // NOTE(review): the VEX defs pass the mnemonic without a leading 'v';
- // presumably the VS2SI class prepends it -- confirm against the class definition.
- let Predicates = [HasAVX, NoVLX] in {
- def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
- def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
- VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
- def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
- def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
- VEX, VEX_L,
- Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
- }
- // Non-VEX forms (S2SI class); the memory form matches memopv4f32 instead of
- // loadv4f32.
- def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
- Sched<[WriteCvtPS2I]>;
- def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
- Sched<[WriteCvtPS2ILd]>;
- }
- // VEX-encoded truncating convert of packed double to packed doubleword
- // (opcode 0xE6, VPDI class). Each def selects X86any_cvttp2si and writes a
- // v4i32 result to VR128 from a v2f64 (XMM) or v4f64 (YMM, VEX_L) source.
- // The assembler can recognize rr 256-bit instructions by seeing a ymm
- // register, but the same isn't true when using memory operands instead.
- // Provide other assembly rr and rm forms to address this explicitly.
- // NOTE(review): the mnemonic strings carry no leading 'v'; presumably the
- // VPDI class prepends it -- confirm against the class definition.
- let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- // XMM only
- def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
- def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
- VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
- // YMM only
- def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
- def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
- } // Predicates = [HasAVX, NoVLX]
- // Aliases registered for the "att" assembler variant: the x/y-suffixed
- // mnemonics map onto the register-source defs as well.
- def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
- // Select the generic any_fp_to_sint node for v4f64 -> v4i32 directly to the
- // YMM-source truncating convert, in both register and folded-load form.
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
- (VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
- (VCVTTPD2DQYrm addr:$src)>;
- }
- // Non-VEX cvttpd2dq (opcode 0xE6, PDI class): v2f64 -> v4i32 through
- // X86any_cvttp2si. The memory form matches memopv2f64. Both defs are tagged
- // SIMD_EXC.
- def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
- Sched<[WriteCvtPD2I]>, SIMD_EXC;
- def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
- Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
- // Convert packed single to packed double
- // VEX-encoded forms, opcode 0x5A with the PS prefix class.
- //  * 128-bit: the register form selects X86any_vfpext (v4f32 -> v2f64); the
- //    memory form takes an f64mem operand and matches extloadv2f32.
- //  * 256-bit (VEX_L): the register form selects any_fpextend
- //    (v4f32 in VR128 -> v4f64 in VR256); the memory form takes an f128mem
- //    operand and matches extloadv4f32.
- let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- // SSE2 instructions without OpSize prefix
- def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
- PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
- def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
- def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
- def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
- }
- // Non-VEX cvtps2pd (opcode 0x5A, PS prefix class), gated on UseSSE2. The
- // register form selects X86any_vfpext (v4f32 -> v2f64); the memory form takes
- // an f64mem operand and matches extloadv2f32.
- let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
- def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
- PS, Sched<[WriteCvtPS2PD]>;
- def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, Sched<[WriteCvtPS2PD.Folded]>;
- }
- // Convert Packed DW Integers to Packed Double FP
- // VEX-encoded forms (opcode 0xE6, S2SI class).
- //  * 128-bit: X86any_VSintToFP from v4i32 to v2f64. The memory form takes an
- //    i64mem operand and matches a loadi64 that is placed in a v2i64 via
- //    scalar_to_vector and then bitcast with bc_v4i32.
- //  * 256-bit (VEX_L): any_sint_to_fp from v4i32 (VR128 / i128mem) to v4f64.
- // Unlike the other conversions in this file section, this let does not set
- // Uses = [MXCSR] or mayRaiseFPException.
- let Predicates = [HasAVX, NoVLX] in {
- // hasSideEffects/mayLoad are set explicitly for the next def only.
- let hasSideEffects = 0, mayLoad = 1 in
- def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86any_VSintToFP
- (bc_v4i32
- (v2i64 (scalar_to_vector
- (loadi64 addr:$src)))))))]>,
- VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
- def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
- def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
- VEX_WIG;
- def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
- }
- // Non-VEX cvtdq2pd (opcode 0xE6, S2SI class): X86any_VSintToFP from v4i32 to
- // v2f64. The memory form takes an i64mem operand and matches a loadi64 that
- // is placed in a v2i64 via scalar_to_vector and then bitcast with bc_v4i32.
- // hasSideEffects/mayLoad are set explicitly for the memory def only.
- let hasSideEffects = 0, mayLoad = 1 in
- def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86any_VSintToFP
- (bc_v4i32
- (v2i64 (scalar_to_vector
- (loadi64 addr:$src)))))))]>,
- Sched<[WriteCvtI2PDLd]>;
- def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
- Sched<[WriteCvtI2PD]>;
- // AVX: fold an X86vzload64 source (viewed as v4i32 through bc_v4i32) that
- // feeds X86any_VSintToFP into the memory form of vcvtdq2pd.
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
- (VCVTDQ2PDrm addr:$src)>;
- } // Predicates = [HasAVX, NoVLX]
- // SSE2: the same fold, selecting the non-VEX cvtdq2pd memory form.
- let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
- (CVTDQ2PDrm addr:$src)>;
- } // Predicates = [UseSSE2]
- // Convert packed double to packed single
- // VEX-encoded forms (opcode 0x5A, VPDI class). Each def selects
- // X86any_vfpround and writes a v4f32 result to VR128 from a v2f64 (XMM) or
- // v4f64 (YMM, VEX_L) source.
- // The assembler can recognize rr 256-bit instructions by seeing a ymm
- // register, but the same isn't true when using memory operands instead.
- // Provide other assembly rr and rm forms to address this explicitly.
- // NOTE(review): the mnemonic strings carry no leading 'v'; presumably the
- // VPDI class prepends it -- confirm against the class definition.
- let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
- // XMM only
- def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
- def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
- VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
- // YMM only
- def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
- def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
- } // Predicates = [HasAVX, NoVLX]
- // Aliases registered for the "att" assembler variant: the x/y-suffixed
- // mnemonics map onto the register-source defs as well.
- def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
- def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
- // Non-VEX cvtpd2ps (opcode 0x5A, PDI class): v2f64 -> v4f32 through
- // X86any_vfpround. The memory form matches memopv2f64. Both defs are tagged
- // SIMD_EXC.
- def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
- Sched<[WriteCvtPD2PS]>, SIMD_EXC;
- def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
- Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Compare Instructions
- //===----------------------------------------------------------------------===//
- // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
- //
- // Emits four records per instantiation, all with opcode 0xC2 and a trailing
- // u8imm $cc operand that the patterns match as timm:
- //   rr_Int / rm_Int - operands and result in VR128; the pattern applies OpNode
- //                     with $src1 typed as VT. The memory operand has class
- //                     memop and is matched by mem_frags.
- //   rr / rm         - isCodeGenOnly forms on register class RC. The memory
- //                     operand has class x86memop and is matched by ld_frag.
- //                     Only rr is marked isCommutable.
- // asm is used verbatim as the assembly string. sched is the scheduling class;
- // the memory forms use sched.Folded together with sched.ReadAfterFold.
- // Every record is tagged SIMD_EXC.
- multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- Operand memop, SDNode OpNode, ValueType VT,
- PatFrag ld_frag, string asm,
- X86FoldableSchedWrite sched,
- PatFrags mem_frags> {
- def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
- [(set VR128:$dst, (OpNode (VT VR128:$src1),
- VR128:$src2, timm:$cc))]>,
- Sched<[sched]>, SIMD_EXC;
- let mayLoad = 1 in
- def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
- [(set VR128:$dst, (OpNode (VT VR128:$src1),
- (mem_frags addr:$src2), timm:$cc))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- let isCodeGenOnly = 1 in {
- let isCommutable = 1 in
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
- Sched<[sched]>, SIMD_EXC;
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (OpNode RC:$src1,
- (ld_frag addr:$src2), timm:$cc))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- }
- }
- // Scalar compare instantiations of sse12_cmp_scalar, all using X86cmps.
- // The VEX variants (VCMPSS/VCMPSD) use a three-operand assembly string with
- // separate $src1 and $dst and add XS/XD plus VEX_4V, VEX_LIG, VEX_WIG.
- let ExeDomain = SSEPackedSingle in
- defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
- "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
- let ExeDomain = SSEPackedDouble in
- defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
- "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
- // The non-VEX variants tie $src1 to $dst, so their assembly string names
- // only $dst and $src2.
- let Constraints = "$src1 = $dst" in {
- let ExeDomain = SSEPackedSingle in
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
- let ExeDomain = SSEPackedDouble in
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
- }
- // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
- //
- // Emits rr and rm records with no outputs; the pattern sets EFLAGS from
- // OpNode applied to $src1 (typed vt, class RC) and either a second RC
- // register or a load matched by ld_frag through an x86memop operand.
- // OpcodeStr is the mnemonic; the operand part of the assembly string is
- // appended here. d becomes the ExeDomain. sched defaults to WriteFComX, and
- // the memory form uses sched.Folded with sched.ReadAfterFold.
- multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode,
- ValueType vt, X86MemOperand x86memop,
- PatFrag ld_frag, string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFComX> {
- let ExeDomain = d in {
- def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
- Sched<[sched]>, SIMD_EXC;
- let mayLoad = 1 in
- def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1),
- (ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- }
- }
- // sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
- //
- // Same shape as sse12_ord_cmp, but the records are named rr_Int / rm_Int, the
- // memory operand has class memop (an Operand rather than an X86MemOperand)
- // and the load is matched by the mem_frags PatFrags. The pattern again sets
- // EFLAGS from OpNode with $src1 typed as vt.
- multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
- ValueType vt, Operand memop,
- PatFrags mem_frags, string OpcodeStr,
- Domain d,
- X86FoldableSchedWrite sched = WriteFComX> {
- let ExeDomain = d in {
- def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
- Sched<[sched]>, SIMD_EXC;
- let mayLoad = 1 in
- def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1),
- (mem_frags addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- }
- }
- // EFLAGS-setting scalar compares. Opcode 0x2E is used for the ucomis* forms
- // and 0x2F for the comis* forms. For each mnemonic there are two
- // instantiations under the same defm name:
- //   * sse12_ord_cmp on FR32/FR64 (records rr/rm), using X86any_fcmp for
- //     ucomis* and X86strict_fcmps for comis*;
- //   * sse12_ord_cmp_int on VR128 (records rr_Int/rm_Int), using X86ucomi /
- //     X86comi, wrapped in isCodeGenOnly = 1.
- // The V-prefixed defms add VEX, VEX_LIG and VEX_WIG; the others carry only
- // the PS/PD prefix class.
- let Defs = [EFLAGS] in {
- defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
- defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
- let isCodeGenOnly = 1 in {
- defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
- defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
- }
- defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS;
- defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD;
- defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS;
- defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD;
- let isCodeGenOnly = 1 in {
- defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS;
- defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD;
- defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS;
- defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD;
- }
- } // Defs = [EFLAGS]
- // sse12_cmp_packed - sse 1 & 2 compare packed instructions
- //
- // Emits rri and rmi records (opcode 0xC2) on register class RC. Both select
- // X86any_cmpp with a timm $cc and produce a value of type VT; the memory form
- // takes an x86memop operand matched by ld_frag. asm is used verbatim as the
- // assembly string and d is passed through as the execution domain. Only rri
- // is marked isCommutable. Both records are tagged SIMD_EXC.
- multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- ValueType VT, string asm,
- X86FoldableSchedWrite sched,
- Domain d, PatFrag ld_frag> {
- let isCommutable = 1 in
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
- Sched<[sched]>, SIMD_EXC;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
- [(set RC:$dst,
- (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- }
- // Packed compare instantiations of sse12_cmp_packed.
- // VEX variants: three-operand assembly string, loadv* fragments, VEX_4V and
- // VEX_WIG; the "Y" variants operate on VR256/f256mem and add VEX_L.
- defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
- "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
- defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
- "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
- "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
- "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
- // Non-VEX variants: $src1 is tied to $dst and the memory forms match the
- // memopv* fragments.
- let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
- "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
- "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
- }
- // CommutableCMPCC - Matches a compare-condition immediate whose low three
- // bits are 0, 3, 4 or 7; any higher bits are ignored by the check.
- // NOTE(review): these presumably encode the EQ/UNORD/NEQ/ORD predicates,
- // i.e. the ones whose result does not depend on operand order -- confirm
- // against the CMPPS/CMPSS immediate encoding.
- def CommutableCMPCC : PatLeaf<(timm), [{
- switch (N->getZExtValue() & 0x7) {
- case 0x00:
- case 0x03:
- case 0x04:
- case 0x07:
- return true;
- default:
- return false;
- }
- }]>;
- // Patterns to select compares with loads in first operand.
- // Each pattern matches a compare whose first operand is a load and whose
- // condition immediate is accepted by CommutableCMPCC. The selected memory-form
- // instruction takes the register as $src1 and folds the load as $src2, so the
- // operands are swapped relative to the matched node.
- // AVX: packed 256-bit and 128-bit compares plus the scalar X86cmps forms.
- let Predicates = [HasAVX] in {
- def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
- CommutableCMPCC:$cc)),
- (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
- CommutableCMPCC:$cc)),
- (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
- (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
- (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
- CommutableCMPCC:$cc)),
- (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
- CommutableCMPCC:$cc)),
- (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
- }
- // SSE2: double-precision forms; the packed pattern matches memopv2f64.
- let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
- CommutableCMPCC:$cc)),
- (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
- }
- // SSE1: single-precision forms; the packed pattern matches memopv4f32.
- let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
- (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
- CommutableCMPCC:$cc)),
- (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Shuffle Instructions
- //===----------------------------------------------------------------------===//
- /// sse12_shuffle - sse 1 & 2 fp shuffle instructions
- ///
- /// Emits rmi and rri records (opcode 0xC6) on register class RC. Both select
- /// X86Shufp producing type vt, with the u8imm $src3 operand matched as an
- /// i8 timm. The memory form takes an x86memop operand matched by mem_frag and
- /// uses sched.Folded with sched.ReadAfterFold. asm is used verbatim as the
- /// assembly string and d is passed through as the execution domain.
- /// IsCommutable (default 0) sets isCommutable on the rri record only.
- multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
- ValueType vt, string asm, PatFrag mem_frag,
- X86FoldableSchedWrite sched, Domain d,
- bit IsCommutable = 0> {
- def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
- [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 timm:$src3))))], d>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- let isCommutable = IsCommutable in
- def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
- [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 timm:$src3))))], d>,
- Sched<[sched]>;
- }
- // Shuffle instantiations of sse12_shuffle.
- // VEX variants: three-operand assembly string, loadv* fragments, VEX_4V and
- // VEX_WIG; the "Y" variants operate on VR256/f256mem and add VEX_L. None of
- // them passes IsCommutable, so the default of 0 applies.
- let Predicates = [HasAVX, NoVLX] in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
- PS, VEX_4V, VEX_WIG;
- defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
- PS, VEX_4V, VEX_L, VEX_WIG;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
- PD, VEX_4V, VEX_WIG;
- defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
- PD, VEX_4V, VEX_L, VEX_WIG;
- }
- // Non-VEX variants: $src1 is tied to $dst and the memory forms match memopv*.
- // NOTE(review): only SHUFPD passes IsCommutable = 1; the commute presumably
- // needs matching handling outside this file -- confirm.
- let Constraints = "$src1 = $dst" in {
- defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Unpack FP Instructions
- //===----------------------------------------------------------------------===//
- /// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
- ///
- /// Emits rr and rm records with opcode opc on register class RC. Both select
- /// OpNode on ($src1, $src2) producing type vt; the memory form takes an
- /// x86memop operand matched by mem_frag and uses sched.Folded with
- /// sched.ReadAfterFold. asm is used verbatim as the assembly string and d is
- /// passed through as the execution domain. IsCommutable (default 0) sets
- /// isCommutable on the rr record only.
- multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
- PatFrag mem_frag, RegisterClass RC,
- X86MemOperand x86memop, string asm,
- X86FoldableSchedWrite sched, Domain d,
- bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- def rr : PI<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1, RC:$src2)))], d>,
- Sched<[sched]>;
- def rm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1,
- (mem_frag addr:$src2))))], d>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // Unpack instantiations of sse12_unpack_interleave. Opcode 0x15 is used with
- // X86Unpckh (unpckh*) and 0x14 with X86Unpckl (unpckl*).
- // VEX variants: three-operand assembly string, the generic load fragment,
- // VEX_4V and VEX_WIG; the "Y" variants operate on VR256/f256mem and add VEX_L.
- // Of these, only VUNPCKHPD passes IsCommutable = 1.
- let Predicates = [HasAVX, NoVLX] in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
- }// Predicates = [HasAVX, NoVLX]
- // Non-VEX variants: $src1 is tied to $dst and the memory forms match the
- // memop fragment. Only UNPCKHPD passes IsCommutable = 1.
- let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
- VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
- VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
- VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
- VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
- } // Constraints = "$src1 = $dst"
- // With HasAVX1Only, select 256-bit integer unpack nodes to the FP unpack
- // instructions: v8i32 maps to the PS forms and v4i64 to the PD forms, each in
- // register and folded-load variants.
- let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- }
- let Predicates = [UseSSE2] in {
- // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
- // The pattern matches a v2f64 X86Unpckl whose second operand is a
- // simple_load and selects MOVHPDrm instead of the UNPCKLPD memory form.
- def : Pat<(v2f64 (X86Unpckl VR128:$src1,
- (v2f64 (simple_load addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Extract Floating-Point Sign mask
- //===----------------------------------------------------------------------===//
- /// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
- ///
- /// Emits a single rr record (opcode 0x50) that selects X86movmsk on an RC
- /// register typed as vt and writes the result to a GR32orGR64 register.
- /// asm is the mnemonic; the operand part of the assembly string is appended
- /// here. d is passed through as the execution domain. There is no memory form.
- multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
- string asm, Domain d> {
- def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
- Sched<[WriteFMOVMSK]>;
- }
- // VEX movmsk instantiations for 128-bit and 256-bit (VEX_L) sources, plus
- // patterns that reuse them for integer vector types of the same element width
- // (v4i32/v8i32 -> PS forms, v2i64/v4i64 -> PD forms).
- let Predicates = [HasAVX] in {
- defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_WIG;
- defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_WIG;
- defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
- defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
- // Also support integer VTs to avoid a int->fp bitcast in the DAG.
- def : Pat<(X86movmsk (v4i32 VR128:$src)),
- (VMOVMSKPSrr VR128:$src)>;
- def : Pat<(X86movmsk (v2i64 VR128:$src)),
- (VMOVMSKPDrr VR128:$src)>;
- def : Pat<(X86movmsk (v8i32 VR256:$src)),
- (VMOVMSKPSYrr VR256:$src)>;
- def : Pat<(X86movmsk (v4i64 VR256:$src)),
- (VMOVMSKPDYrr VR256:$src)>;
- }
- // Non-VEX movmsk instantiations (128-bit only).
- defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS;
- defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD;
- let Predicates = [UseSSE2] in {
- // Also support integer VTs to avoid a int->fp bitcast in the DAG.
- def : Pat<(X86movmsk (v4i32 VR128:$src)),
- (MOVMSKPSrr VR128:$src)>;
- def : Pat<(X86movmsk (v2i64 VR128:$src)),
- (MOVMSKPDrr VR128:$src)>;
- }
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Logical Instructions
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in { // SSE integer instructions
- /// PDI_binop_rm - Simple SSE2 binary operator.
- ///
- /// Emits rr and rm records with opcode opc on register class RC. Both select
- /// OpNode on ($src1, $src2) producing type OpVT; the memory form takes an
- /// x86memop operand matched by memop_frag and uses sched.Folded with
- /// sched.ReadAfterFold. Is2Addr chooses the assembly string: when set only
- /// $src2 and $dst are printed, otherwise $src2, $src1 and $dst are printed.
- /// IsCommutable sets isCommutable on the rr record only.
- multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, X86FoldableSchedWrite sched,
- bit IsCommutable, bit Is2Addr> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- } // ExeDomain = SSEPackedInt
- /// PDI_binop_all - Instantiates PDI_binop_rm three times for one operator:
- ///   V#NAME   - 128-bit VEX form (VR128, load, i128mem, sched.XMM, three-operand
- ///              assembly), under Predicates [HasAVX, prd].
- ///   NAME     - 128-bit two-address form with $src1 tied to $dst (VR128, memop,
- ///              i128mem, sched.XMM).
- ///   V#NAME#Y - 256-bit VEX_L form (VR256, load, i256mem, sched.YMM), under
- ///              Predicates [HasAVX2, prd].
- /// The VEX forms prepend "v" to OpcodeStr. OpVT128/OpVT256 are the pattern
- /// value types for the 128-bit and 256-bit forms respectively.
- multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
- ValueType OpVT128, ValueType OpVT256,
- X86SchedWriteWidths sched, bit IsCommutable,
- Predicate prd> {
- let Predicates = [HasAVX, prd] in
- defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, load, i128mem, sched.XMM,
- IsCommutable, 0>, VEX_4V, VEX_WIG;
- let Constraints = "$src1 = $dst" in
- defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
- memop, i128mem, sched.XMM, IsCommutable, 1>;
- let Predicates = [HasAVX2, prd] in
- defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, load, i256mem, sched.YMM,
- IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
- }
- // These are ordered here for pattern ordering requirements with the fp versions
- // Packed integer logic ops, all typed v2i64 / v4i64 and gated on NoVLX for the
- // VEX forms. and/or/xor are passed IsCommutable = 1; pandn (X86andnp) is
- // passed 0.
- defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
- SchedWriteVecLogic, 1, NoVLX>;
- defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
- SchedWriteVecLogic, 1, NoVLX>;
- defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
- SchedWriteVecLogic, 1, NoVLX>;
- defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
- SchedWriteVecLogic, 0, NoVLX>;
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Logical Instructions
- //===----------------------------------------------------------------------===//
- /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
- ///
- /// There are no patterns here because isel prefers integer versions for SSE2
- /// and later. There are SSE1 v4f32 patterns later.
- ///
- /// For one opcode this instantiates sse12_fp_packed_logical_rm six times,
- /// each with empty pattern lists: VEX 256-bit PS/PD (VR256, f256mem,
- /// sched.YMM, VEX_L), VEX 128-bit PS/PD (VR128, f128mem, sched.XMM), and the
- /// non-VEX PS/PD forms with $src1 tied to $dst. OpcodeStr gets a "ps" or "pd"
- /// suffix appended.
- /// NOTE(review): the VEX instantiations pass a trailing 0 that the non-VEX
- /// ones omit; presumably this is the two-address flag of
- /// sse12_fp_packed_logical_rm -- confirm against its definition.
- multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- let Predicates = [HasAVX, NoVLX] in {
- defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
- [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
- defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
- [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
- defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
- [], [], 0>, PS, VEX_4V, VEX_WIG;
- defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
- [], [], 0>, PD, VEX_4V, VEX_WIG;
- }
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
- [], []>, PS;
- defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
- [], []>, PD;
- }
- }
- // Packed FP logical instantiations: and (0x54), or (0x56), xor (0x57) and
- // andn (0x55). Only the ANDN instantiation is wrapped in isCommutable = 0.
- defm AND : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
- defm OR : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
- defm XOR : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
- let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;
- let Predicates = [HasAVX2, NoVLX] in { // Map 256-bit and/or/xor/X86andnp on v32i8, v16i16 and v8i32 to the VP*Y instructions (v4i64 is not listed in this block).
- def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), // Register-register forms.
- (VPANDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
- (VPANDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
- (VPANDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
- (VPORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
- (VPORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
- (VPORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
- (VPXORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
- (VPXORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
- (VPXORYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
- (VPANDNYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
- (VPANDNYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
- (VPANDNYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), // Register-memory forms: the second operand is a folded load.
- (VPANDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
- (VPANDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
- (VPANDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
- (VPORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
- (VPORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
- (VPORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
- (VPXORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
- (VPXORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
- (VPXORYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
- (VPANDNYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
- (VPANDNYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
- (VPANDNYrm VR256:$src1, addr:$src2)>;
- }
- // If only AVX1 is supported, we need to handle integer operations with
- // floating point instructions since the integer versions aren't available.
- let Predicates = [HasAVX1Only] in { // All four 256-bit integer types (including v4i64) go to the packed-single V*PSY instructions.
- def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), // Register-register forms.
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), // Register-memory forms: the second operand is a folded load.
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
- }
- let Predicates = [HasAVX, NoVLX] in { // Map 128-bit and/or/xor/X86andnp on v16i8, v8i16 and v4i32 to the VEX VPAND/VPOR/VPXOR/VPANDN instructions.
- def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)), // Register-register forms.
- (VPANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
- (VPANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
- (VPANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
- (VPORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
- (VPORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
- (VPORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
- (VPXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
- (VPXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
- (VPXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
- (VPANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
- (VPANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
- (VPANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)), // Register-memory forms: the second operand is a folded load.
- (VPANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
- (VPANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
- (VPANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
- (VPORrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
- (VPORrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
- (VPORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
- (VPXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
- (VPXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
- (VPXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
- (VPANDNrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
- (VPANDNrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
- (VPANDNrm VR128:$src1, addr:$src2)>;
- }
- let Predicates = [UseSSE2] in { // Map 128-bit and/or/xor/X86andnp on v16i8, v8i16 and v4i32 to the legacy PAND/POR/PXOR/PANDN instructions.
- def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)), // Register-register forms.
- (PANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
- (PANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
- (PANDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
- (PORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
- (PORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
- (PORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
- (PXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
- (PXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
- (PXORrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>;
- def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)), // Register-memory forms use memop* fragments where the AVX blocks use load*. NOTE(review): presumably an alignment requirement for legacy SSE folding -- confirm.
- (PANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
- (PANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
- (PANDrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
- (PORrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
- (PORrm VR128:$src1, addr:$src2)>;
- def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
- (PORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
- (PXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
- (PXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
- (PXORrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
- (PANDNrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
- (PANDNrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
- (PANDNrm VR128:$src1, addr:$src2)>;
- }
- // Patterns for packed operations when we don't have integer type available: v4f32 X86fand/X86for/X86fxor/X86fandn select the legacy *PS instructions. No Predicates wrapper is applied in this block.
- def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), // Register-register forms.
- (ANDPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
- (ORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
- (XORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
- (ANDNPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), // Register-memory forms: the second operand is folded through memopv4f32.
- (ANDPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
- (ORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
- (XORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
- (ANDNPSrm VR128:$src1, addr:$src2)>;
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Arithmetic Instructions
- //===----------------------------------------------------------------------===//
- /// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
- /// vector forms.
- ///
- /// In addition, we also have a special variant of the scalar form here to
- /// represent the associated intrinsic operation. This form is unlike the
- /// plain scalar form, in that it takes an entire vector (instead of a scalar)
- /// and leaves the top elements unmodified (therefore these cannot be commuted).
- ///
- /// These three forms can each be reg+reg or reg+mem.
- ///
- /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
- /// classes below
- multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, // Packed forms: opc is the opcode byte, OpcodeStr the mnemonic stem ("ps"/"pd" is appended).
- SDPatternOperator OpNode, X86SchedWriteSizes sched> { // OpNode is the DAG operator to select; sched supplies .PS/.PD data per width.
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Every form reads MXCSR and is marked as able to raise an FP exception.
- let Predicates = [HasAVX, NoVLX] in { // VEX-encoded 128-bit and 256-bit forms; memory operands use load* fragments.
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
- defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
- defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
- }
- let Constraints = "$src1 = $dst" in { // Legacy SSE forms: $dst is tied to $src1; memory operands use memop* fragments.
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle,
- sched.PS.XMM>, PS;
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble,
- sched.PD.XMM>, PD;
- }
- }
- }
- multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, // Scalar ss/sd forms on FR32/FR64: VEX V<NAME>SS/SD plus tied legacy <NAME>SS/SD.
- X86SchedWriteSizes sched> { // Only the .PS.Scl and .PD.Scl members of sched are used.
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Every form reads MXCSR and is marked as able to raise an FP exception.
- defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), // VEX forms; no Predicates list is set at this level.
- OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
- defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
- let Constraints = "$src1 = $dst" in { // Legacy SSE forms: $dst is tied to $src1.
- defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, SSEPackedSingle,
- sched.PS.Scl>, XS;
- defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, SSEPackedDouble,
- sched.PD.Scl>, XD;
- }
- }
- }
- multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, // Intrinsic-style scalar forms: operands are whole VR128 vectors (v4f32 / v2f64) rather than FR32/FR64.
- SDPatternOperator OpNode, // DAG operator to select for these forms.
- X86SchedWriteSizes sched> { // Only the .PS.Scl and .PD.Scl members of sched are used.
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Every form reads MXCSR and is marked as able to raise an FP exception.
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, // VEX forms; memory operands are ssmem/sdmem matched through sse_load_f32/sse_load_f64.
- !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
- !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
- let Constraints = "$src1 = $dst" in { // Legacy SSE forms: $dst is tied to $src1.
- defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
- !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.PS.Scl>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
- !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.PD.Scl>, XD;
- }
- }
- }
- // Binary Arithmetic instructions: each defm combines the packed, scalar and (except MAXC/MINC) intrinsic-scalar multiclasses.
- defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>; // null_frag: the intrinsic-scalar forms get no operator of their own here.
- defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
- basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
- basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
- let isCommutable = 0 in { // SUB, DIV, MAX and MIN are all marked non-commutable.
- defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
- defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
- basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
- basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
- defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>; // Unlike ADD/MUL/SUB/DIV, the intrinsic-scalar form has its own node (X86fmaxs).
- defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>; // Intrinsic-scalar form selects from X86fmins.
- }
- let isCodeGenOnly = 1 in { // MAXC/MINC reuse the MAX/MIN opcodes and mnemonics but select from X86fmaxc/X86fminc; they are outside the isCommutable = 0 scope above.
- defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
- defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
- }
- // Patterns used to select SSE scalar fp arithmetic instructions from
- // either:
- //
- // (1) a scalar fp operation followed by a blend
- //
- // The effect is that the backend no longer emits unnecessary vector
- // insert instructions immediately after SSE scalar fp instructions
- // like addss or mulss.
- //
- // For example, given the following code:
- // __m128 foo(__m128 A, __m128 B) {
- // A[0] += B[0];
- // return A;
- // }
- //
- // Previously we generated:
- // addss %xmm0, %xmm1
- // movss %xmm1, %xmm0
- //
- // We now generate:
- // addss %xmm1, %xmm0
- //
- // (2) a vector packed single/double fp operation followed by a vector insert
- //
- // The effect is that the backend converts the packed fp instruction
- // followed by a vector insert into a single SSE scalar fp instruction.
- //
- // For example, given the following code:
- // __m128 foo(__m128 A, __m128 B) {
- // __m128 C = A + B;
- // return (__m128) {C[0], A[1], A[2], A[3]};
- // }
- //
- // Previously we generated:
- // addps %xmm0, %xmm1
- // movss %xmm1, %xmm0
- //
- // We now generate:
- // addss %xmm1, %xmm0
- // TODO: Some canonicalization in lowering would simplify the number of
- // patterns we have to try to match.
- multiclass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move, // Op: scalar math operator; OpcPrefix: instruction name stem (e.g. "ADDSS"); Move: the insert node (X86Movss/X86Movsd).
- ValueType VT, ValueType EltTy, // VT: vector type; EltTy: its scalar element type.
- RegisterClass RC, PatFrag ld_frag, // RC: scalar register class of the second operand; ld_frag: scalar load fragment for the folded form.
- Predicate BasePredicate> { // BasePredicate guards the legacy SSE patterns; the VEX patterns below always use UseAVX.
- let Predicates = [BasePredicate] in {
- // extracted scalar math op with insert via movss/movsd
- def : Pat<(VT (Move (VT VR128:$dst), // Matches: Move($dst, scalar_to_vector(Op(extractelt($dst, 0), $src))).
- (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- RC:$src))))),
- (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst, // Emits the <OpcPrefix>rr_Int instruction.
- (VT (COPY_TO_REGCLASS RC:$src, VR128)))>; // The scalar operand is copied into VR128 for the _Int form.
- def : Pat<(VT (Move (VT VR128:$dst), // Same shape with the scalar operand loaded from memory.
- (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- (ld_frag addr:$src)))))),
- (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
- }
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
- // extracted scalar math op with insert via movss/movsd
- def : Pat<(VT (Move (VT VR128:$dst),
- (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- RC:$src))))),
- (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst, // Instruction name is "V" + OpcPrefix + "rr_Int".
- (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
- def : Pat<(VT (Move (VT VR128:$dst),
- (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- (ld_frag addr:$src)))))),
- (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
- }
- }
- defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; // Single precision: v4f32 / f32 via X86Movss, gated on UseSSE1.
- defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
- defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
- defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
- defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; // Double precision: v2f64 / f64 via X86Movsd, gated on UseSSE2.
- defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
- defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
- defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
- /// Unop Arithmetic
- /// In addition, we also have a special variant of the scalar form here to
- /// represent the associated intrinsic operation. This form is unlike the
- /// plain scalar form, in that it takes an entire vector (instead of a
- /// scalar) and leaves the top elements undefined.
- ///
- /// And, we have a special variant form for a full-vector intrinsic form.
- /// sse_fp_unop_s - SSE1 unops in scalar form
- /// For the non-AVX defs, we need $src1 to be tied to $dst because
- /// the HW instructions are 2 operand / destructive.
- multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, // opc: opcode byte; OpcodeStr: full mnemonic; RC: scalar register class for the r/m forms.
- X86MemOperand x86memop, Operand intmemop, // x86memop: memory operand of the m form; intmemop: memory operand of the m_Int form.
- SDPatternOperator OpNode, Domain d, // OpNode: operator selected by the r/m forms; d: execution domain.
- X86FoldableSchedWrite sched, Predicate target> { // target: predicate required by the r/m forms.
- let isCodeGenOnly = 1, hasSideEffects = 0 in { // Plain scalar forms (one source operand); these carry the selection patterns.
- def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
- !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
- [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
- Requires<[target]>;
- let mayLoad = 1 in
- def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
- !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
- [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
- Sched<[sched.Folded]>,
- Requires<[target, OptForSize]>; // The load-folding form is only selected when OptForSize also holds.
- }
- let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in { // Vector (_Int) forms: two VR128 inputs with $src1 tied to $dst; only $src2 and $dst are printed.
- def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, // Empty pattern list: these forms are selected by separate Pat definitions.
- Sched<[sched]>;
- let mayLoad = 1 in
- def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- }
- multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags, // Selection patterns mapping intrinsic Intr onto the NAME#r_Int / NAME#m_Int instructions; vt types the IMPLICIT_DEF, mem_frags matches the folded load.
- Intrinsic Intr, Predicate target> { // target guards both patterns; the memory pattern additionally needs OptForSize.
- let Predicates = [target] in {
- // These are unary operations, but they are modeled as having 2 source operands
- // because the high elements of the destination are unchanged in SSE.
- def : Pat<(Intr VR128:$src),
- (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>; // The same register is passed for both source operands.
- }
- // We don't want to fold scalar loads into these instructions unless
- // optimizing for size. This is because the folded instruction will have a
- // partial register update, while the unfolded sequence will not, e.g.
- // movss mem, %xmm0
- // rcpss %xmm0, %xmm0
- // which has a clobber before the rcp, vs.
- // rcpss mem, %xmm0
- let Predicates = [target, OptForSize] in {
- def : Pat<(Intr (mem_frags addr:$src2)),
- (!cast<Instruction>(NAME#m_Int)
- (vt (IMPLICIT_DEF)), addr:$src2)>; // The tied first operand is an IMPLICIT_DEF in the folded form.
- }
- }
- multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags, // Same two patterns as sse_fp_unop_s_intr, mapping intrinsic Intr onto NAME#r_Int / NAME#m_Int.
- Intrinsic Intr, Predicate target> { // target guards both patterns; the memory pattern additionally needs OptForSize.
- let Predicates = [target] in {
- def : Pat<(Intr VR128:$src),
- (!cast<Instruction>(NAME#r_Int) VR128:$src, // The same register is passed for both source operands.
- VR128:$src)>;
- }
- let Predicates = [target, OptForSize] in { // Load folding only when optimizing for size.
- def : Pat<(Intr (mem_frags addr:$src2)),
- (!cast<Instruction>(NAME#m_Int)
- (vt (IMPLICIT_DEF)), addr:$src2)>; // The first source is an IMPLICIT_DEF in the folded form.
- }
- }
- multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, // Three-operand scalar unops: r/m on RC, r_Int/m_Int on VR128, plus the patterns selecting r/m.
- ValueType ScalarVT, X86MemOperand x86memop, // ScalarVT types the IMPLICIT_DEF first source; x86memop is the memory operand of the m form.
- Operand intmemop, SDPatternOperator OpNode, Domain d, // intmemop: memory operand of m_Int; OpNode: operator to select; d: execution domain.
- X86FoldableSchedWrite sched, Predicate target> { // target guards the selection patterns at the end of the multiclass.
- let isCodeGenOnly = 1, hasSideEffects = 0 in { // Scalar-register forms; defined with empty pattern lists.
- def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched]>;
- let mayLoad = 1 in
- def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- let hasSideEffects = 0, ExeDomain = d in { // Vector (_Int) forms on VR128; no operand tying, unlike the legacy SSE multiclass.
- def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched]>;
- let mayLoad = 1 in
- def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, intmemop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // We don't want to fold scalar loads into these instructions unless
- // optimizing for size. This is because the folded instruction will have a
- // partial register update, while the unfolded sequence will not, e.g.
- // vmovss mem, %xmm0
- // vrcpss %xmm0, %xmm0, %xmm0
- // which has a clobber before the rcp, vs.
- // vrcpss mem, %xmm0, %xmm0
- // TODO: In theory, we could fold the load, and avoid the stall caused by
- // the partial register store, either in BreakFalseDeps or with smarter RA.
- let Predicates = [target] in {
- def : Pat<(OpNode RC:$src), (!cast<Instruction>(NAME#r) // Register form: first source is an IMPLICIT_DEF, second is the real input.
- (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
- }
- let Predicates = [target, OptForSize] in {
- def : Pat<(ScalarVT (OpNode (load addr:$src))), // Folded-load form, only when optimizing for size.
- (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
- addr:$src)>;
- }
- }
- /// sse1_fp_unop_p - SSE1 unops in packed form: VEX V<NAME>PS{r,m} and V<NAME>PSY{r,m}, plus legacy <NAME>PS{r,m}.
- multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, // opc: opcode byte; OpcodeStr: mnemonic stem ("ps" is appended, "v" prepended for VEX); OpNode: operator to select.
- X86SchedWriteWidths sched, list<Predicate> prds> { // prds is applied to the VEX forms only.
- let Predicates = prds in {
- def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // 128-bit VEX, register source.
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
- VEX, Sched<[sched.XMM]>, VEX_WIG;
- def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // 128-bit VEX, loadv4f32 source.
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
- VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
- def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), // 256-bit VEX, register source.
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
- def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), // 256-bit VEX, loadv8f32 source.
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
- }
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // Legacy SSE forms sit outside the prds scope.
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
- Sched<[sched.XMM]>;
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Legacy memory form matches through memopv4f32 rather than loadv4f32.
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
- Sched<[sched.XMM.Folded]>;
- }
- /// sse2_fp_unop_p - SSE2 unops in vector forms: VEX V<NAME>PD{r,m} and V<NAME>PDY{r,m}, plus legacy <NAME>PD{r,m}.
- multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, // opc: opcode byte; OpcodeStr: mnemonic stem ("pd" is appended, "v" prepended for VEX).
- SDPatternOperator OpNode, X86SchedWriteWidths sched> { // OpNode: operator to select; sched: per-width scheduling data.
- let Predicates = [HasAVX, NoVLX] in { // Unlike sse1_fp_unop_p, the VEX predicate list is fixed here.
- def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // 128-bit VEX, register source.
- !strconcat("v", OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
- VEX, Sched<[sched.XMM]>, VEX_WIG;
- def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // 128-bit VEX, loadv2f64 source.
- !strconcat("v", OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
- VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
- def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), // 256-bit VEX, register source.
- !strconcat("v", OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
- def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), // 256-bit VEX, loadv4f64 source.
- !strconcat("v", OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
- }
- def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // Legacy SSE2 forms sit outside the Predicates scope.
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
- Sched<[sched.XMM]>;
- def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Legacy memory form matches through memopv2f64 rather than loadv2f64.
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
- Sched<[sched.XMM.Folded]>;
- }
- multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> { // Intrinsic patterns for the SS and V<NAME>SS instructions; AVXTarget guards the VEX patterns.
- defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), // Intrinsic record name is built as int_x86_sse_<OpcodeStr>_ss.
- UseSSE1>, XS;
- defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), // Same intrinsic as the legacy form.
- AVXTarget>,
- XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
- }
- multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, // Single-precision scalar unop: legacy SS forms plus VEX V<NAME>SS forms on FR32.
- X86SchedWriteWidths sched, Predicate AVXTarget> { // Only sched.Scl is used; AVXTarget guards the VEX selection patterns.
- defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem, // Legacy forms are always gated on UseSSE1.
- ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
- defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32, // VEX mnemonic is "v" + OpcodeStr + "ss".
- f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
- }
- multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, // Double-precision scalar unop: legacy SD forms plus VEX V<NAME>SD forms on FR64.
- X86SchedWriteWidths sched, Predicate AVXTarget> { // Only sched.Scl is used; AVXTarget guards the VEX selection patterns.
- defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem, // Legacy forms are always gated on UseSSE2.
- sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
- defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64, // VEX mnemonic is "v" + OpcodeStr + "sd".
- f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
- }
- // Square root. Scalar and packed forms in both single and double precision, all selected from any_fsqrt and tagged SIMD_EXC.
- defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
- sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
- sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>, // Double-precision forms use the separate SchedWriteFSqrt64 data.
- sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
- // Reciprocal approximations. Note that these typically require refinement
- // in order to obtain suitable precision.
- defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, // Single precision only; the VEX forms are gated on HasAVX rather than UseAVX.
- sse1_fp_unop_s_intr<"rsqrt", HasAVX>, // Adds the int_x86_sse_rsqrt_ss intrinsic patterns.
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
- defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, // Same structure as RSQRT with opcode 0x53 and X86frcp.
- sse1_fp_unop_s_intr<"rcp", HasAVX>, // Adds the int_x86_sse_rcp_ss intrinsic patterns.
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
- // There is no f64 version of the reciprocal approximation instructions.
- multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move, // Selects <OpcPrefix>r_Int for Move($dst, scalar_to_vector(OpNode(extractelt($src, 0)))).
- ValueType VT, Predicate BasePredicate> { // BasePredicate guards the legacy pattern; the VEX pattern always uses UseAVX.
- let Predicates = [BasePredicate] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; // $dst supplies the pass-through operand, $src the input vector.
- }
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; // Instruction name is "V" + OpcPrefix + "r_Int".
- }
- }
- defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>; // Single-precision sqrt via X86Movss.
- defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>; // Double-precision sqrt via X86Movsd.
- multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix, // Selects <OpcPrefix>r_Int for Move($dst, Intr($src)).
- SDNode Move, ValueType VT, // Move: the insert node; VT: the vector type of both operands.
- Predicate BasePredicate> { // BasePredicate guards the legacy pattern; the VEX pattern uses HasAVX (not UseAVX).
- let Predicates = [BasePredicate] in {
- def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
- (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; // $dst supplies the pass-through operand, $src the input vector.
- }
- // Repeat for AVX versions of the instructions.
- let Predicates = [HasAVX] in {
- def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
- (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; // Instruction name is "V" + OpcPrefix + "r_Int".
- }
- }
- defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss, // Reciprocal approximation intrinsic.
- v4f32, UseSSE1>;
- defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss, // Reciprocal square-root approximation intrinsic.
- v4f32, UseSSE1>;
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Non-temporal stores
- //===----------------------------------------------------------------------===//
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- let Predicates = [HasAVX, NoVLX] in { // VEX-encoded non-temporal stores, each selected from alignednontemporalstore.
- let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { // 128-bit FP stores.
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG;
- } // SchedRW
- let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in { // 256-bit FP stores.
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG;
- } // SchedRW
- let ExeDomain = SSEPackedInt in { // Integer-domain stores (opcode 0xE7) with i128mem / i256mem operands.
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG,
- Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins i256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG,
- Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
- } // ExeDomain
- } // Predicates
- // Non-VEX 128-bit floating-point non-temporal stores.
- def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs),
-                     (ins f128mem:$dst, VR128:$src),
-                     "movntps\t{$src, $dst|$dst, $src}",
-                     [(alignednontemporalstore (v4f32 VR128:$src),
-                                               addr:$dst)]>,
-                 Sched<[SchedWriteFMoveLSNT.XMM.MR]>;
- def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs),
-                     (ins f128mem:$dst, VR128:$src),
-                     "movntpd\t{$src, $dst|$dst, $src}",
-                     [(alignednontemporalstore(v2f64 VR128:$src),
-                                              addr:$dst)]>,
-                 Sched<[SchedWriteFMoveLSNT.XMM.MR]>;
- // Non-VEX 128-bit integer-domain non-temporal store (v2i64 pattern).
- let ExeDomain = SSEPackedInt in
- def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs),
-                     (ins f128mem:$dst, VR128:$src),
-                     "movntdq\t{$src, $dst|$dst, $src}",
-                     [(alignednontemporalstore (v2i64 VR128:$src),
-                                               addr:$dst)]>,
-                 Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
- // Non-temporal stores from general-purpose registers (32- and 64-bit).
- // There is no AVX form for instructions below this point
- def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                  "movnti{l}\t{$src, $dst|$dst, $src}",
-                  [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
-                PS, Requires<[HasSSE2]>, Sched<[WriteStoreNT]>;
- def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                      "movnti{q}\t{$src, $dst|$dst, $src}",
-                      [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
-                   PS, Requires<[HasSSE2]>, Sched<[WriteStoreNT]>;
- // Route aligned non-temporal stores of the remaining integer vector types to
- // the VEX movntdq forms: 256-bit types to VMOVNTDQYmr, 128-bit types to
- // VMOVNTDQmr.
- let Predicates = [HasAVX, NoVLX] in {
-   // 256-bit element types.
-   def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
-             (VMOVNTDQYmr addr:$dst, VR256:$src)>;
-   def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
-             (VMOVNTDQYmr addr:$dst, VR256:$src)>;
-   def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
-             (VMOVNTDQYmr addr:$dst, VR256:$src)>;
-   // 128-bit element types.
-   def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
-             (VMOVNTDQmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
-             (VMOVNTDQmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
-             (VMOVNTDQmr addr:$dst, VR128:$src)>;
- }
- // Non-VEX counterpart: v4i32/v8i16/v16i8 aligned non-temporal stores all
- // select MOVNTDQmr.
- let Predicates = [UseSSE2] in {
-   def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
-             (MOVNTDQmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
-             (MOVNTDQmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
-             (MOVNTDQmr addr:$dst, VR128:$src)>;
- }
- } // AddedComplexity
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Prefetch and memory fence
- //===----------------------------------------------------------------------===//
- // Prefetch intrinsic.  The four instructions share opcode 0x18; they differ
- // in ModRM form (MRM0m..MRM3m) and in the third operand of the matched
- // prefetch node, (i32 0) through (i32 3).
- let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
-   def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
-                      "prefetcht0\t$src",
-                      [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
-   def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
-                      "prefetcht1\t$src",
-                      [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
-   def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
-                      "prefetcht2\t$src",
-                      [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
-   def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
-                       "prefetchnta\t$src",
-                       [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
- }
- // Flush cache
- // FIXME: How should flush instruction be modeled?  It is currently scheduled
- // as WriteLoad.
- def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
-                 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
-               PS, Requires<[HasSSE2]>, Sched<[WriteLoad]>;
- // Pause. This "instruction" is encoded as "rep; nop", so even though it
- // was introduced with SSE2, it's backward compatible.  Scheduled as WriteNop.
- def PAUSE : I<0x90, RawFrm, (outs), (ins),
-               "pause", [(int_x86_sse2_pause)]>,
-             OBXS, Sched<[WriteNop]>;
- // Load, store, and memory fence.  All three use opcode 0xAE and are selected
- // from their respective intrinsics.
- // TODO: As with mfence, we may want to ease the availability of sfence/lfence
- // to include any 64-bit target.
- def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence",
-                [(int_x86_sse_sfence)]>,
-              PS, Requires<[HasSSE1]>, Sched<[WriteFence]>;
- def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence",
-                [(int_x86_sse2_lfence)]>,
-              PS, Requires<[HasSSE2]>, Sched<[WriteFence]>;
- def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence",
-                [(int_x86_sse2_mfence)]>,
-              PS, Requires<[HasMFence]>, Sched<[WriteFence]>;
- // The X86MFence node also selects MFENCE.
- def : Pat<(X86MFence), (MFENCE)>;
- //===----------------------------------------------------------------------===//
- // SSE 1 & 2 - Load/Store MXCSR register
- //===----------------------------------------------------------------------===//
- // ldmxcsr / stmxcsr in VEX and non-VEX encodings.  All four are marked as
- // having side effects; the ld* forms may load and the st* forms may store.
- let hasSideEffects = 1 in {
-   let mayLoad = 1 in
-   def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                       "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
-                  VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
-   let mayStore = 1 in
-   def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                       "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
-                  VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
-   let mayLoad = 1 in
-   def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                   "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
-                 PS, Sched<[WriteLDMXCSR]>;
-   let mayStore = 1 in
-   def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
-                 PS, Sched<[WriteSTMXCSR]>;
- }
- //===---------------------------------------------------------------------===//
- // SSE2 - Move Aligned/Unaligned Packed Integer Instructions
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in { // SSE integer instructions
- // VEX register-to-register integer moves (opcode 0x6F, MRMSrcReg).  They
- // carry no selection pattern.
- let hasSideEffects = 0 in {
-   // 128-bit forms.
-   let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
-     def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movdqa\t{$src, $dst|$dst, $src}", []>,
-                     VEX, VEX_WIG;
-     def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movdqu\t{$src, $dst|$dst, $src}", []>,
-                     VEX, VEX_WIG;
-   }
-   // 256-bit forms.
-   let SchedRW = [SchedWriteVecMoveLS.YMM.RR] in {
-     def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst),
-                           (ins VR256:$src),
-                           "movdqa\t{$src, $dst|$dst, $src}", []>,
-                      VEX, VEX_L, VEX_WIG;
-     def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst),
-                           (ins VR256:$src),
-                           "movdqu\t{$src, $dst|$dst, $src}", []>,
-                      VEX, VEX_L, VEX_WIG;
-   }
- }
- // For Disassembler: the same register moves under opcode 0x7F with the
- // MRMDestReg form.  Each names its 0x6F counterpart through FoldGenData.
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
-   def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst),
-                            (ins VR128:$src),
-                            "movdqa\t{$src, $dst|$dst, $src}", []>,
-                       Sched<[SchedWriteVecMoveLS.XMM.RR]>,
-                       VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
-   def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst),
-                             (ins VR256:$src),
-                             "movdqa\t{$src, $dst|$dst, $src}", []>,
-                        Sched<[SchedWriteVecMoveLS.YMM.RR]>,
-                        VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
-   def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst),
-                            (ins VR128:$src),
-                            "movdqu\t{$src, $dst|$dst, $src}", []>,
-                       Sched<[SchedWriteVecMoveLS.XMM.RR]>,
-                       VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
-   def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst),
-                             (ins VR256:$src),
-                             "movdqu\t{$src, $dst|$dst, $src}", []>,
-                        Sched<[SchedWriteVecMoveLS.YMM.RR]>,
-                        VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
- }
- // VEX integer vector loads.  Only the 128-bit forms carry a pattern
- // (alignedloadv2i64 for movdqa, loadv2i64 for movdqu); the 256-bit forms
- // have an empty pattern list.
- let canFoldAsLoad = 1,
-     mayLoad = 1,
-     isReMaterializable = 1,
-     hasSideEffects = 0,
-     Predicates = [HasAVX,NoVLX] in {
-   def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst),
-                        (ins i128mem:$src),
-                        "movdqa\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
-                   Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
-   def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst),
-                         (ins i256mem:$src),
-                         "movdqa\t{$src, $dst|$dst, $src}", []>,
-                    Sched<[SchedWriteVecMoveLS.YMM.RM]>,
-                    VEX, VEX_L, VEX_WIG;
-   def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst),
-                     (ins i128mem:$src),
-                     "vmovdqu\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (loadv2i64 addr:$src))]>,
-                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
-                   XS, VEX, VEX_WIG;
-   def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst),
-                      (ins i256mem:$src),
-                      "vmovdqu\t{$src, $dst|$dst, $src}", []>,
-                    Sched<[SchedWriteVecMoveLS.YMM.RM]>,
-                    XS, VEX, VEX_L, VEX_WIG;
- }
- // VEX integer vector stores (opcode 0x7F).  Only the 128-bit forms carry a
- // pattern (alignedstore / store of v2i64); the 256-bit forms have none.
- let mayStore = 1,
-     hasSideEffects = 0,
-     Predicates = [HasAVX,NoVLX] in {
-   def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
-                        (ins i128mem:$dst, VR128:$src),
-                        "movdqa\t{$src, $dst|$dst, $src}",
-                        [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
-                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
-   def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
-                         (ins i256mem:$dst, VR256:$src),
-                         "movdqa\t{$src, $dst|$dst, $src}", []>,
-                    Sched<[SchedWriteVecMoveLS.YMM.MR]>,
-                    VEX, VEX_L, VEX_WIG;
-   def VMOVDQUmr : I<0x7F, MRMDestMem, (outs),
-                     (ins i128mem:$dst, VR128:$src),
-                     "vmovdqu\t{$src, $dst|$dst, $src}",
-                     [(store (v2i64 VR128:$src), addr:$dst)]>,
-                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
-   def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs),
-                      (ins i256mem:$dst, VR256:$src),
-                      "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
-                    Sched<[SchedWriteVecMoveLS.YMM.MR]>,
-                    XS, VEX, VEX_L, VEX_WIG;
- }
- // Non-VEX register-to-register integer moves, plus their reversed (0x7F,
- // MRMDestReg) twins.  None carries a selection pattern, and every def in
- // this group has hasSideEffects = 0.
- let SchedRW = [SchedWriteVecMoveLS.XMM.RR], hasSideEffects = 0 in {
-   def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>;
-   def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    "movdqu\t{$src, $dst|$dst, $src}", []>,
-                  XS, Requires<[UseSSE2]>;
-   // For Disassembler
-   let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-     def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst),
-                            (ins VR128:$src),
-                            "movdqa\t{$src, $dst|$dst, $src}", []>,
-                        FoldGenData<"MOVDQArr">;
-     def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst),
-                          (ins VR128:$src),
-                          "movdqu\t{$src, $dst|$dst, $src}", []>,
-                        XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
-   }
- } // SchedRW
- // Non-VEX integer vector loads.  The selection patterns are commented out in
- // the pattern lists, so both defs are pattern-less.
- let canFoldAsLoad = 1,
-     mayLoad = 1,
-     isReMaterializable = 1,
-     hasSideEffects = 0,
-     SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
-   def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst),
-                      (ins i128mem:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}",
-                      [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
-   def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst),
-                    (ins i128mem:$src),
-                    "movdqu\t{$src, $dst|$dst, $src}",
-                    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
-                  XS, Requires<[UseSSE2]>;
- }
- // Non-VEX integer vector stores.  As with the loads, the selection patterns
- // are commented out in the pattern lists.
- let mayStore = 1,
-     hasSideEffects = 0,
-     SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
-   def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs),
-                      (ins i128mem:$dst, VR128:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}",
-                      [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
-   def MOVDQUmr : I<0x7F, MRMDestMem, (outs),
-                    (ins i128mem:$dst, VR128:$src),
-                    "movdqu\t{$src, $dst|$dst, $src}",
-                    [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
-                  XS, Requires<[UseSSE2]>;
- }
- } // ExeDomain = SSEPackedInt
- // Reversed versions with ".s" suffix for GAS compatibility: each alias maps
- // a ".s" mnemonic onto the corresponding _REV instruction.
- // VEX forms.
- def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
-                 (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
-                 (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
- def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
-                 (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
-                 (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;
- // Non-VEX forms.
- def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
-                 (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
-                 (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
- // Additional patterns for other integer sizes: v4i32, v8i16 and v16i8
- // loads/stores reuse the VEX movdqa/movdqu instructions.
- let Predicates = [HasAVX, NoVLX] in {
-   // Aligned loads -> VMOVDQArm.
-   def : Pat<(alignedloadv4i32 addr:$src), (VMOVDQArm addr:$src)>;
-   def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQArm addr:$src)>;
-   def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQArm addr:$src)>;
-   // Other loads -> VMOVDQUrm.
-   def : Pat<(loadv4i32 addr:$src), (VMOVDQUrm addr:$src)>;
-   def : Pat<(loadv8i16 addr:$src), (VMOVDQUrm addr:$src)>;
-   def : Pat<(loadv16i8 addr:$src), (VMOVDQUrm addr:$src)>;
-   // Aligned stores -> VMOVDQAmr.
-   def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
-             (VMOVDQAmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-             (VMOVDQAmr addr:$dst, VR128:$src)>;
-   def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-             (VMOVDQAmr addr:$dst, VR128:$src)>;
-   // Other stores -> VMOVDQUmr.
-   def : Pat<(store (v4i32 VR128:$src), addr:$dst),
-             (VMOVDQUmr addr:$dst, VR128:$src)>;
-   def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-             (VMOVDQUmr addr:$dst, VR128:$src)>;
-   def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-             (VMOVDQUmr addr:$dst, VR128:$src)>;
- }
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Arithmetic Instructions
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in { // SSE integer instructions
-   /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst
-   /// types.  Emits a register-register form (rr, marked commutable) and a
-   /// register-memory form (rm) whose second operand is loaded through
-   /// memop_frag.  Is2Addr selects the two-operand or the three-operand
-   /// assembly string.
-   multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            ValueType DstVT, ValueType SrcVT,
-                            RegisterClass RC, PatFrag memop_frag,
-                            X86MemOperand x86memop,
-                            X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-     let isCommutable = 1 in
-     def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
-              Sched<[sched]>;
-     def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
-                  (ins RC:$src1, x86memop:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (DstVT (OpNode (SrcVT RC:$src1),
-                                       (memop_frag addr:$src2))))]>,
-              Sched<[sched.Folded, sched.ReadAfterFold]>;
-   }
- } // ExeDomain = SSEPackedInt
- // Packed integer arithmetic, all instantiated through PDI_binop_all with
- // (opcode, mnemonic, DAG node, 128-bit type, 256-bit type, scheduling class,
- // flag, predicate).
- // NOTE(review): the 0/1 flag is 1 for add/mul/min/max/avg and 0 for the
- // subtract family - presumably "is commutable"; confirm against the
- // definition of PDI_binop_all, which is outside this section.
- // add
- defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
-                              SchedWriteVecALU, 1, NoVLX>;
- defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
-                              SchedWriteVecALU, 1, NoVLX>;
- // saddsat / uaddsat
- defm PADDSB  : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PADDSW  : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- // mul / mulhu / mulhs
- defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
-                              SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
- defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
-                              SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
- defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
-                              SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
- // sub
- defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
-                              SchedWriteVecALU, 0, NoVLX>;
- defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
-                              SchedWriteVecALU, 0, NoVLX>;
- // ssubsat / usubsat
- defm PSUBSB  : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- defm PSUBSW  : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
-                              SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
- // umin / smin / umax / smax
- defm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- // X86avg
- defm PAVGB   : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- defm PAVGW   : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
-                              SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
- // X86pmuludq
- defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
-                              SchedWriteVecIMul, 1, NoVLX>;
- // pmaddwd (opcode 0xF5): result type differs from the source type
- // (v8i16 -> v4i32, v16i16 -> v8i32), hence PDI_binop_rm2.  The VEX forms
- // pass Is2Addr = 0; the non-VEX form ties $src1 to $dst.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16,
-                               VR128, load, i128mem,
-                               SchedWriteVecIMul.XMM, 0>,
-                 VEX_4V, VEX_WIG;
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
-                                VR256, load, i256mem,
-                                SchedWriteVecIMul.YMM, 0>,
-                  VEX_4V, VEX_L, VEX_WIG;
- let Constraints = "$src1 = $dst" in
- defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16,
-                              VR128, memop, i128mem,
-                              SchedWriteVecIMul.XMM>;
- // psadbw (opcode 0xF6): v16i8 -> v2i64 and v32i8 -> v4i64, same layout as
- // the pmaddwd group.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8,
-                              VR128, load, i128mem,
-                              SchedWritePSADBW.XMM, 0>,
-                VEX_4V, VEX_WIG;
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8,
-                               VR256, load, i256mem,
-                               SchedWritePSADBW.YMM, 0>,
-                 VEX_4V, VEX_L, VEX_WIG;
- let Constraints = "$src1 = $dst" in
- defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8,
-                             VR128, memop, i128mem,
-                             SchedWritePSADBW.XMM>;
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Logical Instructions
- //===---------------------------------------------------------------------===//
- // PDI_binop_rmi - binary operator with three forms:
- //   rr - $src2 in a VR128 register (opcode opc, node OpNode)
- //   rm - $src2 loaded from i128mem through ld_frag (opcode opc, node OpNode)
- //   ri - $src2 an 8-bit immediate (opcode opc2 / ImmForm, node OpNode2)
- // Is2Addr selects the two-operand or the three-operand assembly string.
- multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
-                          string OpcodeStr, SDNode OpNode, SDNode OpNode2,
-                          RegisterClass RC, X86FoldableSchedWrite sched,
-                          X86FoldableSchedWrite schedImm,
-                          ValueType DstVT, ValueType SrcVT,
-                          PatFrag ld_frag, bit Is2Addr = 1> {
-   // src2 is always 128-bit
-   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, VR128:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst,
-                      (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
-            Sched<[sched]>;
-   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
-                (ins RC:$src1, i128mem:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst,
-                      (DstVT (OpNode RC:$src1,
-                                     (SrcVT (ld_frag addr:$src2)))))]>,
-            Sched<[sched.Folded, sched.ReadAfterFold]>;
-   def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
-                  (ins RC:$src1, u8imm:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
-            Sched<[schedImm]>;
- }
- // PDI_binop_rmi_all - instantiates PDI_binop_rmi three times:
- //   V#NAME   - "v"-prefixed mnemonic on VR128 under [HasAVX, prd]
- //   V#NAME#Y - "v"-prefixed mnemonic on VR256 under [HasAVX2, prd]
- //   NAME     - plain mnemonic on VR128 with $src1 tied to $dst
- // The "v" forms load through `load` and pass Is2Addr = 0; the plain form
- // loads through `memop` and keeps the default Is2Addr.
- multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
-                              string OpcodeStr, SDNode OpNode,
-                              SDNode OpNode2, ValueType DstVT128,
-                              ValueType DstVT256, ValueType SrcVT,
-                              X86SchedWriteWidths sched,
-                              X86SchedWriteWidths schedImm,
-                              Predicate prd> {
-   let Predicates = [HasAVX, prd] in
-   defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, "v"#OpcodeStr,
-                               OpNode, OpNode2, VR128,
-                               sched.XMM, schedImm.XMM,
-                               DstVT128, SrcVT, load, 0>,
-                 VEX_4V, VEX_WIG;
-   let Predicates = [HasAVX2, prd] in
-   defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, "v"#OpcodeStr,
-                                 OpNode, OpNode2, VR256,
-                                 sched.YMM, schedImm.YMM,
-                                 DstVT256, SrcVT, load, 0>,
-                   VEX_4V, VEX_L, VEX_WIG;
-   let Constraints = "$src1 = $dst" in
-   defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr,
-                             OpNode, OpNode2, VR128,
-                             sched.XMM, schedImm.XMM,
-                             DstVT128, SrcVT, memop>;
- }
- // PDI_binop_ri - immediate-only binary operator: a single `ri` form taking
- // a register and an 8-bit immediate.  Is2Addr selects the two-operand or
- // the three-operand assembly string.
- multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
-                         SDNode OpNode, RegisterClass RC, ValueType VT,
-                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-   def ri : PDIi8<opc, ImmForm, (outs RC:$dst),
-                  (ins RC:$src1, u8imm:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
-            Sched<[sched]>;
- }
- // PDI_binop_ri_all - instantiates PDI_binop_ri three times with fixed byte
- // vector types: V#NAME (VR128, v16i8) under HasAVX, V#NAME#Y (VR256, v32i8)
- // under HasAVX2, and NAME (VR128, v16i8) with $src1 tied to $dst.
- multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
-                             SDNode OpNode, X86SchedWriteWidths sched> {
-   let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
-   defm V#NAME : PDI_binop_ri<opc, ImmForm, "v"#OpcodeStr, OpNode,
-                              VR128, v16i8, sched.XMM, 0>,
-                 VEX_4V, VEX_WIG;
-   let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
-   defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, "v"#OpcodeStr, OpNode,
-                                VR256, v32i8, sched.YMM, 0>,
-                   VEX_4V, VEX_L, VEX_WIG;
-   let Constraints = "$src1 = $dst" in
-   defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode,
-                            VR128, v16i8, sched.XMM>;
- }
- // Packed shifts.  For the rmi forms the first opcode belongs to the
- // register/memory shift-count variants and the second (0x71/0x72/0x73 with
- // an MRMnr form) to the immediate variant.
- let ExeDomain = SSEPackedInt in {
-   // X86vshl / X86vshli
-   defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw",
-                                  X86vshl, X86vshli, v8i16, v16i16, v8i16,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX_Or_NoBWI>;
-   defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld",
-                                  X86vshl, X86vshli, v4i32, v8i32, v4i32,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX>;
-   defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq",
-                                  X86vshl, X86vshli, v2i64, v4i64, v2i64,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX>;
-   // X86vsrl / X86vsrli
-   defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw",
-                                  X86vsrl, X86vsrli, v8i16, v16i16, v8i16,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX_Or_NoBWI>;
-   defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld",
-                                  X86vsrl, X86vsrli, v4i32, v8i32, v4i32,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX>;
-   defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq",
-                                  X86vsrl, X86vsrli, v2i64, v4i64, v2i64,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX>;
-   // X86vsra / X86vsrai
-   defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw",
-                                  X86vsra, X86vsrai, v8i16, v16i16, v8i16,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX_Or_NoBWI>;
-   defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad",
-                                  X86vsra, X86vsrai, v4i32, v8i32, v4i32,
-                                  SchedWriteVecShift, SchedWriteVecShiftImm,
-                                  NoVLX>;
-   // Immediate-only forms (X86vshldq / X86vshrdq), both on opcode 0x73.
-   defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
-                                  SchedWriteShuffle>;
-   defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
-                                  SchedWriteShuffle>;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Comparison Instructions
- //===---------------------------------------------------------------------===//
- // Packed integer compares, instantiated under TruePredicate.
- // NOTE(review): the 0/1 flag is 1 for the X86pcmpeq group and 0 for the
- // X86pcmpgt group - presumably "is commutable"; confirm against
- // PDI_binop_all.
- // X86pcmpeq
- defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
-                              SchedWriteVecALU, 1, TruePredicate>;
- defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
-                              SchedWriteVecALU, 1, TruePredicate>;
- defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
-                              SchedWriteVecALU, 1, TruePredicate>;
- // X86pcmpgt
- defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
-                              SchedWriteVecALU, 0, TruePredicate>;
- defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
-                              SchedWriteVecALU, 0, TruePredicate>;
- defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
-                              SchedWriteVecALU, 0, TruePredicate>;
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Shuffle Instructions
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in {
-   // sse2_pshuffle - shuffle with an 8-bit immediate control, opcode 0x70.
-   // Emits six instructions:
-   //   V#NAME#ri / V#NAME#mi   - "v" mnemonic, VR128, under [HasAVX, prd]
-   //   V#NAME#Yri / V#NAME#Ymi - "v" mnemonic, VR256, under [HasAVX2, prd]
-   //   ri / mi                 - plain mnemonic, VR128, under [UseSSE2]
-   // The "v" memory forms load through `load`, the plain one through `memop`.
-   multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128,
-                            ValueType vt256, SDNode OpNode,
-                            X86SchedWriteWidths sched, Predicate prd> {
-     let Predicates = [HasAVX, prd] in {
-       def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
-                           (ins VR128:$src1, u8imm:$src2),
-                           "v"#OpcodeStr#
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set VR128:$dst,
-                                 (vt128 (OpNode VR128:$src1,
-                                                (i8 timm:$src2))))]>,
-                       VEX, Sched<[sched.XMM]>, VEX_WIG;
-       def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
-                           (ins i128mem:$src1, u8imm:$src2),
-                           "v"#OpcodeStr#
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set VR128:$dst,
-                                 (vt128 (OpNode (load addr:$src1),
-                                                (i8 timm:$src2))))]>,
-                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
-     }
-     let Predicates = [HasAVX2, prd] in {
-       def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
-                            (ins VR256:$src1, u8imm:$src2),
-                            "v"#OpcodeStr#
-                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                            [(set VR256:$dst,
-                                  (vt256 (OpNode VR256:$src1,
-                                                 (i8 timm:$src2))))]>,
-                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
-       def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
-                            (ins i256mem:$src1, u8imm:$src2),
-                            "v"#OpcodeStr#
-                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                            [(set VR256:$dst,
-                                  (vt256 (OpNode (load addr:$src1),
-                                                 (i8 timm:$src2))))]>,
-                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
-     }
-     let Predicates = [UseSSE2] in {
-       def ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
-                    (ins VR128:$src1, u8imm:$src2),
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst,
-                          (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
-                Sched<[sched.XMM]>;
-       def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
-                    (ins i128mem:$src1, u8imm:$src2),
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst,
-                          (vt128 (OpNode (memop addr:$src1),
-                                         (i8 timm:$src2))))]>,
-                Sched<[sched.XMM.Folded]>;
-     }
-   }
- } // ExeDomain = SSEPackedInt
- // The three shuffles share opcode 0x70 (fixed inside sse2_pshuffle) and are
- // told apart by the prefix class attached here: PD, XS and XD.
- defm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
-                              SchedWriteShuffle, NoVLX>,
-                PD;
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
-                              SchedWriteShuffle, NoVLX_Or_NoBWI>,
-                XS;
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
-                              SchedWriteShuffle, NoVLX_Or_NoBWI>,
-                XD;
- //===---------------------------------------------------------------------===//
- // Packed Integer Pack Instructions (SSE & AVX)
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in {
- // sse2_pack - pack operator built on the PDI class: both inputs have type
- // ArgVT and the result has type OutVT.  Emits a register-register form (rr)
- // and a register-memory form (rm) that loads $src2 through ld_frag.
- // Is2Addr selects the two-operand or the three-operand assembly string.
- multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
-                      ValueType ArgVT, SDNode OpNode, RegisterClass RC,
-                      X86MemOperand x86memop, X86FoldableSchedWrite sched,
-                      PatFrag ld_frag, bit Is2Addr = 1> {
-   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst,
-                      (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
-            Sched<[sched]>;
-   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
-                (ins RC:$src1, x86memop:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst,
-                      (OutVT (OpNode (ArgVT RC:$src1),
-                                     (ld_frag addr:$src2))))]>,
-            Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // sse4_pack - same shape as sse2_pack, but the rr and rm forms are built on
- // the SS48I instruction class instead of PDI.
- multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
-                      ValueType ArgVT, SDNode OpNode, RegisterClass RC,
-                      X86MemOperand x86memop, X86FoldableSchedWrite sched,
-                      PatFrag ld_frag, bit Is2Addr = 1> {
-   def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
-            Sched<[sched]>;
-   def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
-                  (ins RC:$src1, x86memop:$src2),
-                  !if(Is2Addr,
-                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set RC:$dst,
-                        (OutVT (OpNode (ArgVT RC:$src1),
-                                       (ld_frag addr:$src2))))]>,
-            Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // 128-bit VEX pack instructions (three-operand, load through `load`).
- // X86Packss backs the "ss" mnemonics and X86Packus the "us" ones;
- // vpackusdw is the only one built from sse4_pack.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
-   defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
-                              VR128, i128mem, SchedWriteShuffle.XMM,
-                              load, 0>,
-                    VEX_4V, VEX_WIG;
-   defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
-                              VR128, i128mem, SchedWriteShuffle.XMM,
-                              load, 0>,
-                    VEX_4V, VEX_WIG;
-   defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
-                              VR128, i128mem, SchedWriteShuffle.XMM,
-                              load, 0>,
-                    VEX_4V, VEX_WIG;
-   defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
-                              VR128, i128mem, SchedWriteShuffle.XMM,
-                              load, 0>,
-                    VEX_4V, VEX_WIG;
- }
- // 256-bit VEX pack instructions: same opcodes and nodes as the 128-bit
- // group, on VR256 / i256mem with VEX_L added.
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
-   defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
-                               VR256, i256mem, SchedWriteShuffle.YMM,
-                               load, 0>,
-                     VEX_4V, VEX_L, VEX_WIG;
-   defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
-                               VR256, i256mem, SchedWriteShuffle.YMM,
-                               load, 0>,
-                     VEX_4V, VEX_L, VEX_WIG;
-   defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
-                               VR256, i256mem, SchedWriteShuffle.YMM,
-                               load, 0>,
-                     VEX_4V, VEX_L, VEX_WIG;
-   defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
-                               VR256, i256mem, SchedWriteShuffle.YMM,
-                               load, 0>,
-                     VEX_4V, VEX_L, VEX_WIG;
- }
- // Non-VEX pack instructions: $src1 is tied to $dst, memory operands load
- // through `memop`, and Is2Addr keeps its default.
- let Constraints = "$src1 = $dst" in {
-   defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
-                             VR128, i128mem, SchedWriteShuffle.XMM, memop>;
-   defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
-                             VR128, i128mem, SchedWriteShuffle.XMM, memop>;
-   defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
-                             VR128, i128mem, SchedWriteShuffle.XMM, memop>;
-   defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
-                             VR128, i128mem, SchedWriteShuffle.XMM, memop>;
- }
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Unpack Instructions
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in {
- // sse2_unpack - unpack operator where inputs and result share the type vt.
- // Emits a register-register form (rr) and a register-memory form (rm) that
- // loads $src2 through ld_frag.  Is2Addr selects the two-operand or the
- // three-operand assembly string.
- multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
-                        SDNode OpNode, RegisterClass RC,
-                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
-                        PatFrag ld_frag, bit Is2Addr = 1> {
-   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
-            Sched<[sched]>;
-   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
-                (ins RC:$src1, x86memop:$src2),
-                !if(Is2Addr,
-                    OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
-                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set RC:$dst,
-                      (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
-            Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- }
- let Predicates = [HasAVX, NoVLX] in {
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX] in {
- defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- }
- let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memop>;
- }
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Integer Extract and Insert
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in {
- multiclass sse2_pinsrw<bit Is2Addr = 1> { // Emits `rr`/`rm` records for opcode 0xC4; Is2Addr picks the "pinsrw" (2-operand) or "vpinsrw" (3-operand) asm string.
- def rr : Ii8<0xC4, MRMSrcReg, // Inserted value comes from a GR32orGR64 register; $src3 is the 8-bit immediate.
- (outs VR128:$dst), (ins VR128:$src1,
- GR32orGR64:$src2, u8imm:$src3),
- !if(Is2Addr,
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
- Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
- def rm : Ii8<0xC4, MRMSrcMem, // Inserted value comes from an i16mem operand, matched via extloadi16.
- (outs VR128:$dst), (ins VR128:$src1,
- i16mem:$src2, u8imm:$src3),
- !if(Is2Addr,
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- timm:$src3))]>,
- Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
- }
- // Extract: X86pextrw of a v8i16 register with an immediate index, result in GR32orGR64 (register forms only).
- let Predicates = [HasAVX, NoBWI] in
- def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
- (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
- "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- timm:$src2))]>,
- PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
- def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, // No Predicates set here; presumably supplied by the PDIi8 class - TODO confirm.
- (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
- "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- timm:$src2))]>,
- Sched<[WriteVecExtract]>;
- // Insert: the VEX form passes Is2Addr = 0 (3-operand asm); the SSE form keeps the default and ties $src1 to $dst.
- let Predicates = [HasAVX, NoBWI] in
- defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
- let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
- defm PINSRW : sse2_pinsrw, PD;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // SSE2 - Packed Mask Creation
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt in { // X86movmsk of a byte vector into GR32orGR64; only register-source forms are defined.
- def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
- (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", // NOTE(review): no "v" in this string; presumably the VPDI class prepends it - confirm.
- [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
- Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;
- let Predicates = [HasAVX2] in { // 256-bit source (v32i8 in VR256) is gated on HasAVX2.
- def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
- (ins VR256:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
- Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
- }
- def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), // Form without VEX modifiers; same opcode and pattern as VPMOVMSKBrr.
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
- Sched<[WriteVecMOVMSK]>;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // SSE2 - Conditional Store
- //===---------------------------------------------------------------------===//
- let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { // Six records: {VEX, non-VEX} x {EDI outside 64-bit mode, RDI in 64-bit mode, EDI in 64-bit mode}.
- let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in // EDI is an implicit use and is also passed as the intrinsic's third operand.
- def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
- (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- VEX, VEX_WIG;
- let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in // 64-bit mode with RDI; tagged AdSize64.
- def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
- (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- VEX, VEX_WIG, AdSize64;
- let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in // 64-bit mode with EDI; tagged AdSize32.
- def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
- (ins VR128:$src, VR128:$mask), "", // Empty asm string here; the real one is set via AsmString in the body below.
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- VEX, VEX_WIG, AdSize32 {
- let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}"; // NOTE(review): set directly, presumably so the "addr32 " prefix precedes the "v" that VPDI would otherwise add - confirm.
- let AsmVariantName = "NonParsable";
- }
- let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in // Non-VEX counterparts follow, in the same three address-register flavours.
- def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
- let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
- def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- AdSize64;
- let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
- def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "addr32 maskmovdqu\t{$mask, $src|$src, $mask}", // Asm string carries an explicit "addr32 " prefix.
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- AdSize32 {
- let AsmVariantName = "NonParsable";
- }
- } // ExeDomain = SSEPackedInt, SchedRW
- //===---------------------------------------------------------------------===//
- // SSE2 - Move Doubleword/Quadword
- //===---------------------------------------------------------------------===//
- //===---------------------------------------------------------------------===//
- // Move Int Doubleword to Packed Double Int
- //
- let ExeDomain = SSEPackedInt in {
- def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>,
- VEX, Sched<[WriteVecMoveFromGpr]>;
- def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
- VEX, Sched<[WriteVecLoad]>;
- def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>,
- VEX, Sched<[WriteVecMoveFromGpr]>;
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
- def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}", []>,
- VEX, Sched<[WriteVecLoad]>;
- let isCodeGenOnly = 1 in
- def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>,
- VEX, Sched<[WriteVecMoveFromGpr]>;
- def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>,
- Sched<[WriteVecMoveFromGpr]>;
- def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
- Sched<[WriteVecLoad]>;
- def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>,
- Sched<[WriteVecMoveFromGpr]>;
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
- def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteVecLoad]>;
- let isCodeGenOnly = 1 in
- def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>,
- Sched<[WriteVecMoveFromGpr]>;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // Move Int Doubleword to Single Scalar
- //
- let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
- def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>,
- VEX, Sched<[WriteVecMoveFromGpr]>;
- def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>,
- Sched<[WriteVecMoveFromGpr]>;
- } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
- //===---------------------------------------------------------------------===//
- // Move Packed Doubleword Int first element to Doubleword Int (GR32 or memory)
- //
- let ExeDomain = SSEPackedInt in {
- def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
- (iPTR 0)))]>, VEX,
- Sched<[WriteVecMoveToGpr]>;
- def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
- (ins i32mem:$dst, VR128:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (extractelt (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>,
- VEX, Sched<[WriteVecStore]>;
- def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
- (iPTR 0)))]>,
- Sched<[WriteVecMoveToGpr]>;
- def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (extractelt (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>,
- Sched<[WriteVecStore]>;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // Move Packed Quadword Int first element to Quadword Int (GR64 or memory)
- //
- let ExeDomain = SSEPackedInt in {
- let SchedRW = [WriteVecMoveToGpr] in {
- def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
- (iPTR 0)))]>,
- VEX;
- def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
- (iPTR 0)))]>;
- } //SchedRW
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
- def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>,
- VEX, Sched<[WriteVecStore]>;
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
- def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteVecStore]>;
- } // ExeDomain = SSEPackedInt
- //===---------------------------------------------------------------------===//
- // Bitcast FR64 -> GR64
- //
- let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
- def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>,
- VEX, Sched<[WriteVecMoveToGpr]>;
- def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>,
- Sched<[WriteVecMoveToGpr]>;
- } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
- //===---------------------------------------------------------------------===//
- // Move Scalar Single to Doubleword Int (bitcast FR32 -> GR32)
- //
- let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
- def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>,
- VEX, Sched<[WriteVecMoveToGpr]>;
- def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>,
- Sched<[WriteVecMoveToGpr]>;
- } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
- let Predicates = [UseAVX] in {
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
- (VMOVDI2PDIrr GR32:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
- (VMOV64toPQIrr GR64:$src)>;
- // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
- // These instructions also write zeros in the high part of a 256-bit register.
- def : Pat<(v4i32 (X86vzload32 addr:$src)),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v8i32 (X86vzload32 addr:$src)),
- (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
- }
- let Predicates = [UseSSE2] in {
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
- (MOVDI2PDIrr GR32:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
- (MOV64toPQIrr GR64:$src)>;
- def : Pat<(v4i32 (X86vzload32 addr:$src)),
- (MOVDI2PDIrm addr:$src)>;
- }
- // Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
- // "movq" due to MacOS parsing limitation. In order to parse old assembly, we add
- // these aliases.
- def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
- (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
- def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
- (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
- // Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
- def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
- (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
- def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
- (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
- //===---------------------------------------------------------------------===//
- // SSE2 - Move Quadword
- //===---------------------------------------------------------------------===//
- //===---------------------------------------------------------------------===//
- // Move Quadword Int to Packed Quadword Int
- //
- let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
- def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[UseAVX]>, VEX_WIG;
- def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
- } // ExeDomain, SchedRW
- //===---------------------------------------------------------------------===//
- // Move Packed Quadword Int to Quadword Int
- //
- let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
- def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (extractelt (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>,
- VEX, VEX_WIG;
- def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (extractelt (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
- } // ExeDomain, SchedRW
- // For disassembler only
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [SchedWriteVecLogic.XMM] in {
- def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
- def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>;
- }
- def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
- (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
- def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
- (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
- let Predicates = [UseAVX] in {
- def : Pat<(v2i64 (X86vzload64 addr:$src)),
- (VMOVQI2PQIrm addr:$src)>;
- def : Pat<(v4i64 (X86vzload64 addr:$src)),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
- def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
- (VMOVPQI2QImr addr:$dst, VR128:$src)>;
- }
- let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
- def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
- (MOVPQI2QImr addr:$dst, VR128:$src)>;
- }
- //===---------------------------------------------------------------------===//
- // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
- // IA32 document. movq xmm1, xmm2 does clear the high bits.
- //
- let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
- def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
- XS, VEX, Requires<[UseAVX]>, VEX_WIG;
- def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
- XS, Requires<[UseSSE2]>;
- } // ExeDomain, SchedRW
- let Predicates = [UseAVX] in {
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (VMOVZPQILo2PQIrr VR128:$src)>;
- }
- let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>;
- }
- let Predicates = [UseAVX] in {
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VMOVZPQILo2PQIrr
- (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
- sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VMOVZPQILo2PQIrr
- (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
- sub_xmm)>;
- }
- //===---------------------------------------------------------------------===//
- // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
- //===---------------------------------------------------------------------===//
- multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, // Emits `rr`/`rm` records for a single-source op: one RC input (or memory), one RC output.
- ValueType vt, RegisterClass RC, PatFrag mem_frag,
- X86MemOperand x86memop, X86FoldableSchedWrite sched> {
- def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), // Register-source form.
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (OpNode RC:$src)))]>,
- Sched<[sched]>;
- def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), // Memory-source form, matched through mem_frag.
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, // Unlike `rr`, there is no explicit `vt` wrapper around OpNode here.
- Sched<[sched.Folded]>; // The only source is the folded load; no ReadAfterFold entry is listed.
- }
- let Predicates = [HasAVX, NoVLX] in {
- defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v4f32, VR128, loadv4f32, f128mem,
- SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
- defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v4f32, VR128, loadv4f32, f128mem,
- SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
- defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, loadv8f32, f256mem,
- SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
- defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, loadv8f32, f256mem,
- SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
- }
- defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
- memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
- defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
- memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
- (VMOVSHDUPrm addr:$src)>;
- def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
- (VMOVSLDUPrm addr:$src)>;
- def : Pat<(v8i32 (X86Movshdup VR256:$src)),
- (VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
- (VMOVSHDUPYrm addr:$src)>;
- def : Pat<(v8i32 (X86Movsldup VR256:$src)),
- (VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
- (VMOVSLDUPYrm addr:$src)>;
- }
- let Predicates = [UseSSE3] in {
- def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
- (MOVSHDUPrm addr:$src)>;
- def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
- (MOVSLDUPrm addr:$src)>;
- }
- //===---------------------------------------------------------------------===//
- // SSE3 - Replicate Double FP - MOVDDUP
- //===---------------------------------------------------------------------===//
- multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> { // Emits the 128-bit `rr`/`rm` records (opcode 0x12) matching X86Movddup on v2f64.
- def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
- Sched<[sched.XMM]>;
- def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), // Memory operand is f64mem: only a 64-bit scalar is loaded.
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (v2f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>, // The loaded f64 is wrapped in scalar_to_vector before X86Movddup.
- Sched<[sched.XMM.Folded]>;
- }
- // FIXME: Merge with above classes when there are patterns for the ymm version
- multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> { // 256-bit counterpart of sse3_replicate_dfp: VR256 operands, v4f64, sched.YMM.
- def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
- Sched<[sched.YMM]>;
- def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), // Unlike the 128-bit `rm`, this takes a full f256mem operand via loadv4f64.
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst,
- (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
- Sched<[sched.YMM.Folded]>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
- VEX, VEX_WIG;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
- VEX, VEX_L, VEX_WIG;
- }
- defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
- // Select VMOVDDUPrm for X86Movddup whose v2f64 source is an X86vzload64.
- // The predicate list comes solely from the enclosing `let`.
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
- (VMOVDDUPrm addr:$src)>;
- }
- let Predicates = [UseSSE3] in {
- def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- }
- //===---------------------------------------------------------------------===//
- // SSE3 - Move Unaligned Integer
- //===---------------------------------------------------------------------===//
- let Predicates = [HasAVX] in {
- def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
- def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
- } // Predicates
- def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>;
- //===---------------------------------------------------------------------===//
- // SSE3 - Arithmetic
- //===---------------------------------------------------------------------===//
- multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC, // Emits `rr`/`rm` records matching X86Addsub; the opcode is fixed at 0xD0.
- X86MemOperand x86memop, X86FoldableSchedWrite sched,
- PatFrag ld_frag, bit Is2Addr = 1> { // Is2Addr only selects the asm string (2- vs 3-operand syntax).
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Both records implicitly use MXCSR and are marked as able to raise FP exceptions.
- def rr : I<0xD0, MRMSrcReg, // Built on the plain `I` class: no prefix class is named in this multiclass.
- (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : I<0xD0, MRMSrcMem, // Memory form: $src2 is matched through ld_frag.
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- }
- let Predicates = [HasAVX] in {
- let ExeDomain = SSEPackedSingle in {
- defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
- SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
- XD, VEX_4V, VEX_WIG;
- defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
- SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
- XD, VEX_4V, VEX_L, VEX_WIG;
- }
- let ExeDomain = SSEPackedDouble in {
- defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
- SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
- PD, VEX_4V, VEX_WIG;
- defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
- SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
- PD, VEX_4V, VEX_L, VEX_WIG;
- }
- }
- let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
- let ExeDomain = SSEPackedSingle in
- defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
- SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
- let ExeDomain = SSEPackedDouble in
- defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
- SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
- }
- //===---------------------------------------------------------------------===//
- // SSE3 Instructions
- //===---------------------------------------------------------------------===//
- // Horizontal ops
- multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, // Emits `rr`/`rm` two-source records on the S3DI class; used in this file for the *PS horizontal add/sub forms.
- X86MemOperand x86memop, SDNode OpNode,
- X86FoldableSchedWrite sched, PatFrag ld_frag,
- bit Is2Addr = 1> { // Is2Addr only selects the asm string (2- vs 3-operand syntax).
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Both records implicitly use MXCSR and are marked as able to raise FP exceptions.
- def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), // Memory form: $src2 is matched through ld_frag.
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- }
- multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, // Same shape as S3D_Int but built on the S3I class; used in this file for the *PD horizontal add/sub forms.
- X86MemOperand x86memop, SDNode OpNode,
- X86FoldableSchedWrite sched, PatFrag ld_frag,
- bit Is2Addr = 1> { // Is2Addr only selects the asm string (2- vs 3-operand syntax).
- let Uses = [MXCSR], mayRaiseFPException = 1 in { // Both records implicitly use MXCSR and are marked as able to raise FP exceptions.
- def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), // Memory form: $src2 is matched through ld_frag.
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- }
- let Predicates = [HasAVX] in {
- let ExeDomain = SSEPackedSingle in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
- }
- let ExeDomain = SSEPackedDouble in {
- defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
- defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
- defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
- }
- }
- let Constraints = "$src1 = $dst" in {
- let ExeDomain = SSEPackedSingle in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
- WriteFHAdd, memopv4f32>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
- WriteFHAdd, memopv4f32>;
- }
- let ExeDomain = SSEPackedDouble in {
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
- WriteFHAdd, memopv2f64>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
- WriteFHAdd, memopv2f64>;
- }
- }
- //===---------------------------------------------------------------------===//
- // SSSE3 - Packed Absolute Instructions
- //===---------------------------------------------------------------------===//
- /// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
- /// Defines the 128-bit forms:
- ///   rr - VR128 source register, scheduled with sched.XMM.
- ///   rm - i128mem source read through ld_frag, scheduled with
- ///        sched.XMM.Folded.
- /// Both forms select (vt (OpNode ...)) into VR128:$dst.
- multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
- SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
- def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
- Sched<[sched.XMM]>;
- def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (vt (OpNode (ld_frag addr:$src))))]>,
- Sched<[sched.XMM.Folded]>;
- }
- /// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32},
- /// 256-bit (VR256 / i256mem) variant. Defines:
- ///   Yrr - VR256 source register, scheduled with sched.YMM.
- ///   Yrm - i256mem source, scheduled with sched.YMM.Folded.
- /// Unlike SS3I_unop_rm there is no ld_frag parameter: the memory form always
- /// uses the plain `load` fragment.
- multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
- SDNode OpNode, X86SchedWriteWidths sched> {
- def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
- Sched<[sched.YMM]>;
- def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst,
- (vt (OpNode (load addr:$src))))]>,
- Sched<[sched.YMM.Folded]>;
- }
- // Packed absolute value (opcodes 0x1C/0x1D/0x1E for byte/word/dword), all
- // selecting the generic `abs` node.
- //  * VEX 128-bit forms: HasAVX; byte/word add NoVLX_Or_NoBWI, dword adds NoVLX.
- //  * VEX 256-bit forms: HasAVX2 with the same NoVLX* split, plus VEX_L. They
- //    reuse the VPABSB/VPABSW/VPABSD defm names; SS3I_unop_rm_y emits Yrr/Yrm
- //    so the resulting record names differ from the 128-bit rr/rm ones.
- //  * Legacy forms (PABSB/PABSW/PABSD): no explicit Predicates and `memop`
- //    instead of `load` for the folded operand.
- // NOTE(review): the NoVLX* predicates presumably defer to AVX-512 definitions
- // elsewhere -- confirm.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
- }
- let Predicates = [HasAVX, NoVLX] in {
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX] in {
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
- }
- defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
- memop>;
- defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
- memop>;
- defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
- memop>;
- //===---------------------------------------------------------------------===//
- // SSSE3 - Packed Binary Operator Instructions
- //===---------------------------------------------------------------------===//
- /// SS3I_binop_rm - Simple SSSE3 bin op
- ///   opc        - opcode byte passed to SS38I.
- ///   OpcodeStr  - mnemonic placed in front of the operand string.
- ///   OpNode     - SDNode matched by the selection patterns.
- ///   DstVT/OpVT - result type of the node and the type given to $src1.
- ///   RC         - register class of $dst, $src1 and (rr form) $src2.
- ///   memop_frag - load fragment wrapped around addr:$src2 in the rm form.
- ///   x86memop   - memory operand type of $src2 in the rm form.
- ///   sched      - scheduling class; rm uses sched.Folded + sched.ReadAfterFold.
- ///   Is2Addr    - 1 selects the two-operand asm string, 0 the three-operand one.
- /// Only the rr def is covered by `let isCommutable = 1` (the let has no
- /// braces). NOTE(review): call sites wrap non-commutative ops in
- /// `let isCommutable = 0`, which is evidently meant to override this default
- /// -- confirm let precedence.
- multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType DstVT, ValueType OpVT, RegisterClass RC,
- PatFrag memop_frag, X86MemOperand x86memop,
- X86FoldableSchedWrite sched, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
- Sched<[sched]>;
- def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst,
- (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
- /// Intrinsic-based 128-bit variant: the patterns match IntId128 directly on
- /// VR128 operands instead of an SDNode with explicit value types.
- ///   ld_frag - load fragment wrapped around addr:$src2 in the rm form.
- ///   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
- /// `let isCommutable = 1` applies to the rr def only (no braces).
- multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, X86FoldableSchedWrite sched,
- PatFrag ld_frag, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- Sched<[sched]>;
- def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- /// SS3I_binop_rm_int_y - 256-bit (VR256 / i256mem) intrinsic-based bin op.
- /// Defines Yrr and Yrm matching IntId256. There is no Is2Addr parameter: the
- /// asm string is always the three-operand form, and the memory form always
- /// uses the plain `load` fragment. `let isCommutable = 1` applies to Yrr only.
- multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- Intrinsic IntId256,
- X86FoldableSchedWrite sched> {
- let isCommutable = 1 in
- def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
- Sched<[sched]>;
- def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (IntId256 VR256:$src1, (load addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // VEX-encoded 128-bit vpshufb / vpmaddubsw / vpmulhrsw, gated on HasAVX plus
- // NoVLX_Or_NoBWI, with ImmT = NoImm and Is2Addr = 0 (three-operand asm).
- // vpshufb and vpmaddubsw sit inside `let isCommutable = 0`; vpmulhrsw is
- // deliberately outside it and keeps the isCommutable = 1 that SS3I_binop_rm
- // puts on its rr def.
- let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- let isCommutable = 0 in {
- defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
- VR128, load, i128mem,
- SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
- defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
- }
- defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
- }
- // VEX-encoded 128-bit horizontal add/sub and sign ops, gated on HasAVX only
- // (no NoVLX* predicate on this group). Everything is inside
- // `let isCommutable = 0`.
- //  * vphaddw/d, vphsubw/d select the X86hadd / X86hsub nodes.
- //  * vpsignb/w/d and the saturating vphaddsw / vphsubsw select the
- //    int_x86_ssse3_* intrinsics through SS3I_binop_rm_int.
- let ImmT = NoImm, Predicates = [HasAVX] in {
- let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
- load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
- defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
- load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
- defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
- load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
- defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
- load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
- int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
- int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
- int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
- int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
- int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
- }
- }
- // 256-bit counterparts of vpshufb / vpmaddubsw / vpmulhrsw: HasAVX2 plus
- // NoVLX_Or_NoBWI, VR256 / i256mem, the .YMM scheduling classes, and VEX_L.
- // As in the 128-bit group, only vpmulhrsw is left outside
- // `let isCommutable = 0`.
- let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- let isCommutable = 0 in {
- defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
- VR256, load, i256mem,
- SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
- v32i8, VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- }
- defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
- VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- }
- // 256-bit horizontal add/sub and sign ops, gated on HasAVX2, all inside
- // `let isCommutable = 0`.
- //  * vphadd*/vphsub* use SS3I_binop_rm with a Y suffix in the defm name.
- //  * vpsign* and vphaddsw / vphsubsw use SS3I_binop_rm_int_y with the
- //    int_x86_avx2_* intrinsics; their defm names carry no Y suffix because
- //    that multiclass emits Yrr / Yrm itself.
- let ImmT = NoImm, Predicates = [HasAVX2] in {
- let isCommutable = 0 in {
- defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
- VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
- load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
- VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
- load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
- defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
- int_x86_avx2_phadd_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
- defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
- int_x86_avx2_phsub_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
- }
- }
- // Legacy (non-VEX) SSSE3 binary ops.
- // None of these have i8 immediate fields.
- // All are two-address ($src1 tied to $dst, Is2Addr left at its default of 1)
- // and fold memory through `memop`. Everything except PMULHRSW is wrapped in
- // `let isCommutable = 0`; PMULHRSW keeps the multiclass default for its rr def.
- let ImmT = NoImm, Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memop, i128mem, SchedWritePHAdd.XMM>;
- defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memop, i128mem, SchedWritePHAdd.XMM>;
- defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memop, i128mem, SchedWritePHAdd.XMM>;
- defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memop, i128mem, SchedWritePHAdd.XMM>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, memop>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, memop>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, memop>;
- defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
- memop, i128mem, SchedWriteVarShuffle.XMM>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
- int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, memop>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
- int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, memop>;
- defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, memop, i128mem,
- SchedWriteVecIMul.XMM>;
- }
- defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
- }
- //===---------------------------------------------------------------------===//
- // SSSE3 - Packed Align Instruction Patterns
- //===---------------------------------------------------------------------===//
- /// ssse3_palignr - palignr with an 8-bit immediate (opcode 0x0F in SS3AI).
- ///   asm        - mnemonic placed in front of the operand string.
- ///   VT / RC    - vector type and register class of $dst, $src1, $src2.
- ///   memop_frag - load fragment wrapped around addr:$src2 in the rmi form.
- ///   x86memop   - memory operand type of $src2 in the rmi form.
- ///   sched      - scheduling class; rmi uses sched.Folded + ReadAfterFold.
- ///   Is2Addr    - 1 selects the two-operand asm string, 0 the three-operand one.
- /// Both defs match X86PAlignr with $src3 as an (i8 timm) and are marked
- /// hasSideEffects = 0; rmi is additionally marked mayLoad = 1.
- multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
- PatFrag memop_frag, X86MemOperand x86memop,
- X86FoldableSchedWrite sched, bit Is2Addr = 1> {
- let hasSideEffects = 0 in {
- def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
- Sched<[sched]>;
- let mayLoad = 1 in
- def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (VT (X86PAlignr RC:$src1,
- (memop_frag addr:$src2),
- (i8 timm:$src3))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- }
- // palignr instantiations:
- //  * VPALIGNR  - VEX 128-bit, HasAVX + NoVLX_Or_NoBWI, three-operand asm.
- //  * VPALIGNRY - VEX_L 256-bit, HasAVX2 + NoVLX_Or_NoBWI, three-operand asm.
- //  * PALIGNR   - legacy form under UseSSSE3, $src1 tied to $dst, folds memory
- //                through `memop`.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
- SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
- SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
- let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
- SchedWriteShuffle.XMM>;
- //===---------------------------------------------------------------------===//
- // SSE3 - Thread synchronization
- //===---------------------------------------------------------------------===//
- // monitor / mwait take no explicit operands (empty outs/ins); the registers
- // they read are modeled through Uses. monitor is split by mode: the
- // Not64BitMode def uses EAX and the In64BitMode def uses RAX, each together
- // with ECX and EDX. Only mwait has a selection pattern (int_x86_sse3_mwait);
- // both monitor defs have empty pattern lists. All three require HasSSE3.
- let SchedRW = [WriteSystem] in {
- let Uses = [EAX, ECX, EDX] in
- def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
- TB, Requires<[HasSSE3, Not64BitMode]>;
- let Uses = [RAX, ECX, EDX] in
- def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
- TB, Requires<[HasSSE3, In64BitMode]>;
- let Uses = [ECX, EAX] in
- def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
- } // SchedRW
- // Aliases that accept the explicit-register spellings. Each maps to the
- // operand-less def above and is restricted to the matching mode.
- def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
- def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
- def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
- Requires<[Not64BitMode]>;
- def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
- Requires<[In64BitMode]>;
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Packed Move with Sign/Zero Extend
- // NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
- //===----------------------------------------------------------------------===//
- /// SS41I_pmovx_rrrm - register (rr) and memory (rm) forms of one packed
- /// move-with-extend instruction.
- ///   MemOp - memory operand type of the rm source.
- ///   OutRC - register class of $dst.
- ///   InRC  - register class of the rr source.
- /// Both defs have empty pattern lists; selection patterns that reference them
- /// are written separately (see the SS41I_pmovx_*patterns multiclasses).
- multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
- RegisterClass OutRC, RegisterClass InRC,
- X86FoldableSchedWrite sched> {
- def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[sched]>;
- def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[sched.Folded]>;
- }
- /// SS41I_pmovx_rm_all - emits the three encodings of one pmovsx/pmovzx op:
- ///   NAME     - legacy form, VR128 -> VR128, MemOp, no Predicates set here.
- ///   V#NAME   - VEX form, VR128 -> VR128, MemOp, under [HasAVX, prd].
- ///   V#NAME#Y - VEX_L form, VR128 -> VR256, MemYOp, WriteVPMOV256, under
- ///              [HasAVX2, prd].
- /// The VEX mnemonics are formed by prefixing "v" to OpcodeStr.
- multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
- X86MemOperand MemOp, X86MemOperand MemYOp,
- Predicate prd> {
- defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
- SchedWriteShuffle.XMM>;
- let Predicates = [HasAVX, prd] in
- defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
- VR128, VR128, SchedWriteShuffle.XMM>,
- VEX, VEX_WIG;
- let Predicates = [HasAVX2, prd] in
- defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
- VR256, VR128, WriteVPMOV256>,
- VEX, VEX_L, VEX_WIG;
- }
- /// SS41I_pmovx_rm - emits both the sign- and zero-extending families for one
- /// element-size pair. OpcodeStr is the size suffix (e.g. "bw"); the mnemonics
- /// become "pmovsx"/"pmovzx" + suffix and the record names PMOVSX/PMOVZX + NAME.
- /// The zero-extend opcode is the sign-extend opcode plus 0x10.
- multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
- X86MemOperand MemYOp, Predicate prd> {
- defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
- MemOp, MemYOp, prd>;
- defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
- !strconcat("pmovzx", OpcodeStr),
- MemOp, MemYOp, prd>;
- }
- // One instantiation per source/destination element-size pair. The two memory
- // operand arguments are the 128-bit-result and 256-bit-result source widths
- // respectively. Only BW uses NoVLX_Or_NoBWI; the rest use NoVLX.
- defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
- defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
- defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;
- defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
- defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
- defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
- // AVX2 Patterns
- // Selection patterns for the 256-bit (Y) pmovsx/pmovzx instructions.
- //   OpcPrefix - record-name prefix ("VPMOVSX"/"VPMOVZX") pasted with the size
- //               suffix and Yrr/Yrm to find the instruction.
- //   ExtTy     - "s" or "z"; pasted in front of "extloadvi8/16/32" to pick the
- //               extending-load fragment.
- //   ExtOp     - extend node used when the source vector is consumed in full
- //               (BW, WD, DQ).
- //   InVecOp   - *_invec node used when only part of the 128-bit source is
- //               consumed (BD, BQ, WQ).
- // BW patterns are under [HasAVX2, NoVLX_Or_NoBWI]; all others under
- // [HasAVX2, NoVLX].
- multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
- SDNode ExtOp, SDNode InVecOp> {
- // Register-Register patterns
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
- }
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
- }
- // Simple Register-Memory patterns
- // (extending-load fragments, plus ExtOp of a full v16i8 load for BW)
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- }
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- }
- // AVX2 Register-Memory patterns
- // (fold full-vector loads and scalar loads that were placed into a vector via
- // scalar_to_vector / X86vzload and then bitcast)
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- }
- }
- // Instantiate the 256-bit patterns for the sign- and zero-extending families.
- defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
- defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
- // SSE4.1/AVX patterns.
- // Selection patterns for the 128-bit pmovsx/pmovzx instructions.
- //   OpcPrefix - record-name prefix pasted with the size suffix and rr/rm.
- //   ExtTy     - "s" or "z"; pasted in front of "extloadvi8/16/32".
- //   ExtOp     - extend node applied to the 128-bit source in every pattern
- //               (the instantiations below pass the *_invec nodes).
- // BW patterns are under [HasAVX, NoVLX_Or_NoBWI]; all others under
- // [HasAVX, NoVLX].
- multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
- SDNode ExtOp> {
- // Register-register patterns.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
- }
- // Extending-load fragment patterns.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
- (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
- (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- }
- // Load-folding patterns: scalar loads placed into a vector (scalar_to_vector
- // or X86vzload) and bitcast, plus full 128-bit vector loads.
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
- (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
- (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
- (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
- (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
- (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- }
- }
- // Instantiate the 128-bit patterns for the VEX and the legacy instructions.
- // Both pass the *_invec extend nodes.
- // NOTE(review): the legacy instantiations rely on the outer
- // `let Predicates = [UseSSE41]` replacing the HasAVX-based Predicates set
- // inside the multiclass -- confirm let precedence.
- defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
- defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;
- let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
- defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
- }
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Extract Instructions
- //===----------------------------------------------------------------------===//
- /// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
- ///   rr - selects X86pextrb of a v16i8 into GR32orGR64:$dst.
- ///   mr - selects a store of the X86pextrb result truncated to i8; marked
- ///        hasSideEffects = 0, mayStore = 1.
- /// The element index $src2 is matched as a timm.
- multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
- timm:$src2))]>,
- Sched<[WriteVecExtract]>;
- let hasSideEffects = 0, mayStore = 1 in
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
- addr:$dst)]>, Sched<[WriteVecExtractSt]>;
- }
- // VEX form under [HasAVX, NoBWI]; the brace-less let covers VPEXTRB only, so
- // the legacy PEXTRB gets no Predicates from here.
- let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
- defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
- /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
- ///   rr_REV - register-destination encoding with no selection pattern; it is
- ///            isCodeGenOnly with ForceDisassemble = 1 and is tied to NAME#rr
- ///            through FoldGenData. NOTE(review): NAME#rr is presumably the
- ///            pextrw register form defined elsewhere -- confirm.
- ///   mr     - selects a store of the X86pextrw result truncated to i16.
- multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;
- let hasSideEffects = 0, mayStore = 1 in
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
- addr:$dst)]>, Sched<[WriteVecExtractSt]>;
- }
- // VEX form under [HasAVX, NoBWI]; the brace-less let covers VPEXTRW only.
- let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
- defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
- /// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
- ///   rr - selects extractelt of a v4i32 into GR32:$dst.
- ///   mr - selects a store of the same extractelt to addr:$dst.
- /// Unlike the 8/16-bit variants, these match the generic extractelt node with
- /// an `imm` index and set no explicit mayStore / hasSideEffects flags.
- multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
- (ins VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst,
- (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
- Sched<[WriteVecExtract]>;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
- addr:$dst)]>, Sched<[WriteVecExtractSt]>;
- }
- // VEX form under [HasAVX, NoDQI]; the brace-less let covers VPEXTRD only.
- let Predicates = [HasAVX, NoDQI] in
- defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
- defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
- /// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
- ///   rr - selects extractelt of a v2i64 into GR64:$dst.
- ///   mr - selects a store of the same extractelt to an i64mem destination.
- /// The element index $src2 is matched as an `imm`.
- multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR64:$dst,
- (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
- Sched<[WriteVecExtract]>;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, Sched<[WriteVecExtractSt]>;
- }
- // Same opcode (0x16) as pextrd; the 64-bit forms are distinguished by VEX_W on
- // the VEX encoding and REX_W on the legacy one.
- let Predicates = [HasAVX, NoDQI] in
- defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
- defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
- /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
- /// destination
- ///   rr - the v4f32 source is bitcast to v4i32 (bc_v4i32) and the selected
- ///        element is produced in GR32orGR64:$dst.
- ///   mr - stores the same bitcast-and-extract result to an f32mem destination.
- /// The element index $src2 is matched as an `imm`.
- multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32orGR64:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
- Sched<[WriteVecExtract]>;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>, Sched<[WriteVecExtractSt]>;
- }
- // Both forms are in the SSEPackedSingle domain. The VEX form is gated on
- // UseAVX; the brace-less let covers VEXTRACTPS only.
- let ExeDomain = SSEPackedSingle in {
- let Predicates = [UseAVX] in
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
- defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
- }
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Insert Instructions
- //===----------------------------------------------------------------------===//
- /// SS41I_insert8 - SSE 4.1 insert of an 8-bit element into a 128-bit vector.
- ///   rr - element comes from GR32orGR64:$src2; selects X86pinsrb.
- ///   rm - element comes from an i8mem operand read with extloadi8.
- ///   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
- /// The element index $src3 is matched as a timm.
- multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
- Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
- Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
- }
- // VEX form: [HasAVX, NoBWI], three-operand asm (Is2Addr = 0).
- // Legacy form: two-address, with $src1 tied to $dst.
- let Predicates = [HasAVX, NoBWI] in
- defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
- let Constraints = "$src1 = $dst" in
- defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
- /// SS41I_insert32 - SSE 4.1 insert of a 32-bit element into a v4i32.
- ///   rr - element comes from GR32:$src2.
- ///   rm - element comes from an i32mem operand read with loadi32.
- ///   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
- /// Both forms match the generic insertelt node with an `imm` index.
- multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
- Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
- }
- // VEX form: [HasAVX, NoDQI], three-operand asm (Is2Addr = 0).
- // Legacy form: two-address, with $src1 tied to $dst.
- let Predicates = [HasAVX, NoDQI] in
- defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
- let Constraints = "$src1 = $dst" in
- defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
- /// SS41I_insert64 - SSE 4.1 insert of a 64-bit element into a v2i64.
- ///   rr - element comes from GR64:$src2.
- ///   rm - element comes from an i64mem operand read with loadi64.
- ///   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
- /// Both forms match the generic insertelt node with an `imm` index.
- multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
- Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
- }
- // Same opcode (0x22) as pinsrd; the 64-bit forms are distinguished by VEX_W on
- // the VEX encoding and REX_W on the legacy one. The legacy form ties $src1 to
- // $dst.
- let Predicates = [HasAVX, NoDQI] in
- defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
- let Constraints = "$src1 = $dst" in
- defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
- // insertps has a few different modes, there's the first two here below which
- // are optimized inserts that won't zero arbitrary elements in the destination
- // vector. The next one matches the intrinsic and could zero arbitrary elements
- // in the target vector.
- //   rr - both sources are VR128; selects X86insertps with a timm control byte.
- //        Only this def is covered by `let isCommutable = 1` (no braces).
- //   rm - $src2 is an f32mem scalar, matched as
- //        (v4f32 (scalar_to_vector (loadf32 ...))).
- //   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
- multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM]>;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86insertps VR128:$src1,
- (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- timm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
- }
- // Both forms are in the SSEPackedSingle domain.
- // VEX form: UseAVX, three-operand asm (Is2Addr = 0).
- // Legacy form: $src1 tied to $dst, Is2Addr passed explicitly as 1.
- let ExeDomain = SSEPackedSingle in {
- let Predicates = [UseAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
- VEX_4V, VEX_WIG;
- let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
- }
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Round Instructions
- //===----------------------------------------------------------------------===//
- multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, RegisterClass RC,
- ValueType VT, PatFrag mem_frag, SDPatternOperator OpNode,
- X86FoldableSchedWrite sched> {
- // Packed unary FP operation with an immediate: defines a register form (r)
- // and a load-folding form (m). Both read MXCSR and may raise FP exceptions.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- def r : SS4AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
- Sched<[sched]>;
- // Memory form: the vector source is loaded through mem_frag.
- def m : SS4AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
- Sched<[sched.Folded]>; // Only the folded write; there is no non-address register source.
- }
- }
- multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr, X86FoldableSchedWrite sched> { // Pattern-less, codegen-only scalar forms with an extra $src1 register operand.
- let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
- def SSr : SS4AIi8<opcss, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched]>;
- let mayLoad = 1 in // Set explicitly because the def has no pattern to infer it from.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
- } // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1
- let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched]>;
- let mayLoad = 1 in // Set explicitly because the def has no pattern to infer it from.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
- } // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
- }
- // sse41_fp_unop_s - Pattern-less, codegen-only scalar forms (SSr/SSm/SDr/SDm)
- // of a unary FP operation that takes an immediate. All four defs read MXCSR
- // and may raise FP exceptions.
- multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr, X86FoldableSchedWrite sched> {
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
- def SSr : SS4AIi8<opcss, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched]>;
- // The load form's only register inputs are address registers, so it carries
- // no ReadAfterFold entry (matching the 'm' form of sse41_fp_unop_p).
- let mayLoad = 1 in
- def SSm : SS4AIi8<opcss, MRMSrcMem,
- (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded]>;
- } // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1
- let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched]>;
- // Same reasoning as SSm: no non-address register source to delay.
- let mayLoad = 1 in
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
- (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded]>;
- } // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
- }
- }
- multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr, X86FoldableSchedWrite sched,
- ValueType VT32, ValueType VT64,
- SDNode OpNode, bit Is2Addr = 1> { // VR128 "_Int" scalar forms with patterns; Is2Addr selects the asm string.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- let ExeDomain = SSEPackedSingle in {
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
- Sched<[sched]>;
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem, // Second source is folded through sse_load_f32.
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- } // ExeDomain = SSEPackedSingle
- let ExeDomain = SSEPackedDouble in {
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
- Sched<[sched]>;
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, // Second source is folded through sse_load_f64.
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- } // ExeDomain = SSEPackedDouble
- }
- }
- // FP round - VEX-encoded packed forms (vroundps, vroundpd) on VR128 and VR256.
- let Predicates = [HasAVX, NoVLX] in {
- let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
- // Packed single-precision forms
- defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
- loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
- VEX, VEX_WIG;
- defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
- loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
- VEX, VEX_L, VEX_WIG;
- }
- let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in { // Packed double-precision forms
- defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
- loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
- VEX, VEX_WIG;
- defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
- loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
- VEX, VEX_L, VEX_WIG;
- }
- }
- let Predicates = [UseAVX] in { // Two defms share the VROUND prefix; their def suffixes differ.
- defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, // VR128 *_Int forms with patterns
- v4f32, v2f64, X86RndScales, 0>,
- VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
- defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, // FR32/FR64 codegen-only forms
- VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
- }
- let Predicates = [UseAVX] in { // Scalar register patterns; the extra first source is fed IMPLICIT_DEF.
- def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
- def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
- }
- let Predicates = [UseAVX, OptForSize] in { // Scalar load folding is only selected under OptForSize.
- def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
- def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
- }
- let ExeDomain = SSEPackedSingle in // Non-VEX packed forms use memop fragments for their loads.
- defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
- memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
- let ExeDomain = SSEPackedDouble in
- defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
- memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
- defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; // FR32/FR64 codegen-only forms
- let Constraints = "$src1 = $dst" in // The *_Int forms are two-address: $src1 is tied to $dst.
- defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
- v4f32, v2f64, X86RndScales>;
- let Predicates = [UseSSE41] in { // Scalar register patterns for the non-VEX forms.
- def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
- (ROUNDSSr FR32:$src1, timm:$src2)>;
- def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
- (ROUNDSDr FR64:$src1, timm:$src2)>;
- }
- let Predicates = [UseSSE41, OptForSize] in { // Scalar load folding is only selected under OptForSize.
- def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
- (ROUNDSSm addr:$src1, timm:$src2)>;
- def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
- (ROUNDSDm addr:$src1, timm:$src2)>;
- }
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Packed Bit Test
- //===----------------------------------------------------------------------===//
- // ptest is selected from the X86ptest node, which X86ISelLowering produces
- // primarily from the corresponding Intel intrinsic. The instructions have no
- // register result; they only define EFLAGS.
- // The memory forms load integer vectors (loadv2i64 / loadv4i64), so both use
- // the integer memory operand classes (i128mem / i256mem).
- let Defs = [EFLAGS], Predicates = [HasAVX] in {
- def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
- def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
- VEX, VEX_WIG;
- def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
- def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
- VEX, VEX_L, VEX_WIG;
- }
- // Non-VEX ptest: no register result, only EFLAGS is defined. The memory form
- // loads an integer vector (memopv2i64), so it uses the integer memory operand
- // class i128mem.
- let Defs = [EFLAGS] in {
- def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM]>;
- def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
- }
- // The bit test instructions below are AVX only: both defs carry VEX.
- multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
- X86FoldableSchedWrite sched> { // rr/rm forms with no register result; the patterns set EFLAGS from X86testp.
- def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
- Sched<[sched]>, VEX;
- def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), // Second source is loaded through mem_frag.
- !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
- }
- let Defs = [EFLAGS], Predicates = [HasAVX] in { // EFLAGS is declared here; avx_bittest itself does not set Defs.
- let ExeDomain = SSEPackedSingle in {
- defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
- SchedWriteFTest.XMM>;
- defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32, // 256-bit form adds VEX_L
- SchedWriteFTest.YMM>, VEX_L;
- }
- let ExeDomain = SSEPackedDouble in {
- defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
- SchedWriteFTest.XMM>;
- defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64, // 256-bit form adds VEX_L
- SchedWriteFTest.YMM>, VEX_L;
- }
- }
- //===----------------------------------------------------------------------===//
- // SSE4.1 - Misc Instructions
- //===----------------------------------------------------------------------===//
- let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { // ctpop on GR16/GR32/GR64; every pattern also lists EFLAGS as an implicit def.
- def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, OpSize16, XS;
- def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
- (implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, OpSize16, XS; // Load-only source: folded write without ReadAfterFold.
- def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, OpSize32, XS;
- def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
- (implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, OpSize32, XS;
- def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), // 64-bit forms derive from RI instead of I.
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, XS;
- def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
- (implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, XS;
- }
- // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose result type is v8i16; defines rr and rm forms.
- multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- SDNode OpNode, PatFrag ld_frag,
- X86FoldableSchedWrite Sched> { // NOTE(review): this argument shares its name with the Sched<> class used below; sibling multiclasses call it `sched`.
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
- Sched<[Sched]>;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), // Source is loaded through ld_frag.
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (v8i16 (OpNode (ld_frag addr:$src))))]>,
- Sched<[Sched.Folded]>;
- }
- // PHMINPOSUW is scheduled with its own WritePHMINPOS class in both the VEX
- // and the non-VEX encoding (it does not reuse the PSAD scheduling model).
- let Predicates = [HasAVX] in
- defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
- X86phminpos, load,
- WritePHMINPOS>, VEX, VEX_WIG;
- defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw", // Non-VEX form folds loads via memop instead of load.
- X86phminpos, memop,
- WritePHMINPOS>;
- /// SS48I_binop_rm - Simple SSE41 binary operator; defines rr and rm forms, with Is2Addr selecting the asm string.
- multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, X86FoldableSchedWrite sched,
- bit Is2Addr = 1> {
- let isCommutable = 1 in // Brace-less let: applies to the rr def only.
- def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), // Second source is loaded through memop_frag.
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- let Predicates = [HasAVX, NoVLX] in { // 128-bit VEX forms: v4i32 min/max plus vpmuldq (v2i64).
- defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, // Uses the integer-multiply scheduling class.
- load, i128mem, SchedWriteVecIMul.XMM, 0>,
- VEX_4V, VEX_WIG;
- }
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { // 128-bit VEX forms: v16i8 / v8i16 min/max.
- defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX] in { // 256-bit VEX_L forms: v8i32 min/max plus vpmuldq (v4i64).
- defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, // Uses the integer-multiply scheduling class.
- load, i256mem, SchedWriteVecIMul.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- }
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // 256-bit VEX_L forms: v32i8 / v16i16 min/max.
- defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- }
- let Constraints = "$src1 = $dst" in { // Two-address non-VEX forms; loads are folded via memop rather than load.
- defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, // Uses the integer-multiply scheduling class.
- memop, i128mem, SchedWriteVecIMul.XMM, 1>;
- }
- let Predicates = [HasAVX, NoVLX] in // 128-bit VEX pmulld, with its own SchedWritePMULLD class.
- defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- load, i128mem, SchedWritePMULLD.XMM, 0>,
- VEX_4V, VEX_WIG;
- let Predicates = [HasAVX] in // Unlike VPMULLD, the compare carries no NoVLX restriction.
- defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- let Predicates = [HasAVX2, NoVLX] in // 256-bit VEX_L pmulld.
- defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- load, i256mem, SchedWritePMULLD.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- let Predicates = [HasAVX2] in // 256-bit compare, again without NoVLX.
- defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- let Constraints = "$src1 = $dst" in { // Two-address non-VEX forms; loads are folded via memop.
- defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memop, i128mem, SchedWritePMULLD.XMM, 1>;
- defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memop, i128mem, SchedWriteVecALU.XMM, 1>;
- }
- /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate, matched through an intrinsic (IntId).
- multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr,
- X86FoldableSchedWrite sched> { // Is2Addr has no default here; every user passes it explicitly.
- let isCommutable = 1 in // Applies to rri only; users may override it with an enclosing let at the defm.
- def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
- Sched<[sched]>;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), // Second source is loaded through memop_frag.
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst,
- (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- /// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate, matched through an SDNode with result type OpVT.
- multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr,
- X86FoldableSchedWrite sched> { // Is2Addr has no default here and only selects the asm string.
- let isCommutable = 1 in // Brace-less let: applies to the rri def only.
- def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
- Sched<[sched]>;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), // Second source is loaded through memop_frag.
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // Invert the low 2 bits of a blend immediate (all higher bits are dropped).
- // Passed as the commute transform of blend defs whose sources get swapped.
- def BlendCommuteImm2 : SDNodeXForm<timm, [{
- const uint8_t LaneBits = 0x03;
- const uint8_t Flipped = (N->getZExtValue() & LaneBits) ^ LaneBits;
- return getI8Imm(Flipped, SDLoc(N));
- }]>;
- // Invert the low 4 bits of a blend immediate (all higher bits are dropped).
- // Passed as the commute transform of blend defs whose sources get swapped.
- def BlendCommuteImm4 : SDNodeXForm<timm, [{
- const uint8_t LaneBits = 0x0f;
- const uint8_t Flipped = (N->getZExtValue() & LaneBits) ^ LaneBits;
- return getI8Imm(Flipped, SDLoc(N));
- }]>;
- // Invert all 8 bits of a blend immediate.
- // Passed as the commute transform of blend defs whose sources get swapped.
- def BlendCommuteImm8 : SDNodeXForm<timm, [{
- const uint8_t LaneBits = 0xff;
- const uint8_t Flipped = (N->getZExtValue() & LaneBits) ^ LaneBits;
- return getI8Imm(Flipped, SDLoc(N));
- }]>;
- // Widen a 4-bit blendi immediate to 8 bits for use with pblendw: every set
- // input bit i becomes the bit pair (2*i, 2*i+1) in the result.
- def BlendScaleImm4 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Wide = 0;
- for (unsigned Bit = 0; Bit < 4; ++Bit) {
- if ((Mask >> Bit) & 1)
- Wide |= 0x3 << (2 * Bit);
- }
- return getI8Imm(Wide, SDLoc(N));
- }]>;
- // Widen a 2-bit blendi immediate to 8 bits for use with pblendw: input bit 0
- // fills the low nibble of the result and input bit 1 fills the high nibble.
- def BlendScaleImm2 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Wide = 0;
- if (Mask & 0x1)
- Wide |= 0x0f;
- if (Mask & 0x2)
- Wide |= 0xf0;
- return getI8Imm(Wide, SDLoc(N));
- }]>;
- // Widen a 2-bit blendi immediate to 4 bits for use with pblendd: input bit 0
- // becomes result bits 0-1 and input bit 1 becomes result bits 2-3.
- def BlendScaleImm2to4 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Wide = 0;
- if (Mask & 0x1)
- Wide |= 0x3;
- if (Mask & 0x2)
- Wide |= 0xc;
- return getI8Imm(Wide, SDLoc(N));
- }]>;
- // Widen a 4-bit blendi immediate to 8 bits for use with pblendw and invert
- // it: start from all ones and clear the bit pair of every set input bit.
- def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Inverted = 0xff;
- for (unsigned Bit = 0; Bit < 4; ++Bit) {
- if ((Mask >> Bit) & 1)
- Inverted &= ~(0x3 << (2 * Bit));
- }
- return getI8Imm(Inverted, SDLoc(N));
- }]>;
- // Widen a 2-bit blendi immediate to 8 bits for use with pblendw and invert
- // it: start from all ones and clear the nibble of every set input bit.
- def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Inverted = 0xff;
- if (Mask & 0x1)
- Inverted &= 0xf0;
- if (Mask & 0x2)
- Inverted &= 0x0f;
- return getI8Imm(Inverted, SDLoc(N));
- }]>;
- // Widen a 2-bit blendi immediate to 4 bits for use with pblendd and invert
- // it: start from 0xf and clear the bit pair of every set input bit.
- def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
- const uint8_t Mask = N->getZExtValue();
- uint8_t Inverted = 0xf;
- if (Mask & 0x1)
- Inverted &= 0xc;
- if (Mask & 0x2)
- Inverted &= 0x3;
- return getI8Imm(Inverted, SDLoc(N));
- }]>;
- // VEX-encoded mpsadbw and dot-product instructions, matched through their
- // intrinsics. All are instantiated with Is2Addr = 0 (three-operand asm).
- let Predicates = [HasAVX] in {
- // Overrides the isCommutable = 1 that SS41I_binop_rmi_int puts on rri.
- let isCommutable = 0 in {
- defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, load, i128mem, 0,
- SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
- }
- // The dot products read MXCSR and may raise FP exceptions.
- let Uses = [MXCSR], mayRaiseFPException = 1 in {
- let ExeDomain = SSEPackedSingle in
- defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, load, f128mem, 0,
- SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
- let ExeDomain = SSEPackedDouble in
- defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, load, f128mem, 0,
- SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
- // 256-bit packed-single form; takes a floating-point memory operand class
- // like the 128-bit dot products above.
- let ExeDomain = SSEPackedSingle in
- defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, load, f256mem, 0,
- SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
- }
- }
- let Predicates = [HasAVX2] in { // 256-bit mpsadbw, matched through int_x86_avx2_mpsadbw.
- let isCommutable = 0 in { // Overrides the isCommutable = 1 set on rri inside the multiclass.
- defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, load, i256mem, 0,
- SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
- }
- }
- let Constraints = "$src1 = $dst" in { // Two-address non-VEX forms; loads are folded via memop.
- let isCommutable = 0 in { // Overrides the isCommutable = 1 set on rri inside the multiclass.
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memop, i128mem, 1,
- SchedWriteMPSAD.XMM>;
- }
- let ExeDomain = SSEPackedSingle in
- defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memop, f128mem, 1,
- SchedWriteDPPS.XMM>, SIMD_EXC; // SIMD_EXC is attached here; the VEX forms use an explicit let for MXCSR instead.
- let ExeDomain = SSEPackedDouble in
- defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memop, f128mem, 1,
- SchedWriteDPPD.XMM>, SIMD_EXC;
- }
- /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate; defines rri, rmi and a load-commuting pattern.
- multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr, Domain d,
- X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
- let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in { // The tie constraint is set here, not by the user of the multiclass.
- let isCommutable = 1 in // Brace-less let: applies to the rri def only.
- def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
- Sched<[sched]>;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), // Second source is loaded through memop_frag.
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- // Pattern to commute if load is in first source: select rmi with the operands swapped and the immediate rewritten by commuteXForm.
- def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
- (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
- (commuteXForm timm:$src3))>;
- }
- let Predicates = [HasAVX] in { // VEX blends; each commute transform's width matches the element count (4, 8, 2, 4, 8).
- defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
- VR128, load, f128mem, 0, SSEPackedSingle,
- SchedWriteFBlend.XMM, BlendCommuteImm4>,
- VEX_4V, VEX_WIG;
- defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
- VR256, load, f256mem, 0, SSEPackedSingle,
- SchedWriteFBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
- VR128, load, f128mem, 0, SSEPackedDouble,
- SchedWriteFBlend.XMM, BlendCommuteImm2>,
- VEX_4V, VEX_WIG;
- defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
- VR256, load, f256mem, 0, SSEPackedDouble,
- SchedWriteFBlend.YMM, BlendCommuteImm4>,
- VEX_4V, VEX_L, VEX_WIG;
- defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
- VR128, load, i128mem, 0, SSEPackedInt,
- SchedWriteBlend.XMM, BlendCommuteImm8>,
- VEX_4V, VEX_WIG;
- }
- let Predicates = [HasAVX2] in { // 256-bit vpblendw; commutes with the 8-bit transform, same as the 128-bit form.
- defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
- VR256, load, i256mem, 0, SSEPackedInt,
- SchedWriteBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, VEX_WIG;
- }
- // Emulate vXi32/vXi64 blends with vXf32/vXf64 blends (256-bit) or pblendw
- // (128-bit). ExecutionDomainFixPass will clean up domains later on.
- let Predicates = [HasAVX1Only] in {
- def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
- def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
- (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
- def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), // Load in the first source: commute and invert the mask.
- (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;
- // Use pblendw for 128-bit integer to keep it in the integer domain and prevent
- // it from becoming movsd via commuting under optsize.
- def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
- (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
- def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
- def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), // Load in the first source: scale and invert the mask.
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
- def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
- def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
- (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
- def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), // Load in the first source: commute and invert the mask.
- (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;
- // Use pblendw for 128-bit integer to keep it in the integer domain and prevent
- // it from becoming movss via commuting under optsize.
- def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
- (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), // Load in the first source: scale and invert the mask.
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
- }
- defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, // Is2Addr = 1: the multiclass ties $src1 to $dst itself.
- VR128, memop, f128mem, 1, SSEPackedSingle,
- SchedWriteFBlend.XMM, BlendCommuteImm4>;
- defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
- VR128, memop, f128mem, 1, SSEPackedDouble,
- SchedWriteFBlend.XMM, BlendCommuteImm2>;
- defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
- VR128, memop, i128mem, 1, SSEPackedInt,
- SchedWriteBlend.XMM, BlendCommuteImm8>;
- let Predicates = [UseSSE41] in {
- // Use pblendw for 128-bit integer blends (v2i64 and v4i32) to keep them in the
- // integer domain and prevent them from becoming movsd/movss via commuting under optsize.
- def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
- (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
- def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
- def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), // Load in the first source: scale and invert the mask.
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
- def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
- (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), // Load in the first source: scale and invert the mask.
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
- }
- // For insertion into the zero index (low half) of a 256-bit vector, it is
- // more efficient to generate a blend with immediate instead of an insert*128.
- let Predicates = [HasAVX] in {
- def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
- (VBLENDPDYrri VR256:$src1,
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), // Widen the 128-bit value; its upper half is undefined.
- VR128:$src2, sub_xmm), 0x3)>;
- def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
- (VBLENDPSYrri VR256:$src1,
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
- def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)), // Base vector is the load, so operands swap and the mask is the 4-bit complement (0xc vs 0x3).
- (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
- def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)), // Same swap for v8f32: 0xf0 is the 8-bit complement of 0xf.
- (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
- }
- /// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
- multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, ValueType VT,
- PatFrag mem_frag, SDNode OpNode,
- X86FoldableSchedWrite sched> {
- def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst), // all-register form
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))], // OpNode takes the operands in reverse order: (src3, src2, src1)
- SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched]>;
- def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst), // form with $src2 read from memory through mem_frag
- (ins RC:$src1, x86memop:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst,
- (OpNode RC:$src3, (mem_frag addr:$src2),
- RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched.Folded, sched.ReadAfterFold,
- // x86memop:$src2
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src3
- sched.ReadAfterFold]>;
- }
- let Predicates = [HasAVX] in { // 128-bit and 256-bit FP forms plus the 128-bit byte form
- let ExeDomain = SSEPackedDouble in {
- defm VBLENDVPD : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
- v2f64, loadv2f64, X86Blendv,
- SchedWriteFVarBlend.XMM>;
- defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
- v4f64, loadv4f64, X86Blendv,
- SchedWriteFVarBlend.YMM>, VEX_L;
- } // ExeDomain = SSEPackedDouble
- let ExeDomain = SSEPackedSingle in {
- defm VBLENDVPS : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
- v4f32, loadv4f32, X86Blendv,
- SchedWriteFVarBlend.XMM>;
- defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
- v8f32, loadv8f32, X86Blendv,
- SchedWriteFVarBlend.YMM>, VEX_L;
- } // ExeDomain = SSEPackedSingle
- defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
- v16i8, loadv16i8, X86Blendv,
- SchedWriteVarBlend.XMM>;
- } // Predicates = [HasAVX]
- let Predicates = [HasAVX2] in { // the 256-bit byte form is gated on HasAVX2 rather than HasAVX
- defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
- v32i8, loadv32i8, X86Blendv,
- SchedWriteVarBlend.YMM>, VEX_L;
- } // Predicates = [HasAVX2]
- let Predicates = [HasAVX] in { // 32/64-bit integer X86Blendv is selected with the vblendvps/vblendvpd register forms
- def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
- (v4i32 VR128:$src2))),
- (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; // node order (mask, src1, src2) -> instruction order (src2, src1, mask)
- def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
- (v2i64 VR128:$src2))),
- (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
- (v8i32 VR256:$src2))),
- (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
- (v4i64 VR256:$src2))),
- (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- } // Predicates = [HasAVX]
- // Use blends instead of movss/movsd when optimizing for speed. These were
- // changed to use blends because blends have better throughput on Sandy Bridge
- // and Haswell, but movs[s/d] are 1-2 byte shorter instructions.
- let Predicates = [HasAVX, OptForSpeed] in {
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; // blend $src into a zero vector (V_SET0)
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; // integer type uses VPBLENDW with immediate 3
- def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
- def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
- (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
- def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)), // load is the first operand here
- (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>; // sources swapped, so the 4-bit mask is inverted (1 -> 0xe)
- def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
- (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
- def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)), // load is the first operand here
- (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>; // sources swapped, so the 2-bit mask is inverted (1 -> 2)
- // Move low f32 and clear high bits.
- def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
- (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), // operate on the low 128 bits only
- (i8 1))), sub_xmm)>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
- (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), // operate on the low 128 bits only
- (i8 3))), sub_xmm)>;
- } // Predicates = [HasAVX, OptForSpeed]
- // Use blends instead of movss/movsd when optimizing for speed. These were
- // changed to use blends because blends have better throughput on Sandy Bridge
- // and Haswell, but movs[s/d] are 1-2 byte shorter instructions.
- let Predicates = [UseSSE41, OptForSpeed] in {
- // With SSE41 we can use blends for these patterns.
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; // blend $src into a zero vector (V_SET0)
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; // integer type uses PBLENDW with immediate 3
- def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
- def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
- (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
- def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)), // load is the first operand here
- (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>; // sources swapped, so the 4-bit mask is inverted (1 -> 0xe)
- def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
- (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
- def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)), // load is the first operand here
- (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>; // sources swapped, so the 2-bit mask is inverted (1 -> 2)
- } // Predicates = [UseSSE41, OptForSpeed]
- /// SS41I_ternary - SSE 4.1 ternary operator (XMM0 is an implicit source)
- let Uses = [XMM0], Constraints = "$src1 = $dst" in { // XMM0 is read implicitly; $src1 is tied to $dst
- multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
- PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, X86FoldableSchedWrite sched> {
- def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), // register form
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), // xmm0 is spelled out in the asm string
- [(set VR128:$dst,
- (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>, // XMM0 is the first operand of OpNode
- Sched<[sched]>;
- def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), // form with $src2 read from memory through mem_frag
- (ins VR128:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
- [(set VR128:$dst,
- (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- } // Uses = [XMM0], Constraints = "$src1 = $dst"
- let ExeDomain = SSEPackedDouble in // applies to the single defm that follows
- defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
- X86Blendv, SchedWriteFVarBlend.XMM>;
- let ExeDomain = SSEPackedSingle in // applies to the single defm that follows
- defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
- X86Blendv, SchedWriteFVarBlend.XMM>;
- defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem, // no ExeDomain override for the byte form
- X86Blendv, SchedWriteVarBlend.XMM>;
- // Aliases that omit the implicit xmm0 operand from the assembly string
- def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
- (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
- def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
- (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
- def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
- (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
- def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
- (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
- def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
- (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
- def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
- (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
- let Predicates = [UseSSE41] in { // v4i32/v2i64 X86Blendv with the mask in XMM0 is selected with blendvps/blendvpd
- def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
- (v4i32 VR128:$src2))),
- (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; // instruction operands are (src2, src1); XMM0 is implicit
- def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
- (v2i64 VR128:$src2))),
- (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
- } // Predicates = [UseSSE41]
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- let Predicates = [HasAVX, NoVLX] in
- def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}", []>, // empty pattern list; selected through the Pat<> records below
- Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
- let Predicates = [HasAVX2, NoVLX] in
- def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}", []>, // empty pattern list; selected through the Pat<> records below
- Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
- def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movntdqa\t{$src, $dst|$dst, $src}", []>, // empty pattern list; selected through the Pat<> records below
- Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
- let Predicates = [HasAVX2, NoVLX] in { // every 256-bit vector type maps to VMOVNTDQAYrm
- def : Pat<(v8f32 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- def : Pat<(v4f64 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- def : Pat<(v4i64 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- def : Pat<(v8i32 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- def : Pat<(v16i16 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- def : Pat<(v32i8 (alignednontemporalload addr:$src)),
- (VMOVNTDQAYrm addr:$src)>;
- } // Predicates = [HasAVX2, NoVLX]
- let Predicates = [HasAVX, NoVLX] in { // every 128-bit vector type maps to VMOVNTDQArm
- def : Pat<(v4f32 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- def : Pat<(v2f64 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- def : Pat<(v2i64 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- def : Pat<(v4i32 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- def : Pat<(v8i16 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- def : Pat<(v16i8 (alignednontemporalload addr:$src)),
- (VMOVNTDQArm addr:$src)>;
- } // Predicates = [HasAVX, NoVLX]
- let Predicates = [UseSSE41] in { // every 128-bit vector type maps to MOVNTDQArm
- def : Pat<(v4f32 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- def : Pat<(v2f64 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- def : Pat<(v2i64 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- def : Pat<(v4i32 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- def : Pat<(v8i16 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- def : Pat<(v16i8 (alignednontemporalload addr:$src)),
- (MOVNTDQArm addr:$src)>;
- } // Predicates = [UseSSE41]
- } // AddedComplexity
- //===----------------------------------------------------------------------===//
- // SSE4.2 - Compare Instructions
- //===----------------------------------------------------------------------===//
- /// SS42I_binop_rm - Simple SSE 4.2 binary operator (defines an rr and an rm form)
- multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, X86FoldableSchedWrite sched,
- bit Is2Addr = 1> { // Is2Addr only selects between the two asm string layouts below
- def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), // two-operand syntax
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), // three-operand syntax
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
- Sched<[sched]>;
- def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, // $src2 is read through memop_frag
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- let Predicates = [HasAVX] in // 128-bit VEX form: Is2Addr = 0, uses the `load` fragment
- defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
- let Predicates = [HasAVX2] in // 256-bit VEX form: Is2Addr = 0, uses the `load` fragment
- defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
- let Constraints = "$src1 = $dst" in // non-VEX form: $src1 tied to $dst, default Is2Addr = 1, uses `memop`
- defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
- memop, i128mem, SchedWriteVecALU.XMM>;
- //===----------------------------------------------------------------------===//
- // SSE4.2 - String/text Processing Instructions
- //===----------------------------------------------------------------------===//
- multiclass pcmpistrm_SS42AI<string asm> { // rr/rm forms at opcode 0x62, no explicit outputs
- def rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM]>; // empty pattern list
- let mayLoad = 1 in
- def rm :SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>; // empty pattern list
- }
- let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { // results are the implicit defs XMM0 and EFLAGS
- let Predicates = [HasAVX] in // applies to the VEX-encoded defm only
- defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
- defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
- } // Defs = [XMM0, EFLAGS], hasSideEffects = 0
- multiclass SS42AI_pcmpestrm<string asm> { // rr/rm forms at opcode 0x60, no explicit outputs
- def rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, u8imm:$src5), // explicit operands are named $src1, $src3, $src5
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM]>; // empty pattern list
- let mayLoad = 1 in
- def rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>; // empty pattern list
- }
- let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { // implicit defs XMM0/EFLAGS, implicit uses EAX/EDX
- let Predicates = [HasAVX] in // applies to the VEX-encoded defm only
- defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
- defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
- } // Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0
- multiclass SS42AI_pcmpistri<string asm> { // rr/rm forms at opcode 0x63, no explicit outputs
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI]>; // empty pattern list
- let mayLoad = 1 in
- def rm : SS42AI<0x63, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>; // empty pattern list
- }
- let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { // results are the implicit defs ECX and EFLAGS
- let Predicates = [HasAVX] in // applies to the VEX-encoded defm only
- defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
- defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
- } // Defs = [ECX, EFLAGS], hasSideEffects = 0
- multiclass SS42AI_pcmpestri<string asm> { // rr/rm forms at opcode 0x61, no explicit outputs
- def rr : SS42AI<0x61, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, u8imm:$src5), // explicit operands are named $src1, $src3, $src5
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI]>; // empty pattern list
- let mayLoad = 1 in
- def rm : SS42AI<0x61, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
- !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>; // empty pattern list
- }
- let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { // implicit defs ECX/EFLAGS, implicit uses EAX/EDX
- let Predicates = [HasAVX] in // applies to the VEX-encoded defm only
- defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
- defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
- } // Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0
- //===----------------------------------------------------------------------===//
- // SSE4.2 - CRC Instructions
- //===----------------------------------------------------------------------===//
- // No CRC instructions have AVX equivalents
- // crc intrinsic instruction
- // These instructions come only in register-source (r) and memory-source (m)
- // forms; the only difference between them is the size of r and m.
- class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, // register-source form
- RegisterClass RCIn, SDPatternOperator Int> :
- CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
- !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
- [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
- Sched<[WriteCRC32]>;
- class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, // memory-source form; $src2 is read with `load`
- X86MemOperand x86memop, SDPatternOperator Int> :
- CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
- !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
- [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
- let Constraints = "$src1 = $dst" in { // $src1 is tied to the destination register
- def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
- int_x86_sse42_crc32_32_8>;
- def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
- int_x86_sse42_crc32_32_8>;
- def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
- int_x86_sse42_crc32_32_16>, OpSize16;
- def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
- int_x86_sse42_crc32_32_16>, OpSize16;
- def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
- int_x86_sse42_crc32_32_32>, OpSize32;
- def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
- int_x86_sse42_crc32_32_32>, OpSize32;
- def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
- int_x86_sse42_crc32_64_64>, REX_W;
- def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
- int_x86_sse42_crc32_64_64>, REX_W;
- let hasSideEffects = 0 in { // these two use null_frag, so the flags are given explicitly
- let mayLoad = 1 in
- def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
- null_frag>, REX_W;
- def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
- null_frag>, REX_W;
- } // hasSideEffects = 0
- } // Constraints = "$src1 = $dst"
- //===----------------------------------------------------------------------===//
- // SHA-NI Instructions
- //===----------------------------------------------------------------------===//
- // FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
- multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
- X86FoldableSchedWrite sched, bit UsesXMM0 = 0> { // UsesXMM0 selects both the asm string and the pattern
- def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(UsesXMM0,
- !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), // xmm0 spelled out in the asm string
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
- [!if(UsesXMM0,
- (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), // XMM0 is the third intrinsic operand
- (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
- T8PS, Sched<[sched]>;
- def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(UsesXMM0,
- !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
- [!if(UsesXMM0,
- (set VR128:$dst, (IntId VR128:$src1,
- (memop addr:$src2), XMM0)), // $src2 is read through memop
- (set VR128:$dst, (IntId VR128:$src1,
- (memop addr:$src2))))]>, T8PS,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- }
- let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { // $src1 tied to $dst; all gated on HasSHA
- def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), // defined directly because it takes an immediate operand
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
- (i8 timm:$src3)))]>, TAPS,
- Sched<[SchedWriteVecIMul.XMM]>;
- def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_sha1rnds4 VR128:$src1,
- (memop addr:$src2),
- (i8 timm:$src3)))]>, TAPS,
- Sched<[SchedWriteVecIMul.XMM.Folded,
- SchedWriteVecIMul.XMM.ReadAfterFold]>;
- defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
- SchedWriteVecIMul.XMM>;
- defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
- SchedWriteVecIMul.XMM>;
- defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
- SchedWriteVecIMul.XMM>;
- let Uses=[XMM0] in // the only SHA defm instantiated with UsesXMM0 = 1
- defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
- SchedWriteVecIMul.XMM, 1>;
- defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
- SchedWriteVecIMul.XMM>;
- defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
- SchedWriteVecIMul.XMM>;
- } // Constraints = "$src1 = $dst", Predicates = [HasSHA]
- // Aliases that omit the %xmm0 operand spelled out in the SHA256RNDS2 asm strings
- def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
- (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
- def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
- (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
- //===----------------------------------------------------------------------===//
- // AES-NI Instructions
- //===----------------------------------------------------------------------===//
- multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, // rr/rm pair for a two-source AES intrinsic
- Intrinsic IntId, PatFrag ld_frag,
- bit Is2Addr = 0, RegisterClass RC = VR128,
- X86MemOperand MemOp = i128mem> {
- let AsmString = OpcodeStr# // both defs pass "" and take their asm string from this let
- !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
- def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2), "",
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
- Sched<[WriteAESDecEnc]>;
- def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, MemOp:$src2), "",
- [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>, // $src2 is read through ld_frag
- Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
- } // AsmString
- }
- // Perform One Round of an AES Encryption/Decryption Flow
- let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in { // 128-bit VEX forms: default Is2Addr = 0, `load` fragment
- defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
- defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
- defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
- defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
- } // Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES]
- let Predicates = [NoVLX, HasVAES] in { // 256-bit VEX forms: VR256/i256mem and the *_256 intrinsics
- defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
- defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
- defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
- defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
- } // Predicates = [NoVLX, HasVAES]
- let Constraints = "$src1 = $dst" in { // non-VEX forms: $src1 tied to $dst, Is2Addr = 1, `memop` fragment
- defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc, memop, 1>;
- defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast, memop, 1>;
- defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec, memop, 1>;
- defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast, memop, 1>;
- } // Constraints = "$src1 = $dst"
- // Perform the AES InvMixColumn Transformation
- let Predicates = [HasAVX, HasAES] in { // VEX-encoded forms; the memory form uses `load`
- def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1),
- "vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
- VEX, VEX_WIG;
- def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1),
- "vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
- Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
- } // Predicates = [HasAVX, HasAES]
- def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), // non-VEX register form
- (ins VR128:$src1),
- "aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
- def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), // non-VEX memory form; uses `memop` instead of `load`
- (ins i128mem:$src1),
- "aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
- Sched<[WriteAESIMC.Folded]>;
- // AES Round Key Generation Assist
- let Predicates = [HasAVX, HasAES] in { // VEX-encoded forms; the memory form uses `load`
- def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, u8imm:$src2),
- "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
- Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
- def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, u8imm:$src2),
- "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
- Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
- } // Predicates = [HasAVX, HasAES]
- def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), // non-VEX register form
- (ins VR128:$src1, u8imm:$src2),
- "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
- Sched<[WriteAESKeyGen]>;
- def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // non-VEX memory form; uses `memop` instead of `load`
- (ins i128mem:$src1, u8imm:$src2),
- "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
- Sched<[WriteAESKeyGen.Folded]>;
- //===----------------------------------------------------------------------===//
- // PCLMUL Instructions
- //===----------------------------------------------------------------------===//
- // Immediate transform used when commuting: exchanges the upper and lower
- // 4-bit halves of the 8-bit immediate.
- def PCLMULCommuteImm : SDNodeXForm<timm, [{
- const uint8_t Selector = N->getZExtValue();
- const uint8_t HighToLow = Selector >> 4;
- const uint8_t LowToHigh = (uint8_t)(Selector << 4);
- return getI8Imm((uint8_t)(LowToHigh | HighToLow), SDLoc(N));
- }]>;
- // SSE carry-less Multiplication instructions
- let Predicates = [NoAVX, HasPCLMUL] in {
- let Constraints = "$src1 = $dst" in {
- let isCommutable = 1 in // applies to the rr form only
- def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
- Sched<[WriteCLMul]>;
- def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
- timm:$src3))]>,
- Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
- } // Constraints = "$src1 = $dst"
- def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1, // load is the first operand here
- (i8 timm:$src3)),
- (PCLMULQDQrm VR128:$src1, addr:$src2,
- (PCLMULCommuteImm timm:$src3))>; // sources swapped; immediate rewritten by PCLMULCommuteImm
- } // Predicates = [NoAVX, HasPCLMUL]
- // SSE aliases: pclmul{hq,lq}{hq,lq}dq mnemonics with the immediate folded in
- foreach HI = ["hq","lq"] in
- foreach LO = ["hq","lq"] in {
- def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
- (PCLMULQDQrr VR128:$dst, VR128:$src,
- !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>; // imm = (LO == "hq") << 4 plus (HI == "hq")
- def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
- (PCLMULQDQrm VR128:$dst, i128mem:$src,
- !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>; // same immediate for the memory form
- }
- // AVX carry-less Multiplication instructions
- multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
- PatFrag LdFrag, Intrinsic IntId> { // defines rr, rm and a commuted-load pattern
- let isCommutable = 1 in // applies to the rr form only
- def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set RC:$dst,
- (IntId RC:$src1, RC:$src2, timm:$src3))]>,
- Sched<[WriteCLMul]>;
- def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, MemOp:$src2, u8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set RC:$dst,
- (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
- Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
- // We can commute a load in the first operand by swapping the sources and
- // rotating the immediate.
- def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
- (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2, // NAME#"rm" is the rm record defined above
- (PCLMULCommuteImm timm:$src3))>;
- }
- let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in // 128-bit form
- defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
- int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
- let Predicates = [NoVLX, HasVPCLMULQDQ] in // 256-bit form, uses the _256 intrinsic
- defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
- int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
- multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC, // one rr and one rm alias for a given Hi/Lo pair
- X86MemOperand MemOp, string Hi, string Lo> {
- def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
- !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>; // imm = (Lo == "hq") << 4 plus (Hi == "hq")
- def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
- !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>; // same immediate for the memory form
- }
- multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC, // instantiates all four Hi/Lo combinations
- X86MemOperand MemOp> {
- defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
- defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
- defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
- defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
- }
- // AVX aliases
- defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
- defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
- //===----------------------------------------------------------------------===//
- // SSE4A Instructions
- //===----------------------------------------------------------------------===//
- let Predicates = [HasSSE4A] in {
- let ExeDomain = SSEPackedInt in {
- let Constraints = "$src = $dst" in { // $src is tied to $dst for all four definitions
- def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), // immediate form: length and index are u8 immediates
- (ins VR128:$src, u8imm:$len, u8imm:$idx),
- "extrq\t{$idx, $len, $src|$src, $len, $idx}",
- [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
- timm:$idx))]>,
- PD, Sched<[SchedWriteVecALU.XMM]>;
- def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), // register form: second operand is $mask
- (ins VR128:$src, VR128:$mask),
- "extrq\t{$mask, $src|$src, $mask}",
- [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
- VR128:$mask))]>,
- PD, Sched<[SchedWriteVecALU.XMM]>;
- def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), // immediate form: length and index are u8 immediates
- (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
- "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
- [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
- timm:$len, timm:$idx))]>,
- XD, Sched<[SchedWriteVecALU.XMM]>;
- def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), // register form: second operand is $mask
- (ins VR128:$src, VR128:$mask),
- "insertq\t{$mask, $src|$src, $mask}",
- [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
- VR128:$mask))]>,
- XD, Sched<[SchedWriteVecALU.XMM]>;
- } // Constraints = "$src = $dst"
- } // ExeDomain = SSEPackedInt
- // Non-temporal (unaligned) scalar stores.
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
- def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
- "movntss\t{$src, $dst|$dst, $src}", []>, XS; // empty pattern list; selected through the Pat<> below
- def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movntsd\t{$src, $dst|$dst, $src}", []>, XD; // empty pattern list; selected through the Pat<> below
- } // SchedRW
- def : Pat<(nontemporalstore FR32:$src, addr:$dst),
- (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; // scalar is copied into VR128 first
- def : Pat<(nontemporalstore FR64:$src, addr:$dst),
- (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; // scalar is copied into VR128 first
- } // AddedComplexity
- } // HasSSE4A
- //===----------------------------------------------------------------------===//
- // AVX Instructions
- //===----------------------------------------------------------------------===//
- //===----------------------------------------------------------------------===//
- // VBROADCAST - Load from memory and broadcast to all elements of the
- // destination operand
- //
- class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC, // memory-source broadcast, matched through bcast_frag
- X86MemOperand x86memop, ValueType VT,
- PatFrag bcast_frag, SchedWrite Sched> :
- AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
- Sched<[Sched]>, VEX;
- // AVX2 adds register forms
- class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC, // source is always a VR128 register
- ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
- AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>, // ResVT is the result type, OpVT the source type
- Sched<[Sched]>, VEX;
- // Memory-source vbroadcastss into XMM and YMM (AVX, VLX not available).
- let Predicates = [HasAVX, NoVLX], ExeDomain = SSEPackedSingle in {
-   def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128, f32mem,
-                                          v4f32, X86VBroadcastld32,
-                                          SchedWriteFShuffle.XMM.Folded>;
-   def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, f32mem,
-                                          v8f32, X86VBroadcastld32,
-                                          SchedWriteFShuffle.XMM.Folded>, VEX_L;
- }
- // Memory-source vbroadcastsd; only the YMM form is defined here.
- let Predicates = [HasAVX, NoVLX], ExeDomain = SSEPackedDouble in {
-   def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
-                                          v4f64, X86VBroadcastld64,
-                                          SchedWriteFShuffle.XMM.Folded>, VEX_L;
- }
- // Register-source forms are predicated on AVX2.
- let Predicates = [HasAVX2, NoVLX], ExeDomain = SSEPackedSingle in {
-   def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
-                                           v4f32, v4f32,
-                                           SchedWriteFShuffle.XMM>;
-   def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
-                                           v8f32, v4f32,
-                                           WriteFShuffle256>, VEX_L;
- }
- let Predicates = [HasAVX2, NoVLX], ExeDomain = SSEPackedDouble in {
-   def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
-                                           v4f64, v2f64,
-                                           WriteFShuffle256>, VEX_L;
- }
- //===----------------------------------------------------------------------===//
- // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
- // halves of a 256-bit vector.
- //
- // Both records have an empty pattern list, so mayLoad / hasSideEffects are
- // given explicitly. Selection is done by separate Pat records.
- let Predicates = [HasAVX2], hasSideEffects = 0, mayLoad = 1 in {
-   def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
-                              (ins i128mem:$src),
-                              "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
-                        Sched<[WriteShuffleLd]>, VEX, VEX_L;
- }
- let Predicates = [HasAVX], ExeDomain = SSEPackedSingle,
-     hasSideEffects = 0, mayLoad = 1 in {
-   def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
-                              (ins f128mem:$src),
-                              "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
-                        Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
- }
- // Select VBROADCASTF128 for a 128-bit subvector broadcast load
- // (X86SubVBroadcastld128) producing any of the 256-bit vector types, when AVX
- // is available and VLX is not.
- let Predicates = [HasAVX, NoVLX] in {
- // Floating-point result types.
- def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- // NOTE: We're using FP instructions here, but execution domain fixing can
- // convert to integer when profitable.
- def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
- }
- //===----------------------------------------------------------------------===//
- // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
- //
- // VPERM2F128 register and memory forms. Neither record has a pattern;
- // selection is done through separate Pat records.
- let ExeDomain = SSEPackedSingle in {
-   // Only the register form is flagged commutable.
-   let isCommutable = 1 in {
-     def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
-                          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
-                          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                          []>,
-                        VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
-   }
-   def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
-                        (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
-                        "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                        []>,
-                      VEX_4V, VEX_L,
-                      Sched<[WriteFShuffle256.Folded,
-                             WriteFShuffle256.ReadAfterFold]>;
- }
- // Immediate transform to help with commuting.
- // Rewrites the immediate for a commuted operand order by flipping bits 1 and 5
- // (XOR with 0x22) and re-emitting it as an i8 target constant.
- def Perm2XCommuteImm : SDNodeXForm<timm, [{
-   uint64_t Imm = N->getZExtValue();
-   return getI8Imm(Imm ^ 0x22, SDLoc(N));
- }]>;
- // Selection patterns mapping X86VPerm2x128 of type VT onto the "rr"/"rm"
- // records whose name starts with InstrStr. memop_frag is the load fragment
- // that may be folded.
- multiclass vperm2x128_lowering<string InstrStr, ValueType VT,
-                                PatFrag memop_frag> {
-   // Both sources in registers.
-   def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
-             (!cast<Instruction>(InstrStr#"rr")
-                VR256:$src1, VR256:$src2, timm:$imm)>;
-   // Load in the second source: fold it directly.
-   def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2),
-                                (i8 timm:$imm))),
-             (!cast<Instruction>(InstrStr#"rm")
-                VR256:$src1, addr:$src2, timm:$imm)>;
-   // Load in the first source: swap the operands and adjust the immediate
-   // with Perm2XCommuteImm so the load can still be folded.
-   def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1,
-                                (i8 timm:$imm))),
-             (!cast<Instruction>(InstrStr#"rm")
-                VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
- }
- // Floating-point 256-bit types use VPERM2F128 whenever AVX is available.
- let Predicates = [HasAVX] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
- }
- // Integer 256-bit types use VPERM2F128 only under the HasAVX1Only predicate.
- let Predicates = [HasAVX1Only] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
- }
- //===----------------------------------------------------------------------===//
- // VINSERTF128 - Insert packed floating-point values
- //
- // VINSERTF128 register and memory forms. The records carry no pattern, so
- // hasSideEffects (and mayLoad on the memory form) are set by hand.
- let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
-   def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
-                         (ins VR256:$src1, VR128:$src2, u8imm:$src3),
-                         "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                         []>,
-                       Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
-   let mayLoad = 1 in {
-     def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
-                           (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
-                           "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                           []>,
-                         Sched<[WriteFShuffle256.Folded,
-                                WriteFShuffle256.ReadAfterFold]>,
-                         VEX_4V, VEX_L;
-   }
- }
- // To create a 256-bit all ones value, we should produce VCMPTRUEPS
- // with YMM register containing zero.
- // FIXME: Avoid producing vxorps to clear the fake inputs.
- // v8i32 all-ones: VCMPPSYrri with condition code 0xf on two AVX_SET0 inputs.
- let Predicates = [HasAVX1Only] in
- def : Pat<(v8i32 immAllOnesV),
-           (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
- // Selection patterns for vinsert128_insert.
- //   InstrStr       - name prefix of the insert records ("rr"/"rm" appended).
- //   PermStr        - name prefix of the perm2x128 record ("rm" appended).
- //   From / To      - 128-bit subvector type / 256-bit vector type.
- //   frommemop_frag - load fragment for the inserted subvector.
- //   tomemop_frag   - load fragment for the vector being inserted into.
- multiclass vinsert_lowering<string InstrStr, string PermStr,
-                             ValueType From, ValueType To,
-                             PatFrag frommemop_frag, PatFrag tomemop_frag> {
-   // Register subvector into register vector.
-   def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
-                                     (iPTR imm)),
-             (!cast<Instruction>(InstrStr#"rr")
-                VR256:$src1, VR128:$src2,
-                (INSERT_get_vinsert128_imm VR256:$ins))>;
-   // Loaded subvector into register vector.
-   def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
-                                     (From (frommemop_frag addr:$src2)),
-                                     (iPTR imm)),
-             (!cast<Instruction>(InstrStr#"rm")
-                VR256:$src1, addr:$src2,
-                (INSERT_get_vinsert128_imm VR256:$ins))>;
-   // Folding "To" vector - convert to perm2x128 and commute inputs.
-   def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
-                                     (From VR128:$src2),
-                                     (iPTR imm)),
-             (!cast<Instruction>(PermStr#"rm")
-                (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
-                addr:$src1,
-                (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
- }
- // Floating-point element types: VINSERTF128 / VPERM2F128 with AVX and no VLX.
- let Predicates = [HasAVX, NoVLX] in {
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
- }
- // Integer element types use the same FP instructions under HasAVX1Only.
- let Predicates = [HasAVX1Only] in {
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>;
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>;
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
- defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
- }
- //===----------------------------------------------------------------------===//
- // VEXTRACTF128 - Extract packed floating-point values
- //
- // VEXTRACTF128 to a register and to memory. The records carry no pattern, so
- // hasSideEffects (and mayStore on the memory form) are set by hand.
- let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
-   def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
-                          (ins VR256:$src1, u8imm:$src2),
-                          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                          []>,
-                        Sched<[WriteFShuffle256]>, VEX, VEX_L;
-   let mayStore = 1 in {
-     def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
-                            (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
-                            "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                            []>,
-                          Sched<[WriteFStoreX]>, VEX, VEX_L;
-   }
- }
- // Selection patterns for vextract128_extract from a From-typed 256-bit vector
- // to a To-typed 128-bit vector, using the "rr"/"mr" records whose name starts
- // with InstrStr.
- multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
-   // Extract into a register.
-   def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
-             (To (!cast<Instruction>(InstrStr#"rr")
-                    (From VR256:$src1),
-                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-   // Extract whose result is stored: use the store form directly.
-   def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
-                                                  (iPTR imm))),
-                    addr:$dst),
-             (!cast<Instruction>(InstrStr#"mr")
-                addr:$dst, VR256:$src1,
-                (EXTRACT_get_vextract128_imm VR128:$ext))>;
- }
- // AVX1 patterns
- // Floating-point element types: VEXTRACTF128 with AVX and no VLX.
- let Predicates = [HasAVX, NoVLX] in {
- defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
- defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
- }
- // Integer element types use VEXTRACTF128 under HasAVX1Only.
- let Predicates = [HasAVX1Only] in {
- defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
- defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
- defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
- defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
- }
- //===----------------------------------------------------------------------===//
- // VMASKMOV - Conditional SIMD Packed Loads and Stores
- //
- // Masked load/store instruction family.
- //   opc_rm / opc_mr   - opcode of the load forms / of the store forms.
- //   OpcodeStr         - mnemonic shared by all four records.
- //   IntLd / IntLd256  - intrinsics matched by the 128/256-bit load forms.
- //   IntSt / IntSt256  - intrinsics matched by the 128/256-bit store forms.
- //   schedX / schedY   - scheduling info for the 128-bit / 256-bit records
- //                       (.RM for loads, .MR for stores).
- // In every form $src1 is the register passed next to the address in the
- // intrinsic call.
- multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
-                           Intrinsic IntLd, Intrinsic IntLd256,
-                           Intrinsic IntSt, Intrinsic IntSt256,
-                           X86SchedWriteMaskMove schedX,
-                           X86SchedWriteMaskMove schedY> {
-   // Loads.
-   def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
-               (ins VR128:$src1, f128mem:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
-             VEX_4V, Sched<[schedX.RM]>;
-   def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
-               (ins VR256:$src1, f256mem:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
-             VEX_4V, VEX_L, Sched<[schedY.RM]>;
-   // Stores.
-   def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
-               (ins f128mem:$dst, VR128:$src1, VR128:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
-             VEX_4V, Sched<[schedX.MR]>;
-   def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
-               (ins f256mem:$dst, VR256:$src1, VR256:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
-             VEX_4V, VEX_L, Sched<[schedY.MR]>;
- }
- // Single-precision masked moves: load opcode 0x2C, store opcode 0x2E.
- let ExeDomain = SSEPackedSingle in {
-   defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
-                                    int_x86_avx_maskload_ps,
-                                    int_x86_avx_maskload_ps_256,
-                                    int_x86_avx_maskstore_ps,
-                                    int_x86_avx_maskstore_ps_256,
-                                    WriteFMaskMove32, WriteFMaskMove32Y>;
- }
- // Double-precision masked moves: load opcode 0x2D, store opcode 0x2F.
- let ExeDomain = SSEPackedDouble in {
-   defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
-                                    int_x86_avx_maskload_pd,
-                                    int_x86_avx_maskload_pd_256,
-                                    int_x86_avx_maskstore_pd,
-                                    int_x86_avx_maskstore_pd_256,
-                                    WriteFMaskMove64, WriteFMaskMove64Y>;
- }
- //===----------------------------------------------------------------------===//
- // AVX_VNNI
- //===----------------------------------------------------------------------===//
- // VEX-encoded AVX-VNNI instruction family (128-bit rr/rm, 256-bit Yrr/Yrm).
- //   opc          - opcode byte.
- //   OpcodeStr    - mnemonic.
- //   OpNode       - SDNode matched as (OpNode $src1, $src2, $src3).
- //   IsCommutable - value assigned to isCommutable on the register forms.
- // $src1 is tied to $dst, which is why the assembly string prints only
- // $src2 and $src3.
- // Scheduling: each record uses the write class of its own vector width
- // (XMM for rr/rm, YMM for Yrr/Yrm). The load-folding forms use the .Folded
- // write and attach a ReadAfterFold to each of the two register sources
- // ($src1 and $src2), which precede the memory operand in the operand list.
- let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
-     ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
- multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                        bit IsCommutable> {
-   let isCommutable = IsCommutable in
-   def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
-               (ins VR128:$src1, VR128:$src2, VR128:$src3),
-               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-               [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
-                                         VR128:$src2, VR128:$src3)))]>,
-             VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
-   def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
-               (ins VR128:$src1, VR128:$src2, i128mem:$src3),
-               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-               [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
-                                         (loadv4i32 addr:$src3))))]>,
-             VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
-                            SchedWriteVecIMul.XMM.ReadAfterFold,
-                            SchedWriteVecIMul.XMM.ReadAfterFold]>;
-   let isCommutable = IsCommutable in
-   def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
-               (ins VR256:$src1, VR256:$src2, VR256:$src3),
-               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-               [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
-                                         VR256:$src2, VR256:$src3)))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
-   def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
-               (ins VR256:$src1, VR256:$src2, i256mem:$src3),
-               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-               [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
-                                         (loadv8i32 addr:$src3))))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
-                                   SchedWriteVecIMul.YMM.ReadAfterFold,
-                                   SchedWriteVecIMul.YMM.ReadAfterFold]>;
- }
- // AVX-VNNI instantiations. The last argument is the IsCommutable flag: the
- // VPDPBUSD* records pass 0 and the VPDPWSSD* records pass 1.
- defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
- defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
- defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>;
- defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;
- // X86vpmaddwd restricted to nodes with exactly one use ("_su"); the predicate
- // rejects a multiply-add whose result is needed elsewhere.
- def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
- (X86vpmaddwd node:$lhs, node:$rhs), [{
- return N->hasOneUse();
- }]>;
- // Select VPDPWSSD for an add whose second operand is a single-use
- // X86vpmaddwd: the add's first operand becomes the tied $src1 source.
- let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
- // 256-bit, register and load-folded.
- def : Pat<(v8i32 (add VR256:$src1,
- (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
- (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(v8i32 (add VR256:$src1,
- (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
- (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
- // 128-bit, register and load-folded.
- def : Pat<(v4i32 (add VR128:$src1,
- (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
- (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(v4i32 (add VR128:$src1,
- (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
- (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
- }
- //===----------------------------------------------------------------------===//
- // VPERMIL - Permute Single and Double Floating-Point Values
- //
- // VPERMIL instruction family.
- //   opc_rm      - opcode of the variable-control forms (rr, rm).
- //   opc_rmi     - opcode of the immediate-control forms (ri, mi).
- //   RC          - register class of the data operands and the result.
- //   x86memop_f  - memory operand for a loaded data source (mi).
- //   x86memop_i  - memory operand for a loaded control vector (rm).
- //   f_vt / i_vt - data value type / control-vector value type.
- //   sched       - scheduling class of the immediate forms.
- //   varsched    - scheduling class of the variable forms.
- multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
-                       RegisterClass RC, X86MemOperand x86memop_f,
-                       X86MemOperand x86memop_i,
-                       ValueType f_vt, ValueType i_vt,
-                       X86FoldableSchedWrite sched,
-                       X86FoldableSchedWrite varsched> {
-   let Predicates = [HasAVX, NoVLX] in {
-     // Variable control taken from a register.
-     def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
-                (ins RC:$src1, RC:$src2),
-                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set RC:$dst,
-                  (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>,
-              VEX_4V, Sched<[varsched]>;
-     // Variable control loaded from memory.
-     def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
-                (ins RC:$src1, x86memop_i:$src2),
-                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set RC:$dst,
-                  (f_vt (X86VPermilpv RC:$src1,
-                                      (i_vt (load addr:$src2)))))]>,
-              VEX_4V, Sched<[varsched.Folded, sched.ReadAfterFold]>;
-     // Immediate control, register data.
-     def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
-                (ins RC:$src1, u8imm:$src2),
-                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set RC:$dst,
-                  (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>,
-              VEX, Sched<[sched]>;
-     // Immediate control, data loaded from memory.
-     def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
-                (ins x86memop_f:$src1, u8imm:$src2),
-                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set RC:$dst,
-                  (f_vt (X86VPermilpi (load addr:$src1),
-                                      (i8 timm:$src2))))]>,
-              VEX, Sched<[sched.Folded]>;
-   }// Predicates = [HasAVX, NoVLX]
- }
- // vpermilps: variable-form opcode 0x0C, immediate-form opcode 0x04.
- // The Y instantiation operates on VR256 and adds VEX_L.
- let ExeDomain = SSEPackedSingle in {
- defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- v4f32, v4i32, SchedWriteFShuffle.XMM,
- SchedWriteFVarShuffle.XMM>;
- defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- v8f32, v8i32, SchedWriteFShuffle.YMM,
- SchedWriteFVarShuffle.YMM>, VEX_L;
- }
- // vpermilpd: variable-form opcode 0x0D, immediate-form opcode 0x05.
- let ExeDomain = SSEPackedDouble in {
- defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- v2f64, v2i64, SchedWriteFShuffle.XMM,
- SchedWriteFVarShuffle.XMM>;
- defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- v4f64, v4i64, SchedWriteFShuffle.YMM,
- SchedWriteFVarShuffle.YMM>, VEX_L;
- }
- //===----------------------------------------------------------------------===//
- // VZERO - Zero YMM registers
- // Note: These instructions do not affect YMM16-YMM31.
- //
- // Both instructions share opcode 0x77 and differ only in VEX_L (set on
- // VZEROALL). Each is declared as defining YMM0-YMM15.
- let SchedRW = [WriteSystem],
-     Defs = [YMM0, YMM1, YMM2,  YMM3,  YMM4,  YMM5,  YMM6,  YMM7,
-             YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
-   // Zero All YMM registers
-   def VZEROALL   : I<0x77, RawFrm, (outs), (ins), "vzeroall",
-                      [(int_x86_avx_vzeroall)]>,
-                    PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG;
-   // Zero Upper bits of YMM registers
-   def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
-                      [(int_x86_avx_vzeroupper)]>,
-                    PS, VEX, Requires<[HasAVX]>, VEX_WIG;
- } // SchedRW, Defs
- //===----------------------------------------------------------------------===//
- // Half precision conversion instructions
- //
- // vcvtph2ps, register and memory source. The source register is always
- // VR128; RC is the destination class and x86memop the memory operand type.
- multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
-                       X86FoldableSchedWrite sched> {
-   def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
-              "vcvtph2ps\t{$src, $dst|$dst, $src}",
-              [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
-            T8PD, VEX, Sched<[sched]>;
-   // No pattern on the memory form, so its load behaviour is stated by hand.
-   let mayLoad = 1, hasSideEffects = 0 in {
-     def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                "vcvtph2ps\t{$src, $dst|$dst, $src}", []>,
-              T8PD, VEX, Sched<[sched.Folded]>;
-   }
- }
- // vcvtps2ph, register and memory destination. The register destination is
- // always VR128; RC is the source class. RR / MR are the scheduling classes
- // of the register form and of the store form.
- multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
-                       SchedWrite RR, SchedWrite MR> {
-   def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
-              (ins RC:$src1, i32u8imm:$src2),
-              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-              [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
-            TAPD, VEX, Sched<[RR]>;
-   // No pattern on the store form, so its memory behaviour is stated by hand.
-   let mayStore = 1, hasSideEffects = 0 in {
-     def mr : Ii8<0x1D, MRMDestMem, (outs),
-                (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
-                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-              TAPD, VEX, Sched<[MR]>;
-   }
- }
- // F16C instruction instantiations and the patterns that select their memory
- // forms (the memory records themselves carry no pattern).
- let Predicates = [HasF16C, NoVLX] in {
- // The 128-bit forms use a 64-bit memory operand and the 256-bit forms a
- // 128-bit one.
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
- WriteCvtPS2PHSt>, SIMD_EXC;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
- WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
- // Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (VCVTPH2PSrm addr:$src)>;
- def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
- (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (VCVTPH2PSrm addr:$src)>;
- // 256-bit conversion of a full v8i16 load.
- def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
- (VCVTPH2PSYrm addr:$src)>;
- // Store of element 0 of the 128-bit conversion result, viewed as f64 or as
- // i64: use the store form of vcvtps2ph.
- def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
- (iPTR 0))), addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
- def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
- (iPTR 0))), addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
- // Store of the whole v8i16 result of the 256-bit conversion.
- def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
- (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
- }
- //===----------------------------------------------------------------------===//
- // AVX2 Instructions
- //===----------------------------------------------------------------------===//
- /// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
- multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           ValueType OpVT, X86FoldableSchedWrite sched,
-                           RegisterClass RC, X86MemOperand x86memop,
-                           SDNodeXForm commuteXForm> {
-   // Register/register form; the only one flagged commutable.
-   let isCommutable = 1 in {
-     def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
-                 (ins RC:$src1, RC:$src2, u8imm:$src3),
-                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                 [(set RC:$dst,
-                   (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
-               Sched<[sched]>, VEX_4V;
-   }
-   // Second source loaded from memory.
-   def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
-               (ins RC:$src1, x86memop:$src2, u8imm:$src3),
-               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-               [(set RC:$dst,
-                 (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
-             Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
-   // Load in the first source: swap the operands and rewrite the immediate
-   // with commuteXForm so the load can be folded into rmi.
-   def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
-             (!cast<Instruction>(NAME#"rmi")
-                RC:$src1, addr:$src2, (commuteXForm timm:$src3))>;
- }
- // vpblendd instantiations plus patterns that reuse them for 64-bit element
- // blends by rewriting the immediate.
- let Predicates = [HasAVX2] in {
- defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
- SchedWriteBlend.XMM, VR128, i128mem,
- BlendCommuteImm4>;
- defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
- SchedWriteBlend.YMM, VR256, i256mem,
- BlendCommuteImm8>, VEX_L;
- // v4i64 blends: register, load in second source, load in first source
- // (the last uses the commuting immediate transform).
- def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
- (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
- def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
- (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
- // v2i64 blends, same three shapes.
- def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
- (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
- def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
- (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
- def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
- (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
- }
- // For insertion into the zero index (low half) of a 256-bit vector, it is
- // more efficient to generate a blend with immediate instead of an insert*128.
- // NOTE: We're using FP instructions here, but execution domain fixing should
- // take care of using integer instructions when profitable.
- // insert_subvector at index 0 of a 256-bit integer vector, selected as
- // VBLENDPSY. The 128-bit value is first placed in the sub_xmm part of an
- // otherwise undefined 256-bit register.
- let Predicates = [HasAVX] in {
- // Destination vector in a register: the widened subvector is the second
- // blend operand and the immediate is 0xf.
- def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
- (VBLENDPSYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
- def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
- (VBLENDPSYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
- def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
- (VBLENDPSYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
- def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
- (VBLENDPSYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
- // Destination vector loaded from memory: the load is folded as the second
- // blend operand, so the operand roles are swapped and the immediate is the
- // 8-bit complement, 0xf0.
- def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
- (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
- def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
- (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
- def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
- (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
- def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
- (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
- }
- //===----------------------------------------------------------------------===//
- // VPBROADCAST - Load from memory and broadcast to all elements of the
- // destination operand
- //
- // Integer broadcast family: 128-bit (rr, rm) and 256-bit (Yrr, Yrm) results.
- //   x86memop   - memory operand type of the load forms.
- //   bcast_frag - fragment matched on the address in the load forms.
- //   OpVT128    - type of the 128-bit result and of the register source.
- //   OpVT256    - type of the 256-bit result.
- //   prd        - extra predicate combined with HasAVX2.
- multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
-                           X86MemOperand x86memop, PatFrag bcast_frag,
-                           ValueType OpVT128, ValueType OpVT256,
-                           Predicate prd> {
-   let Predicates = [HasAVX2, prd] in {
-     def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                 [(set VR128:$dst,
-                   (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
-               Sched<[SchedWriteShuffle.XMM]>, VEX;
-     def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
-                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                 [(set VR128:$dst, (OpVT128 (bcast_frag addr:$src)))]>,
-               Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
-     def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
-                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                 [(set VR256:$dst,
-                   (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
-               Sched<[WriteShuffle256]>, VEX, VEX_L;
-     def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
-                 OpcodeStr#"\t{$src, $dst|$dst, $src}",
-                 [(set VR256:$dst, (OpVT256 (bcast_frag addr:$src)))]>,
-               Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;
-     // Provide aliases for broadcast from the same register class that
-     // automatically do the extract: a 256-bit source is narrowed to its
-     // sub_xmm part and fed to the Yrr form.
-     def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
-               (!cast<Instruction>(NAME#"Yrr")
-                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src), sub_xmm)))>;
-   }
- }
- // Byte and word broadcasts are gated on NoVLX_Or_NoBWI; dword and qword
- // broadcasts on NoVLX (each in addition to HasAVX2 from the multiclass).
- defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
- v16i8, v32i8, NoVLX_Or_NoBWI>;
- defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
- v8i16, v16i16, NoVLX_Or_NoBWI>;
- defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
- v4i32, v8i32, NoVLX>;
- defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
- v2i64, v4i64, NoVLX>;
- // Broadcast of a scalar FP register: copy FR32/FR64 into VR128 and use the
- // register-source broadcast instruction.
- let Predicates = [HasAVX2, NoVLX] in {
- // Provide fallback in case the load node that is used in the patterns above
- // is used by additional users, which prevents the pattern selection.
- def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
- def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
- def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
- }
- // Broadcast of an 8/16-bit GPR: place it in the low part of an otherwise
- // undefined 32-bit register, move that to a vector register with
- // VMOVDI2PDIrr, then broadcast the byte/word.
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBrr (VMOVDI2PDIrr
- (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit))))>;
- def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBYrr (VMOVDI2PDIrr
- (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit))))>;
- def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWrr (VMOVDI2PDIrr
- (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit))))>;
- def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWYrr (VMOVDI2PDIrr
- (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit))))>;
- }
- // Broadcast of a 32/64-bit GPR: move it to a vector register
- // (VMOVDI2PDIrr / VMOV64toPQIrr) and broadcast the dword/qword.
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
- def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
- def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
- def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
- }
- // AVX1 broadcast patterns
- // Integer broadcast loads under HasAVX1Only are selected onto the
- // floating-point VBROADCASTSS / VBROADCASTSD memory forms.
- let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
- (VBROADCASTSSYrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
- (VBROADCASTSDYrm addr:$src)>;
- def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
- (VBROADCASTSSrm addr:$src)>;
- }
- // Provide fallback in case the load node that is used in the patterns above
- // is used by additional users, which prevents the pattern selection.
- // v2f64 broadcasts are selected as VMOVDDUP, from a scalar register, from
- // memory, or from a v2f64 register.
- let Predicates = [HasAVX, NoVLX] in {
- // 128bit broadcasts:
- def : Pat<(v2f64 (X86VBroadcast f64:$src)),
- (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
- def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
- (VMOVDDUPrm addr:$src)>;
- def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
- (VMOVDDUPrr VR128:$src)>;
- }
- // Register-source broadcasts under HasAVX1Only. A 128-bit splat is built
- // with an in-lane shuffle (VPERMILPSri / VMOVDDUPrr / VPSHUFDri); 256-bit
- // results put that splat into the sub_xmm part of an undefined YMM and then
- // insert the same splat again with VINSERTF128rr and immediate 1.
- let Predicates = [HasAVX1Only] in {
- // Single-precision, from a scalar register and from a v4f32 register.
- def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
- def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
- (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
- def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm),
- (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>;
- // Double-precision, from a scalar register and from a v2f64 register.
- def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
- (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
- def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm),
- (v2f64 (VMOVDDUPrr VR128:$src)), 1)>;
- // 32-bit GPR: VPSHUFDri with immediate 0.
- def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
- def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
- (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
- // 64-bit GPR: VPSHUFDri with immediate 0x44.
- def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
- (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
- (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;
- def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
- // v2i64 broadcast load is selected as the VMOVDDUP memory form.
- def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
- (VMOVDDUPrm addr:$src)>;
- }
- //===----------------------------------------------------------------------===//
- // VPERM - Permute instructions
- //
- // 256-bit two-operand permute matched through X86VPermv, with $src2 either
- // in a register (Yrr) or loaded through memOp (Yrm).
- multiclass avx2_perm<bits<8> opc, string OpcodeStr,
-                      ValueType OpVT, X86FoldableSchedWrite Sched,
-                      X86MemOperand memOp> {
-   let Predicates = [HasAVX2, NoVLX] in {
-     def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
-                 (ins VR256:$src1, VR256:$src2),
-                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 [(set VR256:$dst,
-                   (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
-               Sched<[Sched]>, VEX_4V, VEX_L;
-     def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
-                 (ins VR256:$src1, memOp:$src2),
-                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 [(set VR256:$dst,
-                   (OpVT (X86VPermv VR256:$src1, (load addr:$src2))))]>,
-               Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
-   }
- }
- // v8i32 permute (opcode 0x36) and its v8f32 counterpart (opcode 0x16); only
- // the latter is placed in the SSEPackedSingle execution domain.
- defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
- let ExeDomain = SSEPackedSingle in
- defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
- // 256-bit permute with an 8-bit immediate control, matched through X86VPermi.
- //   opc       - opcode byte.
- //   OpcodeStr - mnemonic.
- //   mem_frag  - load fragment folded by the memory form.
- //   OpVT      - 256-bit value type of source and result.
- //   Sched     - scheduling class (its .Folded variant is used for Ymi).
- //   memOp     - memory operand type of the Ymi source.
- multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                          ValueType OpVT, X86FoldableSchedWrite Sched,
-                          X86MemOperand memOp> {
-   let Predicates = [HasAVX2, NoVLX] in {
-     def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
-                 (ins VR256:$src1, u8imm:$src2),
-                 !strconcat(OpcodeStr,
-                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                 [(set VR256:$dst,
-                   (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
-               Sched<[Sched]>, VEX, VEX_L;
-     // The only data source is the folded load, so there is no register
-     // operand to attach a ReadAfterFold to: listing one would bind it to the
-     // memory operand's address. Only the folded write is listed.
-     def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
-                 (ins memOp:$src1, u8imm:$src2),
-                 !strconcat(OpcodeStr,
-                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                 [(set VR256:$dst,
-                   (OpVT (X86VPermi (mem_frag addr:$src1),
-                                    (i8 timm:$src2))))]>,
-               Sched<[Sched.Folded]>, VEX, VEX_L;
-   }
- }
- // v4i64 immediate permute (opcode 0x00) and its v4f64 counterpart (opcode
- // 0x01); both add VEX_W, and only the latter is placed in the SSEPackedDouble
- // execution domain.
- defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
- WriteShuffle256, i256mem>, VEX_W;
- let ExeDomain = SSEPackedDouble in
- defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
- WriteFShuffle256, f256mem>, VEX_W;
- //===----------------------------------------------------------------------===//
- // VPERM2I128 - Permute Integer vector Values in 128-bit chunks
- //
- // VPERM2I128 register and memory forms. Neither record has a pattern;
- // selection is done through separate Pat records. Only the register form is
- // flagged commutable.
- let isCommutable = 1 in {
-   def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
-                        (ins VR256:$src1, VR256:$src2, u8imm:$src3),
-                        "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                        []>,
-                      Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
- }
- def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
-                      (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
-                      "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                      []>,
-                    Sched<[WriteShuffle256.Folded,
-                           WriteShuffle256.ReadAfterFold]>,
-                    VEX_4V, VEX_L;
- // With AVX2, the integer 256-bit types select X86VPerm2x128 to VPERM2I128.
- let Predicates = [HasAVX2] in {
- defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
- }
- //===----------------------------------------------------------------------===//
- // VINSERTI128 - Insert packed integer values
- // The rr/rm defs have empty pattern lists, so hasSideEffects/mayLoad are set by hand rather than inferred from a pattern.
- let hasSideEffects = 0 in {
- def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), // The 128-bit $src2 comes from a VR128 register.
- (ins VR256:$src1, VR128:$src2, u8imm:$src3),
- "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
- let mayLoad = 1 in // No braces: applies to the rm def only.
- def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), // The 128-bit $src2 comes from memory (i128mem).
- (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
- "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
- }
- let Predicates = [HasAVX2, NoVLX] in { // vinsert_lowering is defined outside this excerpt; arguments are insert instr, permute instr, 128-bit VT, 256-bit VT and their load fragments.
- defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>;
- defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>;
- defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
- defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
- }
- //===----------------------------------------------------------------------===//
- // VEXTRACTI128 - Extract packed integer values
- // Both defs have empty pattern lists; only the store form (mr) sets hasSideEffects/mayStore explicitly.
- def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), // Register destination: VR128 result taken from the VR256 $src1.
- (ins VR256:$src1, u8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[WriteShuffle256]>, VEX, VEX_L;
- let hasSideEffects = 0, mayStore = 1 in // No braces: applies to the mr def only.
- def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), // Memory destination: no register outputs, $dst is an i128mem input operand.
- (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;
- let Predicates = [HasAVX2, NoVLX] in { // vextract_lowering is defined outside this excerpt; arguments are instr name, 256-bit source VT, 128-bit result VT.
- defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
- defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>;
- defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
- defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
- }
- //===----------------------------------------------------------------------===//
- // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
- //
- // avx2_pmovmask - Masked integer loads (opcode 0x8c) and stores (opcode 0x8e),
- // selected directly from the intrinsics passed in.
- //   rm / Yrm - 128/256-bit load: $src2 is the address and $src1 is forwarded
- //              as the intrinsic's second operand.
- //   mr / Ymr - 128/256-bit store: $dst is the address; $src1 and $src2 are
- //              forwarded to the intrinsic in that order.
- // schedX / schedY supply the scheduling classes of the 128/256-bit forms.
- multiclass avx2_pmovmask<string OpcodeStr,
-                          Intrinsic IntLd128, Intrinsic IntLd256,
-                          Intrinsic IntSt128, Intrinsic IntSt256,
-                          X86SchedWriteMaskMove schedX,
-                          X86SchedWriteMaskMove schedY> {
-   // Loads.
-   def rm : AVX28I<0x8c, MRMSrcMem,
-              (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-              [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
-            VEX_4V, Sched<[schedX.RM]>;
-   def Yrm : AVX28I<0x8c, MRMSrcMem,
-               (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
-             VEX_4V, VEX_L, Sched<[schedY.RM]>;
-   // Stores.
-   def mr : AVX28I<0x8e, MRMDestMem,
-              (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2),
-              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-              [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
-            VEX_4V, Sched<[schedX.MR]>;
-   def Ymr : AVX28I<0x8e, MRMDestMem,
-               (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
-             VEX_4V, VEX_L, Sched<[schedY.MR]>;
- }
- defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", // Generates VPMASKMOVD{rm,Yrm,mr,Ymr} from the *_d intrinsics.
- int_x86_avx2_maskload_d,
- int_x86_avx2_maskload_d_256,
- int_x86_avx2_maskstore_d,
- int_x86_avx2_maskstore_d_256,
- WriteVecMaskMove32, WriteVecMaskMove32Y>; // 128-bit and 256-bit scheduling classes, in that order.
- defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", // Generates VPMASKMOVQ{rm,Yrm,mr,Ymr} from the *_q intrinsics.
- int_x86_avx2_maskload_q,
- int_x86_avx2_maskload_q_256,
- int_x86_avx2_maskstore_q,
- int_x86_avx2_maskstore_q_256,
- WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W; // Unlike the D variant, all four records also get VEX_W.
- multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT, // Maps generic masked_store/masked_load nodes onto the InstrStr#"mr" / InstrStr#"rm" instructions.
- ValueType MaskVT> { // VT is the data type, MaskVT the mask type; both live in register class RC.
- // masked store: the node's operand order is (value, ptr, mask) but the instruction takes (ptr, mask, value).
- def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
- (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
- // masked load: only an undef or all-zeros pass-through operand is matched; both select the same instruction.
- def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
- (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
- (VT immAllZerosV))),
- (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- }
- let Predicates = [HasAVX] in { // FP data types with integer mask types of matching element width.
- defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
- defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
- defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
- defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
- }
- let Predicates = [HasAVX1Only] in {
- // Integer (i32/i64) masked load/store is not supported here, so integer vectors are routed through the ps/pd instructions.
- defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
- defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
- defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
- defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
- }
- let Predicates = [HasAVX2] in { // Same integer types as the HasAVX1Only group, but mapped to the VPMASKMOV instructions.
- defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
- defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
- defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
- defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
- }
- //===----------------------------------------------------------------------===//
- // Variable Bit Shifts
- //
- // avx2_var_shift - Variable shifts where the shift amounts come from a second
- // vector operand. OpNode is applied to ($src1, $src2) for each form:
- //   rr  / rm  - 128-bit (vt128), $src2 in a register / loaded from i128mem
- //   Yrr / Yrm - 256-bit (vt256), $src2 in a register / loaded from i256mem
- // All four records share the opcode and are VEX_4V encoded; the 256-bit
- // ones add VEX_L.
- multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           ValueType vt128, ValueType vt256> {
-   // 128-bit forms.
-   def rr : AVX28I<opc, MRMSrcReg,
-              (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-              [(set VR128:$dst,
-                    (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
-            VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
-   def rm : AVX28I<opc, MRMSrcMem,
-              (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-              [(set VR128:$dst,
-                    (vt128 (OpNode VR128:$src1,
-                                   (vt128 (load addr:$src2)))))]>,
-            VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
-                           SchedWriteVarVecShift.XMM.ReadAfterFold]>;
-   // 256-bit forms.
-   def Yrr : AVX28I<opc, MRMSrcReg,
-               (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR256:$dst,
-                     (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
-   def Yrm : AVX28I<opc, MRMSrcMem,
-               (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
-               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR256:$dst,
-                     (vt256 (OpNode VR256:$src1,
-                                    (vt256 (load addr:$src2)))))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
-                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
- }
- let Predicates = [HasAVX2, NoVLX] in { // D and Q variants share an opcode; the Q variants are distinguished by VEX_W.
- defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
- defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
- defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
- defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
- defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>; // Only a D form of X86vsrav is instantiated in this group.
- }
- //===----------------------------------------------------------------------===//
- // VGATHER - GATHER Operations
- // FIXME: Improve scheduling of gather instructions.
- // avx2_gather - Pattern-less gather instruction pair.
- //   rm  - always uses VR128 for $dst/$src1/$mask, with a memop128 address
- //   Yrm - uses RC256 for $dst/$src1/$mask, with a memop256 address, plus VEX_L
- // Each record has a second output, $mask_wb, alongside $dst. The pattern
- // lists are empty, so mayLoad/hasSideEffects are set explicitly.
- multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
-                        X86MemOperand memop128, X86MemOperand memop256> {
-   let mayLoad = 1, hasSideEffects = 0 in {
-     def rm : AVX28I<opc, MRMSrcMem4VOp3,
-                (outs VR128:$dst, VR128:$mask_wb),
-                (ins VR128:$src1, memop128:$src2, VR128:$mask),
-                OpcodeStr#"\t{$mask, $src2, $dst|$dst, $src2, $mask}",
-                []>,
-              VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
-     def Yrm : AVX28I<opc, MRMSrcMem4VOp3,
-                 (outs RC256:$dst, RC256:$mask_wb),
-                 (ins RC256:$src1, memop256:$src2, RC256:$mask),
-                 OpcodeStr#"\t{$mask, $src2, $dst|$dst, $src2, $mask}",
-                 []>,
-               VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
-   }
- }
- let Predicates = [HasAVX2] in {
- let mayLoad = 1, hasSideEffects = 0, Constraints
- = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
- in { // Both outputs are early-clobber; $src1 is tied to $dst and $mask is tied to $mask_wb.
- defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
- VR256, vx128mem, vx256mem>, VEX_W;
- defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
- VR256, vx128mem, vy256mem>, VEX_W;
- defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
- VR256, vx128mem, vy256mem>;
- defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", // RC256 is VR128 here, so even the Yrm record uses 128-bit registers.
- VR128, vx64mem, vy128mem>;
- let ExeDomain = SSEPackedDouble in { // FP double gathers reuse the multiclass with opcodes 0x92/0x93 and VEX_W.
- defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
- VR256, vx128mem, vx256mem>, VEX_W;
- defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
- VR256, vx128mem, vy256mem>, VEX_W;
- }
- let ExeDomain = SSEPackedSingle in { // FP single gathers: same opcodes as the double forms, without VEX_W.
- defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
- VR256, vx128mem, vy256mem>;
- defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", // As with VPGATHERQD, RC256 is VR128 for this instantiation.
- VR128, vx64mem, vy128mem>;
- }
- }
- }
- //===----------------------------------------------------------------------===//
- // GFNI instructions
- //===----------------------------------------------------------------------===//
- multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT, // Register (rr) and load-folded (rm) forms selected from X86GF2P8mulb, opcode 0xCF.
- RegisterClass RC, PatFrag MemOpFrag,
- X86MemOperand X86MemOp, bit Is2Addr = 0> { // Is2Addr picks the two-operand asm string (no $src1 printed).
- let ExeDomain = SSEPackedInt,
- AsmString = !if(Is2Addr, // The defs pass "" as their asm string; the real string is supplied by this let.
- OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
- OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
- let isCommutable = 1 in // No braces: only the rr form is marked commutable.
- def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
- [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
- Sched<[SchedWriteVecALU.XMM]>, T8PD;
- def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", // $src2 is loaded through MemOpFrag.
- [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
- (MemOpFrag addr:$src2))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD; // XMM sched class is used regardless of RC.
- }
- }
- multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT, // Register (rri) and load-folded (rmi) forms of OpNode with a trailing 8-bit immediate.
- SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
- X86MemOperand X86MemOp, bit Is2Addr = 0> { // Is2Addr picks the asm string that omits $src1.
- let AsmString = !if(Is2Addr, // The defs pass "" as their asm string; the real string is supplied by this let.
- OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
- def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3), "",
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
- SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
- def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst), // $src2 is loaded through MemOpFrag.
- (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
- [(set RC:$dst, (OpVT (OpNode RC:$src1,
- (MemOpFrag addr:$src2),
- timm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; // XMM sched class is used regardless of RC.
- }
- }
- // GF2P8AFFINE_common - Instantiates GF2P8AFFINE_rmi three times for one
- // affine opcode:
- //   NAME     - legacy SSE encoding, two-address ($src1 tied to $dst), 128-bit
- //   V#NAME   - VEX encoding, three-operand, 128-bit
- //   V#NAME#Y - VEX encoding, three-operand, 256-bit (VEX_L)
- multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
-   // Legacy SSE form. Fold memory through memop rather than load: the
-   // non-VEX encoding faults on a misaligned 128-bit memory operand, and memop
-   // only matches loads that are safe to fold there. This matches the SSE
-   // GF2P8MULB instantiation, which also uses memop.
-   let Constraints = "$src1 = $dst",
-       Predicates = [HasGFNI, UseSSE2] in
-   defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
-                               VR128, memop, i128mem, 1>;
-   // VEX forms have no alignment requirement on the folded operand, so a plain
-   // load fragment is fine.
-   let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
-     defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
-                                   load, i128mem>, VEX_4V, VEX_W;
-     defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
-                                     load, i256mem>, VEX_4V, VEX_L, VEX_W;
-   }
- }
- // GF2P8MULB: legacy SSE two-address form first, then the VEX 128-bit and 256-bit forms.
- let Constraints = "$src1 = $dst",
- Predicates = [HasGFNI, UseSSE2] in // No braces: both settings apply to the GF2P8MULB defm only.
- defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop, // SSE form folds memory via memop and sets Is2Addr = 1.
- i128mem, 1>;
- let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
- defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load, // VEX forms fold memory via load and keep the three-operand asm string.
- i128mem>, VEX_4V;
- defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
- i256mem>, VEX_4V, VEX_L;
- }
- // GF2P8AFFINEINVQB, GF2P8AFFINEQB: each defm expands to the SSE, VEX-128 and VEX-256 variants produced by GF2P8AFFINE_common.
- let isCommutable = 0 in { // Explicitly marks every generated affine record as non-commutable.
- defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
- X86GF2P8affineinvqb>, TAPD; // TAPD is attached to all records generated by this defm.
- defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
- X86GF2P8affineqb>, TAPD;
- }
|