X86InstrAVX512.td 712 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750
  1. //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file describes the X86 AVX512 instruction set, defining the
  10. // instructions, and properties of the instructions which are needed for code
  11. // generation, machine code emission, and analysis.
  12. //
  13. //===----------------------------------------------------------------------===//
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class, looked up by name (VK1..VK64).
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class. Left undefined (?) for
  // NumElts > 16, where no "VK<N>Pair" operand class exists.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT (v<N>i1).
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build the 128-bit container type from the element size:
  // v8f16 (16-bit elts), v4f32 (32-bit elts) or v2f64 (64-bit elts).
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type name, e.g. "i32", "f64", "bf16".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32, obtained by
  // stripping the "i"/"f"/"b" letters from the element type name.
  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types (bf16 maps to "f").
  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Scalar-element memory operand, e.g. f32mem; bf16 reuses the f16 operand.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
  // FP scalar memory operand for intrinsics - shmem/ssmem/sdmem.
  // Undefined (?) for integer element types.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));

  // Load patterns, looked up by VT / element-type name.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar load fragments used by intrinsic patterns; bf16 shares the f16
  // fragment. Undefined (?) for integer element types.
  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. Only defined when
  // NumElts fits in 4 bits (NumElts < 16); the CD8VT form is looked up by
  // name, so a matching "CD8VT" # NumElts record must exist.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index of this vector within a wider register; undefined for
  // the full 512-bit width.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain: f16/bf16 are treated as single-precision for now.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt))));

  // Scalar FP register class for the element type. Note: integer element
  // types fall through to FR64X here.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      !if (!eq (EltTypeName, "bf16"), FR16X,
                      FR64X)));

  // An all-zeros vector of this VT, for use in patterns.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Suffix appended to instruction names to pick the width-specific variant.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
// 512-bit vector types, register class VR512.
def v64i8_info  : X86VectorVTInfo<64,  i8,   VR512, "b">;
def v32i16_info : X86VectorVTInfo<32,  i16,  VR512, "w">;
def v16i32_info : X86VectorVTInfo<16,  i32,  VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,   i64,  VR512, "q">;
def v32f16_info : X86VectorVTInfo<32,  f16,  VR512, "ph">;
def v32bf16_info: X86VectorVTInfo<32,  bf16, VR512, "pbf">;
def v16f32_info : X86VectorVTInfo<16,  f32,  VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,   f64,  VR512, "pd">;

// 256-bit vector types. "x" in v32i8x_info means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,   VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16,  VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32,  VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64,  VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16,  VR256X, "ph">;
def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
def v8f32x_info  : X86VectorVTInfo<8,  f32,  VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64,  VR256X, "pd">;

// 128-bit vector types, register class VR128X.
def v16i8x_info  : X86VectorVTInfo<16, i8,   VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16,  VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32,  VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64,  VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16,  VR128X, "ph">;
def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
def v4f32x_info  : X86VectorVTInfo<4,  f32,  VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64,  VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
// (Scalar integers keep their GPR classes; scalar FP uses VR128X.)
def i32x_info  : X86VectorVTInfo<1, i32,  GR32,   "si">;
def i64x_info  : X86VectorVTInfo<1, i64,  GR64,   "sq">;
def f16x_info  : X86VectorVTInfo<1, f16,  VR128X, "sh">;
def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
def f32x_info  : X86VectorVTInfo<1, f32,  VR128X, "ss">;
def f64x_info  : X86VectorVTInfo<1, f64,  VR128X, "sd">;
  124. class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
  125. X86VectorVTInfo i128> {
  126. X86VectorVTInfo info512 = i512;
  127. X86VectorVTInfo info256 = i256;
  128. X86VectorVTInfo info128 = i128;
  129. }
  130. def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
  131. v16i8x_info>;
  132. def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
  133. v8i16x_info>;
  134. def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
  135. v4i32x_info>;
  136. def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
  137. v2i64x_info>;
  138. def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
  139. v8f16x_info>;
  140. def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
  141. v8bf16x_info>;
  142. def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
  143. v4f32x_info>;
  144. def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
  145. v2f64x_info>;
  146. class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
  147. ValueType _vt> {
  148. RegisterClass KRC = _krc;
  149. RegisterClass KRCWM = _krcwm;
  150. ValueType KVT = _vt;
  151. }
  152. def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
  153. def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
  154. def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
  155. def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
  156. def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
  157. def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
  158. def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
  159. // Used for matching masked operations. Ensures the operation part only has a
  160. // single use.
  161. def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
  162. (vselect node:$mask, node:$src1, node:$src2), [{
  163. return isProfitableToFormMaskedOp(N);
  164. }]>;
  165. def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
  166. (X86selects node:$mask, node:$src1, node:$src2), [{
  167. return isProfitableToFormMaskedOp(N);
  168. }]>;
  169. // This multiclass generates the masking variants from the non-masking
  170. // variant. It only provides the assembly pieces for the masking variants.
  171. // It assumes custom ISel patterns for masking which can be provided as
  172. // template arguments.
  173. multiclass AVX512_maskable_custom<bits<8> O, Format F,
  174. dag Outs,
  175. dag Ins, dag MaskingIns, dag ZeroMaskingIns,
  176. string OpcodeStr,
  177. string AttSrcAsm, string IntelSrcAsm,
  178. list<dag> Pattern,
  179. list<dag> MaskingPattern,
  180. list<dag> ZeroMaskingPattern,
  181. string MaskingConstraint = "",
  182. bit IsCommutable = 0,
  183. bit IsKCommutable = 0,
  184. bit IsKZCommutable = IsCommutable,
  185. string ClobberConstraint = ""> {
  186. let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  187. def NAME: AVX512<O, F, Outs, Ins,
  188. OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
  189. "$dst, "#IntelSrcAsm#"}",
  190. Pattern>;
  191. // Prefer over VMOV*rrk Pat<>
  192. let isCommutable = IsKCommutable in
  193. def NAME#k: AVX512<O, F, Outs, MaskingIns,
  194. OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
  195. "$dst {${mask}}, "#IntelSrcAsm#"}",
  196. MaskingPattern>,
  197. EVEX_K {
  198. // In case of the 3src subclass this is overridden with a let.
  199. string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
  200. !if(!eq(MaskingConstraint, ""), ClobberConstraint,
  201. !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  202. }
  203. // Zero mask does not add any restrictions to commute operands transformation.
  204. // So, it is Ok to use IsCommutable instead of IsKCommutable.
  205. let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
  206. Constraints = ClobberConstraint in
  207. def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
  208. OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
  209. "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
  210. ZeroMaskingPattern>,
  211. EVEX_KZ;
  212. }
  213. // Common base class of AVX512_maskable and AVX512_maskable_3src.
  214. multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
  215. dag Outs,
  216. dag Ins, dag MaskingIns, dag ZeroMaskingIns,
  217. string OpcodeStr,
  218. string AttSrcAsm, string IntelSrcAsm,
  219. dag RHS, dag MaskingRHS,
  220. SDPatternOperator Select = vselect_mask,
  221. string MaskingConstraint = "",
  222. bit IsCommutable = 0,
  223. bit IsKCommutable = 0,
  224. bit IsKZCommutable = IsCommutable,
  225. string ClobberConstraint = ""> :
  226. AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
  227. AttSrcAsm, IntelSrcAsm,
  228. [(set _.RC:$dst, RHS)],
  229. [(set _.RC:$dst, MaskingRHS)],
  230. [(set _.RC:$dst,
  231. (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
  232. MaskingConstraint, IsCommutable,
  233. IsKCommutable, IsKZCommutable, ClobberConstraint>;
  234. // This multiclass generates the unconditional/non-masking, the masking and
  235. // the zero-masking variant of the vector instruction. In the masking case, the
  236. // preserved vector elements come from a new dummy input operand tied to $dst.
  237. // This version uses a separate dag for non-masking and masking.
  238. multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
  239. dag Outs, dag Ins, string OpcodeStr,
  240. string AttSrcAsm, string IntelSrcAsm,
  241. dag RHS, dag MaskRHS,
  242. string ClobberConstraint = "",
  243. bit IsCommutable = 0, bit IsKCommutable = 0,
  244. bit IsKZCommutable = IsCommutable> :
  245. AVX512_maskable_custom<O, F, Outs, Ins,
  246. !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
  247. !con((ins _.KRCWM:$mask), Ins),
  248. OpcodeStr, AttSrcAsm, IntelSrcAsm,
  249. [(set _.RC:$dst, RHS)],
  250. [(set _.RC:$dst,
  251. (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
  252. [(set _.RC:$dst,
  253. (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
  254. "$src0 = $dst", IsCommutable, IsKCommutable,
  255. IsKZCommutable, ClobberConstraint>;
  256. // This multiclass generates the unconditional/non-masking, the masking and
  257. // the zero-masking variant of the vector instruction. In the masking case, the
  258. // preserved vector elements come from a new dummy input operand tied to $dst.
  259. multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
  260. dag Outs, dag Ins, string OpcodeStr,
  261. string AttSrcAsm, string IntelSrcAsm,
  262. dag RHS,
  263. bit IsCommutable = 0, bit IsKCommutable = 0,
  264. bit IsKZCommutable = IsCommutable,
  265. SDPatternOperator Select = vselect_mask,
  266. string ClobberConstraint = ""> :
  267. AVX512_maskable_common<O, F, _, Outs, Ins,
  268. !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
  269. !con((ins _.KRCWM:$mask), Ins),
  270. OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
  271. (Select _.KRCWM:$mask, RHS, _.RC:$src0),
  272. Select, "$src0 = $dst", IsCommutable, IsKCommutable,
  273. IsKZCommutable, ClobberConstraint>;
  274. // This multiclass generates the unconditional/non-masking, the masking and
  275. // the zero-masking variant of the scalar instruction.
  276. multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
  277. dag Outs, dag Ins, string OpcodeStr,
  278. string AttSrcAsm, string IntelSrcAsm,
  279. dag RHS> :
  280. AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
  281. RHS, 0, 0, 0, X86selects_mask>;
  282. // Similar to AVX512_maskable but in this case one of the source operands
  283. // ($src1) is already tied to $dst so we just use that for the preserved
  284. // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
  285. // $src1.
  286. multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
  287. dag Outs, dag NonTiedIns, string OpcodeStr,
  288. string AttSrcAsm, string IntelSrcAsm,
  289. dag RHS,
  290. bit IsCommutable = 0,
  291. bit IsKCommutable = 0,
  292. SDPatternOperator Select = vselect_mask,
  293. bit MaskOnly = 0> :
  294. AVX512_maskable_common<O, F, _, Outs,
  295. !con((ins _.RC:$src1), NonTiedIns),
  296. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  297. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  298. OpcodeStr, AttSrcAsm, IntelSrcAsm,
  299. !if(MaskOnly, (null_frag), RHS),
  300. (Select _.KRCWM:$mask, RHS, _.RC:$src1),
  301. Select, "", IsCommutable, IsKCommutable>;
  302. // Similar to AVX512_maskable_3src but in this case the input VT for the tied
  303. // operand differs from the output VT. This requires a bitconvert on
  304. // the preserved vector going into the vselect.
  305. // NOTE: The unmasked pattern is disabled.
  306. multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
  307. X86VectorVTInfo InVT,
  308. dag Outs, dag NonTiedIns, string OpcodeStr,
  309. string AttSrcAsm, string IntelSrcAsm,
  310. dag RHS, bit IsCommutable = 0> :
  311. AVX512_maskable_common<O, F, OutVT, Outs,
  312. !con((ins InVT.RC:$src1), NonTiedIns),
  313. !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
  314. !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
  315. OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
  316. (vselect_mask InVT.KRCWM:$mask, RHS,
  317. (bitconvert InVT.RC:$src1)),
  318. vselect_mask, "", IsCommutable>;
  319. multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
  320. dag Outs, dag NonTiedIns, string OpcodeStr,
  321. string AttSrcAsm, string IntelSrcAsm,
  322. dag RHS,
  323. bit IsCommutable = 0,
  324. bit IsKCommutable = 0,
  325. bit MaskOnly = 0> :
  326. AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
  327. IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
  328. X86selects_mask, MaskOnly>;
  329. multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
  330. dag Outs, dag Ins,
  331. string OpcodeStr,
  332. string AttSrcAsm, string IntelSrcAsm,
  333. list<dag> Pattern> :
  334. AVX512_maskable_custom<O, F, Outs, Ins,
  335. !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
  336. !con((ins _.KRCWM:$mask), Ins),
  337. OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
  338. "$src0 = $dst">;
  339. multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
  340. dag Outs, dag NonTiedIns,
  341. string OpcodeStr,
  342. string AttSrcAsm, string IntelSrcAsm,
  343. list<dag> Pattern> :
  344. AVX512_maskable_custom<O, F, Outs,
  345. !con((ins _.RC:$src1), NonTiedIns),
  346. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  347. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  348. OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
  349. "">;
  350. // Instruction with mask that puts result in mask register,
  351. // like "compare" and "vptest"
  352. multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
  353. dag Outs,
  354. dag Ins, dag MaskingIns,
  355. string OpcodeStr,
  356. string AttSrcAsm, string IntelSrcAsm,
  357. list<dag> Pattern,
  358. list<dag> MaskingPattern,
  359. bit IsCommutable = 0> {
  360. let isCommutable = IsCommutable in {
  361. def NAME: AVX512<O, F, Outs, Ins,
  362. OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
  363. "$dst, "#IntelSrcAsm#"}",
  364. Pattern>;
  365. def NAME#k: AVX512<O, F, Outs, MaskingIns,
  366. OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
  367. "$dst {${mask}}, "#IntelSrcAsm#"}",
  368. MaskingPattern>, EVEX_K;
  369. }
  370. }
  371. multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
  372. dag Outs,
  373. dag Ins, dag MaskingIns,
  374. string OpcodeStr,
  375. string AttSrcAsm, string IntelSrcAsm,
  376. dag RHS, dag MaskingRHS,
  377. bit IsCommutable = 0> :
  378. AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
  379. AttSrcAsm, IntelSrcAsm,
  380. [(set _.KRC:$dst, RHS)],
  381. [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
  382. multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
  383. dag Outs, dag Ins, string OpcodeStr,
  384. string AttSrcAsm, string IntelSrcAsm,
  385. dag RHS, dag RHS_su, bit IsCommutable = 0> :
  386. AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
  387. !con((ins _.KRCWM:$mask), Ins),
  388. OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
  389. (and _.KRCWM:$mask, RHS_su), IsCommutable>;
  390. // Used by conversion instructions.
  391. multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
  392. dag Outs,
  393. dag Ins, dag MaskingIns, dag ZeroMaskingIns,
  394. string OpcodeStr,
  395. string AttSrcAsm, string IntelSrcAsm,
  396. dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  397. AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
  398. AttSrcAsm, IntelSrcAsm,
  399. [(set _.RC:$dst, RHS)],
  400. [(set _.RC:$dst, MaskingRHS)],
  401. [(set _.RC:$dst, ZeroMaskingRHS)],
  402. "$src0 = $dst">;
  403. multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
  404. dag Outs, dag NonTiedIns, string OpcodeStr,
  405. string AttSrcAsm, string IntelSrcAsm,
  406. dag RHS, dag MaskingRHS, bit IsCommutable,
  407. bit IsKCommutable> :
  408. AVX512_maskable_custom<O, F, Outs,
  409. !con((ins _.RC:$src1), NonTiedIns),
  410. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  411. !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
  412. OpcodeStr, AttSrcAsm, IntelSrcAsm,
  413. [(set _.RC:$dst, RHS)],
  414. [(set _.RC:$dst,
  415. (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
  416. [(set _.RC:$dst,
  417. (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
  418. "", IsCommutable, IsKCommutable>;
  419. // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
  420. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
  421. // swizzled by ExecutionDomainFix to pxor.
  422. // We set canFoldAsLoad because this can be converted to a constant-pool
  423. // load of an all-zeros value if folding it would be beneficial.
  424. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
  425. isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  426. def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
  427. [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  428. def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
  429. [(set VR512:$dst, (v16i32 immAllOnesV))]>;
  430. }
  431. let Predicates = [HasAVX512] in {
  432. def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
  433. def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
  434. def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
  435. def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
  436. def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
  437. def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
  438. }
  439. // Alias instructions that allow VPTERNLOG to be used with a mask to create
  440. // a mix of all ones and all zeros elements. This is done this way to force
  441. // the same register to be used as input for all three sources.
  442. let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
  443. def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
  444. (ins VK16WM:$mask), "",
  445. [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
  446. (v16i32 immAllOnesV),
  447. (v16i32 immAllZerosV)))]>;
  448. def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
  449. (ins VK8WM:$mask), "",
  450. [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
  451. (v8i64 immAllOnesV),
  452. (v8i64 immAllZerosV)))]>;
  453. }
  454. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
  455. isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  456. def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
  457. [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  458. def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
  459. [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
  460. }
  461. let Predicates = [HasAVX512] in {
  462. def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
  463. def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
  464. def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
  465. def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
  466. def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
  467. def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
  468. def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
  469. def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
  470. def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
  471. def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
  472. def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
  473. def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
  474. }
  475. // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
  476. // This is expanded by ExpandPostRAPseudos.
  477. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
  478. isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  479. def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
  480. [(set FR16X:$dst, fp16imm0)]>;
  481. def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
  482. [(set FR32X:$dst, fp32imm0)]>;
  483. def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
  484. [(set FR64X:$dst, fp64imm0)]>;
  485. def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
  486. [(set VR128X:$dst, fp128imm0)]>;
  487. }
  488. //===----------------------------------------------------------------------===//
  489. // AVX-512 - VECTOR INSERT
  490. //
  491. // Supports two different pattern operators for mask and unmasked ops. Allows
  492. // null_frag to be passed for one.
  493. multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
  494. X86VectorVTInfo To,
  495. SDPatternOperator vinsert_insert,
  496. SDPatternOperator vinsert_for_mask,
  497. X86FoldableSchedWrite sched> {
  498. let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
  499. defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
  500. (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
  501. "vinsert" # From.EltTypeName # "x" # From.NumElts,
  502. "$src3, $src2, $src1", "$src1, $src2, $src3",
  503. (vinsert_insert:$src3 (To.VT To.RC:$src1),
  504. (From.VT From.RC:$src2),
  505. (iPTR imm)),
  506. (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
  507. (From.VT From.RC:$src2),
  508. (iPTR imm))>,
  509. AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  510. let mayLoad = 1 in
  511. defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
  512. (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
  513. "vinsert" # From.EltTypeName # "x" # From.NumElts,
  514. "$src3, $src2, $src1", "$src1, $src2, $src3",
  515. (vinsert_insert:$src3 (To.VT To.RC:$src1),
  516. (From.VT (From.LdFrag addr:$src2)),
  517. (iPTR imm)),
  518. (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
  519. (From.VT (From.LdFrag addr:$src2)),
  520. (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
  521. EVEX_CD8<From.EltSize, From.CD8TupleForm>,
  522. Sched<[sched.Folded, sched.ReadAfterFold]>;
  523. }
  524. }
  525. // Passes the same pattern operator for masked and unmasked ops.
  526. multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
  527. X86VectorVTInfo To,
  528. SDPatternOperator vinsert_insert,
  529. X86FoldableSchedWrite sched> :
  530. vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
  531. multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
  532. X86VectorVTInfo To, PatFrag vinsert_insert,
  533. SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  534. let Predicates = p in {
  535. def : Pat<(vinsert_insert:$ins
  536. (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
  537. (To.VT (!cast<Instruction>(InstrStr#"rr")
  538. To.RC:$src1, From.RC:$src2,
  539. (INSERT_get_vinsert_imm To.RC:$ins)))>;
  540. def : Pat<(vinsert_insert:$ins
  541. (To.VT To.RC:$src1),
  542. (From.VT (From.LdFrag addr:$src2)),
  543. (iPTR imm)),
  544. (To.VT (!cast<Instruction>(InstrStr#"rm")
  545. To.RC:$src1, addr:$src2,
  546. (INSERT_get_vinsert_imm To.RC:$ins)))>;
  547. }
  548. }
  549. multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
  550. ValueType EltVT64, int Opcode256,
  551. X86FoldableSchedWrite sched> {
  552. let Predicates = [HasVLX] in
  553. defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
  554. X86VectorVTInfo< 4, EltVT32, VR128X>,
  555. X86VectorVTInfo< 8, EltVT32, VR256X>,
  556. vinsert128_insert, sched>, EVEX_V256;
  557. defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
  558. X86VectorVTInfo< 4, EltVT32, VR128X>,
  559. X86VectorVTInfo<16, EltVT32, VR512>,
  560. vinsert128_insert, sched>, EVEX_V512;
  561. defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
  562. X86VectorVTInfo< 4, EltVT64, VR256X>,
  563. X86VectorVTInfo< 8, EltVT64, VR512>,
  564. vinsert256_insert, sched>, VEX_W, EVEX_V512;
  565. // Even with DQI we'd like to only use these instructions for masking.
  566. let Predicates = [HasVLX, HasDQI] in
  567. defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
  568. X86VectorVTInfo< 2, EltVT64, VR128X>,
  569. X86VectorVTInfo< 4, EltVT64, VR256X>,
  570. null_frag, vinsert128_insert, sched>,
  571. VEX_W1X, EVEX_V256;
  572. // Even with DQI we'd like to only use these instructions for masking.
  573. let Predicates = [HasDQI] in {
  574. defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
  575. X86VectorVTInfo< 2, EltVT64, VR128X>,
  576. X86VectorVTInfo< 8, EltVT64, VR512>,
  577. null_frag, vinsert128_insert, sched>,
  578. VEX_W, EVEX_V512;
  579. defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
  580. X86VectorVTInfo< 8, EltVT32, VR256X>,
  581. X86VectorVTInfo<16, EltVT32, VR512>,
  582. null_frag, vinsert256_insert, sched>,
  583. EVEX_V512;
  584. }
  585. }
  586. // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
  587. defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
  588. defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
  589. // Codegen pattern with the alternative types,
  590. // Even with AVX512DQ we'll still use these for unmasked operations.
  591. defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
  592. vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
  593. defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
  594. vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
  595. defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
  596. vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
  597. defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
  598. vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
  599. defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
  600. vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
  601. defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
  602. vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
  603. // Codegen pattern with the alternative types insert VEC128 into VEC256
  604. defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
  605. vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
  606. defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
  607. vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
  608. defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
  609. vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
  610. // Codegen pattern with the alternative types insert VEC128 into VEC512
  611. defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
  612. vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
  613. defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
  614. vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
  615. defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
  616. vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
  617. // Codegen pattern with the alternative types insert VEC256 into VEC512
  618. defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
  619. vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
  620. defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
  621. vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
  622. defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
  623. vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
  624. multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
  625. X86VectorVTInfo To, X86VectorVTInfo Cast,
  626. PatFrag vinsert_insert,
  627. SDNodeXForm INSERT_get_vinsert_imm,
  628. list<Predicate> p> {
  629. let Predicates = p in {
  630. def : Pat<(Cast.VT
  631. (vselect_mask Cast.KRCWM:$mask,
  632. (bitconvert
  633. (vinsert_insert:$ins (To.VT To.RC:$src1),
  634. (From.VT From.RC:$src2),
  635. (iPTR imm))),
  636. Cast.RC:$src0)),
  637. (!cast<Instruction>(InstrStr#"rrk")
  638. Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
  639. (INSERT_get_vinsert_imm To.RC:$ins))>;
  640. def : Pat<(Cast.VT
  641. (vselect_mask Cast.KRCWM:$mask,
  642. (bitconvert
  643. (vinsert_insert:$ins (To.VT To.RC:$src1),
  644. (From.VT
  645. (bitconvert
  646. (From.LdFrag addr:$src2))),
  647. (iPTR imm))),
  648. Cast.RC:$src0)),
  649. (!cast<Instruction>(InstrStr#"rmk")
  650. Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
  651. (INSERT_get_vinsert_imm To.RC:$ins))>;
  652. def : Pat<(Cast.VT
  653. (vselect_mask Cast.KRCWM:$mask,
  654. (bitconvert
  655. (vinsert_insert:$ins (To.VT To.RC:$src1),
  656. (From.VT From.RC:$src2),
  657. (iPTR imm))),
  658. Cast.ImmAllZerosV)),
  659. (!cast<Instruction>(InstrStr#"rrkz")
  660. Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
  661. (INSERT_get_vinsert_imm To.RC:$ins))>;
  662. def : Pat<(Cast.VT
  663. (vselect_mask Cast.KRCWM:$mask,
  664. (bitconvert
  665. (vinsert_insert:$ins (To.VT To.RC:$src1),
  666. (From.VT (From.LdFrag addr:$src2)),
  667. (iPTR imm))),
  668. Cast.ImmAllZerosV)),
  669. (!cast<Instruction>(InstrStr#"rmkz")
  670. Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
  671. (INSERT_get_vinsert_imm To.RC:$ins))>;
  672. }
  673. }
  674. defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
  675. v8f32x_info, vinsert128_insert,
  676. INSERT_get_vinsert128_imm, [HasVLX]>;
  677. defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
  678. v4f64x_info, vinsert128_insert,
  679. INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
  680. defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
  681. v8i32x_info, vinsert128_insert,
  682. INSERT_get_vinsert128_imm, [HasVLX]>;
  683. defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
  684. v8i32x_info, vinsert128_insert,
  685. INSERT_get_vinsert128_imm, [HasVLX]>;
  686. defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
  687. v8i32x_info, vinsert128_insert,
  688. INSERT_get_vinsert128_imm, [HasVLX]>;
  689. defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
  690. v4i64x_info, vinsert128_insert,
  691. INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
  692. defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
  693. v4i64x_info, vinsert128_insert,
  694. INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
  695. defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
  696. v4i64x_info, vinsert128_insert,
  697. INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
  698. defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
  699. v16f32_info, vinsert128_insert,
  700. INSERT_get_vinsert128_imm, [HasAVX512]>;
  701. defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
  702. v8f64_info, vinsert128_insert,
  703. INSERT_get_vinsert128_imm, [HasDQI]>;
  704. defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
  705. v16i32_info, vinsert128_insert,
  706. INSERT_get_vinsert128_imm, [HasAVX512]>;
  707. defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
  708. v16i32_info, vinsert128_insert,
  709. INSERT_get_vinsert128_imm, [HasAVX512]>;
  710. defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
  711. v16i32_info, vinsert128_insert,
  712. INSERT_get_vinsert128_imm, [HasAVX512]>;
  713. defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
  714. v8i64_info, vinsert128_insert,
  715. INSERT_get_vinsert128_imm, [HasDQI]>;
  716. defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
  717. v8i64_info, vinsert128_insert,
  718. INSERT_get_vinsert128_imm, [HasDQI]>;
  719. defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
  720. v8i64_info, vinsert128_insert,
  721. INSERT_get_vinsert128_imm, [HasDQI]>;
  722. defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
  723. v16f32_info, vinsert256_insert,
  724. INSERT_get_vinsert256_imm, [HasDQI]>;
  725. defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
  726. v8f64_info, vinsert256_insert,
  727. INSERT_get_vinsert256_imm, [HasAVX512]>;
  728. defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
  729. v16i32_info, vinsert256_insert,
  730. INSERT_get_vinsert256_imm, [HasDQI]>;
  731. defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
  732. v16i32_info, vinsert256_insert,
  733. INSERT_get_vinsert256_imm, [HasDQI]>;
  734. defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
  735. v16i32_info, vinsert256_insert,
  736. INSERT_get_vinsert256_imm, [HasDQI]>;
  737. defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
  738. v8i64_info, vinsert256_insert,
  739. INSERT_get_vinsert256_imm, [HasAVX512]>;
  740. defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
  741. v8i64_info, vinsert256_insert,
  742. INSERT_get_vinsert256_imm, [HasAVX512]>;
  743. defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
  744. v8i64_info, vinsert256_insert,
  745. INSERT_get_vinsert256_imm, [HasAVX512]>;
  746. // vinsertps - insert f32 to XMM
  747. let ExeDomain = SSEPackedSingle in {
  748. let isCommutable = 1 in
  749. def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
  750. (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
  751. "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
  752. [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
  753. EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  754. def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
  755. (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
  756. "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
  757. [(set VR128X:$dst, (X86insertps VR128X:$src1,
  758. (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
  759. timm:$src3))]>,
  760. EVEX_4V, EVEX_CD8<32, CD8VT1>,
  761. Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
  762. }
  763. //===----------------------------------------------------------------------===//
  764. // AVX-512 VECTOR EXTRACT
  765. //---
  766. // Supports two different pattern operators for mask and unmasked ops. Allows
  767. // null_frag to be passed for one.
  768. multiclass vextract_for_size_split<int Opcode,
  769. X86VectorVTInfo From, X86VectorVTInfo To,
  770. SDPatternOperator vextract_extract,
  771. SDPatternOperator vextract_for_mask,
  772. SchedWrite SchedRR, SchedWrite SchedMR> {
  773. let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
  774. defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
  775. (ins From.RC:$src1, u8imm:$idx),
  776. "vextract" # To.EltTypeName # "x" # To.NumElts,
  777. "$idx, $src1", "$src1, $idx",
  778. (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
  779. (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
  780. AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
  781. def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
  782. (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
  783. "vextract" # To.EltTypeName # "x" # To.NumElts #
  784. "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
  785. [(store (To.VT (vextract_extract:$idx
  786. (From.VT From.RC:$src1), (iPTR imm))),
  787. addr:$dst)]>, EVEX,
  788. Sched<[SchedMR]>;
  789. let mayStore = 1, hasSideEffects = 0 in
  790. def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
  791. (ins To.MemOp:$dst, To.KRCWM:$mask,
  792. From.RC:$src1, u8imm:$idx),
  793. "vextract" # To.EltTypeName # "x" # To.NumElts #
  794. "\t{$idx, $src1, $dst {${mask}}|"
  795. "$dst {${mask}}, $src1, $idx}", []>,
  796. EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  797. }
  798. }
  799. // Passes the same pattern operator for masked and unmasked ops.
  800. multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
  801. X86VectorVTInfo To,
  802. SDPatternOperator vextract_extract,
  803. SchedWrite SchedRR, SchedWrite SchedMR> :
  804. vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
  805. // Codegen pattern for the alternative types
  806. multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
  807. X86VectorVTInfo To, PatFrag vextract_extract,
  808. SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  809. let Predicates = p in {
  810. def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
  811. (To.VT (!cast<Instruction>(InstrStr#"rr")
  812. From.RC:$src1,
  813. (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  814. def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
  815. (iPTR imm))), addr:$dst),
  816. (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
  817. (EXTRACT_get_vextract_imm To.RC:$ext))>;
  818. }
  819. }
  820. multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
  821. ValueType EltVT64, int Opcode256,
  822. SchedWrite SchedRR, SchedWrite SchedMR> {
  823. let Predicates = [HasAVX512] in {
  824. defm NAME # "32x4Z" : vextract_for_size<Opcode128,
  825. X86VectorVTInfo<16, EltVT32, VR512>,
  826. X86VectorVTInfo< 4, EltVT32, VR128X>,
  827. vextract128_extract, SchedRR, SchedMR>,
  828. EVEX_V512, EVEX_CD8<32, CD8VT4>;
  829. defm NAME # "64x4Z" : vextract_for_size<Opcode256,
  830. X86VectorVTInfo< 8, EltVT64, VR512>,
  831. X86VectorVTInfo< 4, EltVT64, VR256X>,
  832. vextract256_extract, SchedRR, SchedMR>,
  833. VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  834. }
  835. let Predicates = [HasVLX] in
  836. defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
  837. X86VectorVTInfo< 8, EltVT32, VR256X>,
  838. X86VectorVTInfo< 4, EltVT32, VR128X>,
  839. vextract128_extract, SchedRR, SchedMR>,
  840. EVEX_V256, EVEX_CD8<32, CD8VT4>;
  841. // Even with DQI we'd like to only use these instructions for masking.
  842. let Predicates = [HasVLX, HasDQI] in
  843. defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
  844. X86VectorVTInfo< 4, EltVT64, VR256X>,
  845. X86VectorVTInfo< 2, EltVT64, VR128X>,
  846. null_frag, vextract128_extract, SchedRR, SchedMR>,
  847. VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  848. // Even with DQI we'd like to only use these instructions for masking.
  849. let Predicates = [HasDQI] in {
  850. defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
  851. X86VectorVTInfo< 8, EltVT64, VR512>,
  852. X86VectorVTInfo< 2, EltVT64, VR128X>,
  853. null_frag, vextract128_extract, SchedRR, SchedMR>,
  854. VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  855. defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
  856. X86VectorVTInfo<16, EltVT32, VR512>,
  857. X86VectorVTInfo< 8, EltVT32, VR256X>,
  858. null_frag, vextract256_extract, SchedRR, SchedMR>,
  859. EVEX_V512, EVEX_CD8<32, CD8VT8>;
  860. }
  861. }
  862. // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
  863. defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
  864. defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
  865. // extract_subvector codegen patterns with the alternative types.
  866. // Even with AVX512DQ we'll still use these for unmasked operations.
  867. defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
  868. vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
  869. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
  870. vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
  871. defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
  872. vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
  873. defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
  874. vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
  875. defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
  876. vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
  877. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
  878. vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
  879. // Codegen pattern with the alternative types extract VEC128 from VEC256
  880. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
  881. vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
  882. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
  883. vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
  884. defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
  885. vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
  886. // Codegen pattern with the alternative types extract VEC128 from VEC512
  887. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
  888. vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
  889. defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
  890. vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
  891. defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
  892. vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
  893. // Codegen pattern with the alternative types extract VEC256 from VEC512
  894. defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
  895. vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
  896. defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
  897. vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
  898. defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
  899. vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
  900. // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
  901. // smaller extract to enable EVEX->VEX.
  902. let Predicates = [NoVLX] in {
  903. def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
  904. (v2i64 (VEXTRACTI128rr
  905. (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
  906. (iPTR 1)))>;
  907. def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
  908. (v2f64 (VEXTRACTF128rr
  909. (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
  910. (iPTR 1)))>;
  911. def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
  912. (v4i32 (VEXTRACTI128rr
  913. (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
  914. (iPTR 1)))>;
  915. def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
  916. (v4f32 (VEXTRACTF128rr
  917. (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
  918. (iPTR 1)))>;
  919. def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
  920. (v8i16 (VEXTRACTI128rr
  921. (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
  922. (iPTR 1)))>;
  923. def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
  924. (v8f16 (VEXTRACTF128rr
  925. (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
  926. (iPTR 1)))>;
  927. def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
  928. (v16i8 (VEXTRACTI128rr
  929. (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
  930. (iPTR 1)))>;
  931. }
  932. // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
  933. // smaller extract to enable EVEX->VEX.
  934. let Predicates = [HasVLX] in {
  935. def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
  936. (v2i64 (VEXTRACTI32x4Z256rr
  937. (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
  938. (iPTR 1)))>;
  939. def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
  940. (v2f64 (VEXTRACTF32x4Z256rr
  941. (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
  942. (iPTR 1)))>;
  943. def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
  944. (v4i32 (VEXTRACTI32x4Z256rr
  945. (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
  946. (iPTR 1)))>;
  947. def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
  948. (v4f32 (VEXTRACTF32x4Z256rr
  949. (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
  950. (iPTR 1)))>;
  951. def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
  952. (v8i16 (VEXTRACTI32x4Z256rr
  953. (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
  954. (iPTR 1)))>;
  955. def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
  956. (v8f16 (VEXTRACTF32x4Z256rr
  957. (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
  958. (iPTR 1)))>;
  959. def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
  960. (v16i8 (VEXTRACTI32x4Z256rr
  961. (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
  962. (iPTR 1)))>;
  963. }
  964. // Additional patterns for handling a bitcast between the vselect and the
  965. // extract_subvector.
  966. multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
  967. X86VectorVTInfo To, X86VectorVTInfo Cast,
  968. PatFrag vextract_extract,
  969. SDNodeXForm EXTRACT_get_vextract_imm,
  970. list<Predicate> p> {
  971. let Predicates = p in {
  972. def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
  973. (bitconvert
  974. (To.VT (vextract_extract:$ext
  975. (From.VT From.RC:$src), (iPTR imm)))),
  976. To.RC:$src0)),
  977. (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
  978. Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
  979. (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  980. def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
  981. (bitconvert
  982. (To.VT (vextract_extract:$ext
  983. (From.VT From.RC:$src), (iPTR imm)))),
  984. Cast.ImmAllZerosV)),
  985. (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
  986. Cast.KRCWM:$mask, From.RC:$src,
  987. (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  988. }
  989. }
  990. defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
  991. v4f32x_info, vextract128_extract,
  992. EXTRACT_get_vextract128_imm, [HasVLX]>;
  993. defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
  994. v2f64x_info, vextract128_extract,
  995. EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
  996. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
  997. v4i32x_info, vextract128_extract,
  998. EXTRACT_get_vextract128_imm, [HasVLX]>;
  999. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
  1000. v4i32x_info, vextract128_extract,
  1001. EXTRACT_get_vextract128_imm, [HasVLX]>;
  1002. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
  1003. v4i32x_info, vextract128_extract,
  1004. EXTRACT_get_vextract128_imm, [HasVLX]>;
  1005. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
  1006. v2i64x_info, vextract128_extract,
  1007. EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
  1008. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
  1009. v2i64x_info, vextract128_extract,
  1010. EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
  1011. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
  1012. v2i64x_info, vextract128_extract,
  1013. EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
  1014. defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
  1015. v4f32x_info, vextract128_extract,
  1016. EXTRACT_get_vextract128_imm, [HasAVX512]>;
  1017. defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
  1018. v2f64x_info, vextract128_extract,
  1019. EXTRACT_get_vextract128_imm, [HasDQI]>;
  1020. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
  1021. v4i32x_info, vextract128_extract,
  1022. EXTRACT_get_vextract128_imm, [HasAVX512]>;
  1023. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
  1024. v4i32x_info, vextract128_extract,
  1025. EXTRACT_get_vextract128_imm, [HasAVX512]>;
  1026. defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
  1027. v4i32x_info, vextract128_extract,
  1028. EXTRACT_get_vextract128_imm, [HasAVX512]>;
  1029. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
  1030. v2i64x_info, vextract128_extract,
  1031. EXTRACT_get_vextract128_imm, [HasDQI]>;
  1032. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
  1033. v2i64x_info, vextract128_extract,
  1034. EXTRACT_get_vextract128_imm, [HasDQI]>;
  1035. defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
  1036. v2i64x_info, vextract128_extract,
  1037. EXTRACT_get_vextract128_imm, [HasDQI]>;
  1038. defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
  1039. v8f32x_info, vextract256_extract,
  1040. EXTRACT_get_vextract256_imm, [HasDQI]>;
  1041. defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
  1042. v4f64x_info, vextract256_extract,
  1043. EXTRACT_get_vextract256_imm, [HasAVX512]>;
  1044. defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
  1045. v8i32x_info, vextract256_extract,
  1046. EXTRACT_get_vextract256_imm, [HasDQI]>;
  1047. defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
  1048. v8i32x_info, vextract256_extract,
  1049. EXTRACT_get_vextract256_imm, [HasDQI]>;
  1050. defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
  1051. v8i32x_info, vextract256_extract,
  1052. EXTRACT_get_vextract256_imm, [HasDQI]>;
  1053. defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
  1054. v4i64x_info, vextract256_extract,
  1055. EXTRACT_get_vextract256_imm, [HasAVX512]>;
  1056. defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
  1057. v4i64x_info, vextract256_extract,
  1058. EXTRACT_get_vextract256_imm, [HasAVX512]>;
  1059. defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
  1060. v4i64x_info, vextract256_extract,
  1061. EXTRACT_get_vextract256_imm, [HasAVX512]>;
  1062. // vextractps - extract 32 bits from XMM
  1063. def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
  1064. (ins VR128X:$src1, u8imm:$src2),
  1065. "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  1066. [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
  1067. EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
  1068. def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
  1069. (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
  1070. "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  1071. [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
  1072. addr:$dst)]>,
  1073. EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
  1074. //===---------------------------------------------------------------------===//
  1075. // AVX-512 BROADCAST
  1076. //---
  1077. // broadcast with a scalar argument.
  1078. multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
  1079. X86VectorVTInfo SrcInfo> {
  1080. def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
  1081. (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
  1082. (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  1083. def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
  1084. (X86VBroadcast SrcInfo.FRC:$src),
  1085. DestInfo.RC:$src0)),
  1086. (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
  1087. DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
  1088. (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  1089. def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
  1090. (X86VBroadcast SrcInfo.FRC:$src),
  1091. DestInfo.ImmAllZerosV)),
  1092. (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
  1093. DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  1094. }
  1095. // Split version to allow mask and broadcast node to be different types. This
  1096. // helps support the 32x2 broadcasts.
  1097. multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
  1098. SchedWrite SchedRR, SchedWrite SchedRM,
  1099. X86VectorVTInfo MaskInfo,
  1100. X86VectorVTInfo DestInfo,
  1101. X86VectorVTInfo SrcInfo,
  1102. bit IsConvertibleToThreeAddress,
  1103. SDPatternOperator UnmaskedOp = X86VBroadcast,
  1104. SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  1105. let hasSideEffects = 0 in
  1106. def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
  1107. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
  1108. [(set MaskInfo.RC:$dst,
  1109. (MaskInfo.VT
  1110. (bitconvert
  1111. (DestInfo.VT
  1112. (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
  1113. DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  1114. def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
  1115. (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
  1116. !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
  1117. "${dst} {${mask}} {z}, $src}"),
  1118. [(set MaskInfo.RC:$dst,
  1119. (vselect_mask MaskInfo.KRCWM:$mask,
  1120. (MaskInfo.VT
  1121. (bitconvert
  1122. (DestInfo.VT
  1123. (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
  1124. MaskInfo.ImmAllZerosV))],
  1125. DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  1126. let Constraints = "$src0 = $dst" in
  1127. def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
  1128. (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
  1129. SrcInfo.RC:$src),
  1130. !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
  1131. "${dst} {${mask}}, $src}"),
  1132. [(set MaskInfo.RC:$dst,
  1133. (vselect_mask MaskInfo.KRCWM:$mask,
  1134. (MaskInfo.VT
  1135. (bitconvert
  1136. (DestInfo.VT
  1137. (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
  1138. MaskInfo.RC:$src0))],
  1139. DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
  1140. let hasSideEffects = 0, mayLoad = 1 in
  1141. def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
  1142. (ins SrcInfo.ScalarMemOp:$src),
  1143. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
  1144. [(set MaskInfo.RC:$dst,
  1145. (MaskInfo.VT
  1146. (bitconvert
  1147. (DestInfo.VT
  1148. (UnmaskedBcastOp addr:$src)))))],
  1149. DestInfo.ExeDomain>, T8PD, EVEX,
  1150. EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
  1151. def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
  1152. (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
  1153. !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
  1154. "${dst} {${mask}} {z}, $src}"),
  1155. [(set MaskInfo.RC:$dst,
  1156. (vselect_mask MaskInfo.KRCWM:$mask,
  1157. (MaskInfo.VT
  1158. (bitconvert
  1159. (DestInfo.VT
  1160. (SrcInfo.BroadcastLdFrag addr:$src)))),
  1161. MaskInfo.ImmAllZerosV))],
  1162. DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
  1163. EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
  1164. let Constraints = "$src0 = $dst",
  1165. isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  1166. def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
  1167. (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
  1168. SrcInfo.ScalarMemOp:$src),
  1169. !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
  1170. "${dst} {${mask}}, $src}"),
  1171. [(set MaskInfo.RC:$dst,
  1172. (vselect_mask MaskInfo.KRCWM:$mask,
  1173. (MaskInfo.VT
  1174. (bitconvert
  1175. (DestInfo.VT
  1176. (SrcInfo.BroadcastLdFrag addr:$src)))),
  1177. MaskInfo.RC:$src0))],
  1178. DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
  1179. EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
  1180. }
  1181. // Helper class to force mask and broadcast result to same type.
  1182. multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
  1183. SchedWrite SchedRR, SchedWrite SchedRM,
  1184. X86VectorVTInfo DestInfo,
  1185. X86VectorVTInfo SrcInfo,
  1186. bit IsConvertibleToThreeAddress> :
  1187. avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
  1188. DestInfo, DestInfo, SrcInfo,
  1189. IsConvertibleToThreeAddress>;
  1190. multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
  1191. AVX512VLVectorVTInfo _> {
  1192. let Predicates = [HasAVX512] in {
  1193. defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
  1194. WriteFShuffle256Ld, _.info512, _.info128, 1>,
  1195. avx512_broadcast_scalar<NAME, _.info512, _.info128>,
  1196. EVEX_V512;
  1197. }
  1198. let Predicates = [HasVLX] in {
  1199. defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
  1200. WriteFShuffle256Ld, _.info256, _.info128, 1>,
  1201. avx512_broadcast_scalar<NAME, _.info256, _.info128>,
  1202. EVEX_V256;
  1203. }
  1204. }
  1205. multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
  1206. AVX512VLVectorVTInfo _> {
  1207. let Predicates = [HasAVX512] in {
  1208. defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
  1209. WriteFShuffle256Ld, _.info512, _.info128, 1>,
  1210. avx512_broadcast_scalar<NAME, _.info512, _.info128>,
  1211. EVEX_V512;
  1212. }
  1213. let Predicates = [HasVLX] in {
  1214. defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
  1215. WriteFShuffle256Ld, _.info256, _.info128, 1>,
  1216. avx512_broadcast_scalar<NAME, _.info256, _.info128>,
  1217. EVEX_V256;
  1218. defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
  1219. WriteFShuffle256Ld, _.info128, _.info128, 1>,
  1220. avx512_broadcast_scalar<NAME, _.info128, _.info128>,
  1221. EVEX_V128;
  1222. }
  1223. }
  1224. defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
  1225. avx512vl_f32_info>;
  1226. defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
  1227. avx512vl_f64_info>, VEX_W1X;
  1228. multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
  1229. X86VectorVTInfo _, SDPatternOperator OpNode,
  1230. RegisterClass SrcRC> {
  1231. // Fold with a mask even if it has multiple uses since it is cheap.
  1232. let ExeDomain = _.ExeDomain in
  1233. defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
  1234. (ins SrcRC:$src),
  1235. "vpbroadcast"#_.Suffix, "$src", "$src",
  1236. (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
  1237. /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
  1238. T8PD, EVEX, Sched<[SchedRR]>;
  1239. }
  1240. multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
  1241. X86VectorVTInfo _, SDPatternOperator OpNode,
  1242. RegisterClass SrcRC, SubRegIndex Subreg> {
  1243. let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  1244. defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
  1245. (outs _.RC:$dst), (ins GR32:$src),
  1246. !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
  1247. !con((ins _.KRCWM:$mask), (ins GR32:$src)),
  1248. "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
  1249. "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
  1250. def : Pat <(_.VT (OpNode SrcRC:$src)),
  1251. (!cast<Instruction>(Name#rr)
  1252. (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
  1253. // Fold with a mask even if it has multiple uses since it is cheap.
  1254. def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
  1255. (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
  1256. (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
  1257. def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
  1258. (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
  1259. (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
  1260. }
  1261. multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
  1262. AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
  1263. RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  1264. let Predicates = [prd] in
  1265. defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
  1266. OpNode, SrcRC, Subreg>, EVEX_V512;
  1267. let Predicates = [prd, HasVLX] in {
  1268. defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
  1269. _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
  1270. defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
  1271. _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  1272. }
  1273. }
  1274. multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
  1275. SDPatternOperator OpNode,
  1276. RegisterClass SrcRC, Predicate prd> {
  1277. let Predicates = [prd] in
  1278. defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
  1279. SrcRC>, EVEX_V512;
  1280. let Predicates = [prd, HasVLX] in {
  1281. defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
  1282. SrcRC>, EVEX_V256;
  1283. defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
  1284. SrcRC>, EVEX_V128;
  1285. }
  1286. }
  1287. defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
  1288. avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
  1289. defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
  1290. avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
  1291. HasBWI>;
  1292. defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
  1293. X86VBroadcast, GR32, HasAVX512>;
  1294. defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
  1295. X86VBroadcast, GR64, HasAVX512>, VEX_W;
  1296. multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
  1297. AVX512VLVectorVTInfo _, Predicate prd,
  1298. bit IsConvertibleToThreeAddress> {
  1299. let Predicates = [prd] in {
  1300. defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
  1301. WriteShuffle256Ld, _.info512, _.info128,
  1302. IsConvertibleToThreeAddress>,
  1303. EVEX_V512;
  1304. }
  1305. let Predicates = [prd, HasVLX] in {
  1306. defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
  1307. WriteShuffle256Ld, _.info256, _.info128,
  1308. IsConvertibleToThreeAddress>,
  1309. EVEX_V256;
  1310. defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
  1311. WriteShuffleXLd, _.info128, _.info128,
  1312. IsConvertibleToThreeAddress>,
  1313. EVEX_V128;
  1314. }
  1315. }
  1316. defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
  1317. avx512vl_i8_info, HasBWI, 0>;
  1318. defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
  1319. avx512vl_i16_info, HasBWI, 0>;
  1320. defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
  1321. avx512vl_i32_info, HasAVX512, 1>;
  1322. defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
  1323. avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
  1324. multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
  1325. SDPatternOperator OpNode,
  1326. X86VectorVTInfo _Dst,
  1327. X86VectorVTInfo _Src> {
  1328. defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
  1329. (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
  1330. (_Dst.VT (OpNode addr:$src))>,
  1331. Sched<[SchedWriteShuffle.YMM.Folded]>,
  1332. AVX5128IBase, EVEX;
  1333. }
  1334. // This should be used for the AVX512DQ broadcast instructions. It disables
  1335. // the unmasked patterns so that we only use the DQ instructions when masking
  1336. // is requested.
  1337. multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
  1338. SDPatternOperator OpNode,
  1339. X86VectorVTInfo _Dst,
  1340. X86VectorVTInfo _Src> {
  1341. let hasSideEffects = 0, mayLoad = 1 in
  1342. defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
  1343. (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
  1344. (null_frag),
  1345. (_Dst.VT (OpNode addr:$src))>,
  1346. Sched<[SchedWriteShuffle.YMM.Folded]>,
  1347. AVX5128IBase, EVEX;
  1348. }
  1349. let Predicates = [HasBWI] in {
  1350. def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
  1351. (VPBROADCASTWZrm addr:$src)>;
  1352. def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
  1353. (VPBROADCASTWZrr VR128X:$src)>;
  1354. def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
  1355. (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  1356. }
  1357. let Predicates = [HasVLX, HasBWI] in {
  1358. def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
  1359. (VPBROADCASTWZ128rm addr:$src)>;
  1360. def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
  1361. (VPBROADCASTWZ256rm addr:$src)>;
  1362. def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
  1363. (VPBROADCASTWZ128rr VR128X:$src)>;
  1364. def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
  1365. (VPBROADCASTWZ256rr VR128X:$src)>;
  1366. def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
  1367. (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  1368. def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
  1369. (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  1370. }
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
// Map every element type onto the baseline AVX512F X4/X4 instructions; the
// element size encoded in the instruction is irrelevant for an unmasked
// whole-subvector broadcast.
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// Here the select's element size DOES matter (it determines masking
// granularity), so each mask class pairs with the matching-width
// instruction, absorbing the bitcast from the other element type.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
// 256-bit destination subvector broadcasts, available with VLX.
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked broadcasts of all element types go through the 32x4 forms.
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}
// 64x2 subvector broadcasts to a 256-bit destination require both VLX and
// DQI.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
// DQI-only 512-bit destination forms: 64x2 and 32x8 subvector broadcasts.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
// Broadcast of the low 2 x 32-bit elements (viewed as one 64-bit element of
// _Src) into every 64-bit lane of _Dst. DQI only. The 512- and 256-bit
// destination forms are defined here; no patterns are attached (null_frag).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}

// Integer variant additionally provides a 128-bit destination form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                      avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                     avx512vl_f32_info, avx512vl_f64_info>;
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// One register-to-register form per vector width: expands a mask register
// KRC into vector elements via the X86VBroadcastm node.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

// Instantiates 512-bit (CDI) plus 256/128-bit (CDI+VLX) variants.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// The index operand ($src1) is tied to the destination and is overwritten
// with the result; both are modeled via the X86VPermt2 node with the index
// in the middle position.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory (".. {1toN}") form of VPERMI2.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
                     IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates 512-bit plus (under VLX) 128/256-bit reg/mem/broadcast forms.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
// Byte/word element variant: gated on Prd (BWI or VBMI) and has no
// broadcast-memory form since the element is smaller than 32 bits.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// Each pattern matches a masked VPermt2 whose passthru/index come from the
// same register viewed through a bitcast (CastVT), covering the reg/reg,
// reg/mem and reg/broadcast-mem masked forms of the instruction.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2
// Unlike VPERMI2, here the first table operand ($src1) is tied to the
// destination and the index register is a free source ($src2).
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory (".. {1toN}") form of VPERMT2.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
                     IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates 512-bit plus (under VLX) 128/256-bit reg/mem/broadcast forms.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
// Byte/word element variant of VPERMT2: gated on Prd (BWI or VBMI), no
// broadcast-memory form.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
// Defines the unmasked, merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz)
// register and memory forms. No selection patterns are attached; these are
// assembler/encoding-only definitions (hasSideEffects = 0, empty pattern
// lists).
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Broadcast-memory (".. {1toN}") forms of the masked blends; only defined
// for 32/64-bit element types (mixed into blendmask_dq below).
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// 32/64-bit element blends: all widths get the broadcast-memory forms too.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Byte/word element blends: require BWI and have no broadcast-memory forms.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Defines intrinsic (_Int) forms over vector types, an SAE form, and
// isCodeGenOnly forms over scalar FRC registers. The *_su PatFrags restrict
// the masked variants to single-use compares.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // {sae} form suppresses exceptions; still reads MXCSR for rounding mode.
  let Uses = [MXCSR] in
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          timm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        timm:$cc))]>,
              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
// Single-use wrappers: the masked compare forms should only be selected when
// the compare node has exactly one use.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
// FP16 scalar compare (vcmpsh) requires the FP16 feature.
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
// Packed integer compares writing a mask register. Encoding-only (empty
// pattern lists, hasSideEffects = 0); provides unmasked and merge-masked
// register/memory forms.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Extends avx512_icmp_packed with broadcast-memory forms (EVEX_B):
// rmb compares against a broadcast scalar load, rmbk is its masked
// variant. Only element widths that support embedded broadcast (d/q)
// instantiate this.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
let mayLoad = 1, hasSideEffects = 0 in {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_K, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
// Instantiates avx512_icmp_packed at all three vector lengths: 512-bit
// under the base predicate, 256/128-bit additionally gated on HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
// Same vector-length expansion as avx512_icmp_packed_vl, but using the
// broadcast-capable avx512_icmp_packed_rmb multiclass.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// setcc restricted to the signed greater-than condition (VPCMPGT).
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
// Equality compares are commutable (IsCommutable = 1); byte/word forms
// require BWI, and only dword/qword forms get broadcast (rmb) variants.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Signed greater-than compares are not commutable.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
// Converts the setcc condition code (operand 2) into the VPCMP immediate
// encoding via X86::getVPCMPImmForCond.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
uint8_t SSECC = X86::getVPCMPImmForCond(CC);
return getI8Imm(SSECC, SDLoc(N));
}]>;
// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
uint8_t SSECC = X86::getVPCMPImmForCond(CC);
SSECC = X86::getSwappedVPCMPImm(SSECC);
return getI8Imm(SSECC, SDLoc(N));
}]>;
// VPCMP with an explicit condition-code immediate: rri/rmi and their
// masked rrik/rmik forms, each selected from a setcc-based Frag whose
// condition is turned into the $cc immediate by the frag's SDNodeXForm.
// Trailing Pat<>s commute the compare when the load is in operand 1.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond)))]>,
EVEX_4V, Sched<[sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Masked forms use the single-use fragment so a shared compare is not
// re-evaluated per mask.
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT
(Frag_su:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Load in the first operand: select the rmi/rmik form with the swapped
// condition immediate (X86pcmpm_imm_commute).
def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
def : Pat<(and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
(X86pcmpm_imm_commute $cc))>;
}
// Adds embedded-broadcast forms (rmib/rmibk, EVEX_B) to avx512_icmp_cc,
// plus commuted-load patterns for broadcasts in the first operand.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> :
avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond)))]>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond))))]>,
EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast load in the first operand: commute via the swapped imm.
def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmib")
_.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
def : Pat<(and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
(X86pcmpm_imm_commute $cc))>;
}
// Vector-length expansion of avx512_icmp_cc (512-bit always, 256/128-bit
// gated on HasVLX). NAME is passed so the commuted Pat<>s can look up the
// correct instruction records.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
// Vector-length expansion of the broadcast-capable avx512_icmp_cc_rmb.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
// setcc fragments split by signedness, each carrying X86pcmpm_imm to turn
// the condition code into the VPCMP/VPCMPU immediate. The _su variants
// additionally require a single use (for the masked instruction forms).
// Signed conditions -> VPCMP.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
// Unsigned conditions -> VPCMPU.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Immediate-condition compares: signed (VPCMP*) vs unsigned (VPCMPU*)
// selected by the fragment pair; b/w need BWI, d/q get broadcast forms.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// Single-use variant of the packed FP compare-to-mask node, used by the
// masked VCMP forms below.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpm node:$src1, node:$src2, node:$cc), [{
return N->hasOneUse();
}]>;
// Swap the operands of a VCMP immediate; only the low 5 bits encode the
// condition, hence the & 0x1f before X86::getSwappedVCMPImm.
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
return getI8Imm(Imm, SDLoc(N));
}]>;
// Packed FP compare-to-mask (VCMP): rri/rmi/rmbi forms via
// AVX512_maskable_cmp, followed by Pat<>s that (a) commute when the load
// is the first operand and (b) select the X86cmpmm mask intrinsics.
// All instruction forms read MXCSR and may raise FP exceptions.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86any_cmpm (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
timm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for selecting with loads in other operand.
// The immediate is rewritten with X86cmpm_imm_commute to compensate for
// the swapped operand order.
def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
(_.VT _.RC:$src1),
timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1),
timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
// Patterns for mask intrinsics.
// An all-ones mask selects the unmasked form; otherwise the k-form.
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
addr:$src2, timm:$cc)>;
// Patterns for mask intrinsics with loads in other operand.
def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
(_.KVT immAllOnesV)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
_.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
}
// SAE ({sae}) register-register compare form, selected from X86cmpmmSAE;
// unmasked pattern uses an all-ones mask operand, masked pattern the
// k-register. MXCSR is read but no exception flag is modeled here.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// comparison code form (VCMP[EQ/LT/LE/...]
let Uses = [MXCSR] in
defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
[(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
[(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
EVEX_B, Sched<[sched]>;
}
// Top-level VCMP expansion: 512-bit gets both the common and the SAE
// forms; 128/256-bit (HasVLX) get only the common forms, since SAE is a
// 512-bit-encoding feature.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in {
defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [Pred,HasVLX] in {
defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
}
}
// Packed FP compare instantiations for f64, f32 and (FP16-gated) f16.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
// Patterns to select fp compares with load as first operand.
// Commute to the reg/mem form, rewriting the immediate with
// X86cmpm_imm_commute.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
let Predicates = [HasFP16] in {
def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
(VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
// ----------------------------------------------------------------
// FPClass
// Single-use variants of the scalar/vector fpclass nodes, used by the
// masked instruction forms.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
(X86Vfpclasss node:$src1, node:$src2), [{
return N->hasOneUse();
}]>;
def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
(X86Vfpclass node:$src1, node:$src2), [{
return N->hasOneUse();
}]>;
//handle fpclass instruction mask = op(reg_scalar,imm)
// op(mem_scalar,imm)
// Scalar VFPCLASS: rr/rm plus masked rrk/rmk; the i32u8imm $src2 is the
// class-test immediate. All forms read MXCSR.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
Predicate prd> {
let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
// Masked form: result ANDed with the writemask, single-use node only.
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
// fpclass(reg_vec, mem_vec, imm)
// fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASS: rr/rm/rmb plus masked variants. The "mem" string is a
// size-disambiguation suffix (x/y/z) used on the memory mnemonics and in
// the AT&T aliases at the bottom. All forms read MXCSR.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string mem>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
// Memory forms carry the {x|y|z} suffix in the mnemonic so the
// assembler can infer the operand size.
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast forms: the broadcast decorator in the assembly string
// already disambiguates the size, so no {x|y|z} suffix here.
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
_.BroadcastStr#", $dst|$dst, ${src1}"
#_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
_.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// the memory form.
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr")
_.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
(!cast<Instruction>(NAME#"rrk")
_.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
_.BroadcastStr#", $src2}",
(!cast<Instruction>(NAME#"rmb")
_.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
def : InstAlias<OpcodeStr#_.Suffix#mem#
"\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
"$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
(!cast<Instruction>(NAME#"rmbk")
_.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}
// Vector-length expansion for VFPCLASS, passing the "z"/"x"/"y" size
// suffix used by the memory-form mnemonics and aliases.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, X86SchedWriteWidths sched,
Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
_.info512, "z">, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
_.info128, "x">, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
_.info256, "y">, EVEX_V256;
}
}
// All VFPCLASS variants: vector forms (PH/PS/PD) and scalar forms
// (SHZ/SSZ/SDZ). FP16 variants require HasFP16; f32/f64 require HasDQI.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
bits<8> opcScalar, X86SchedWriteWidths sched> {
defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
sched, HasFP16>,
EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f16x_info, HasFP16>,
EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
sched, HasDQI>,
EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
sched, HasDQI>,
EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f32x_info, HasDQI>, VEX_LIG,
EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f64x_info, HasDQI>, VEX_LIG,
EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}
// 0x66 = vector opcode, 0x67 = scalar opcode.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// kk = k-to-k move (no pattern), km = load from memory, mk = store.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>,
Sched<[WriteLoad]>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>,
Sched<[WriteStore]>;
}
// GPR <-> mask-register moves (kr = GPR-to-k, rk = k-to-GPR); no
// patterns, selection is done via the Pat<>s below.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
string OpcodeStr,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
}
}
// KMOV instantiations. Note the GPR forms of KMOVB/KMOVW use GR32 (there
// is no 8/16-bit GPR form), and KMOVD/KMOVQ use separate prefix encodings
// for the k<->k/mem forms vs the GPR forms.
let Predicates = [HasDQI] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasAVX512] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasBWI] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, VEX_W;
}
// GR from/to mask register
// i16/i8 <-> v16i1/v8i1 go through GR32 with sub-register inserts and
// extracts, since the k<->GPR moves only exist in 32/64-bit widths for
// these classes; zext forms can use KMOVWrk/KMOVBrk directly because the
// instruction zero-extends into the 32-bit GPR.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
// KMOVBrk requires DQI; the anyext forms below work on plain AVX512.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit widths match their GPR classes directly.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
(COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
(COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
(COPY_TO_REGCLASS VK64:$src, GR64)>;
2675. // Load/store kreg
// With DQI, sub-byte mask loads use KMOVBkm and then retag the register class.
2676. let Predicates = [HasDQI] in {
2677. def : Pat<(v1i1 (load addr:$src)),
2678. (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2679. def : Pat<(v2i1 (load addr:$src)),
2680. (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2681. def : Pat<(v4i1 (load addr:$src)),
2682. (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2683. }
// Without DQI, an 8-bit mask load goes through a zero-extending GPR load
// (MOVZX32rm8) since there is no KMOVB; 16-bit loads use KMOVWkm directly.
2684. let Predicates = [HasAVX512] in {
2685. def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2686. (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2687. def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2688. (KMOVWkm addr:$src)>;
2689. }
// SDNode wrapper for ISD::EXTRACT_VECTOR_ELT constrained to extracting an
// i8 result from a vector of i1 elements at a pointer-typed index.
2690. def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2691. SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2692. SDTCVecEltisVT<1, i1>,
2693. SDTCisPtrTy<2>]>>;
2694. let Predicates = [HasAVX512] in {
// Lowers scalar_to_vector (GPR -> mask) and element-0 extraction
// (mask -> GPR) for every mask register class. GR8 sources are first widened
// to 32 bits via INSERT_SUBREG; i8 results are taken as the low sub_8bit of a
// mask-to-GR32 copy.
2695. multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2696. def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2697. (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2698. def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2699. (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2700. def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2701. (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2702. def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2703. (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2704. }
2705. defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2706. defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2707. defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2708. defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2709. defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2710. defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2711. defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
// Inserting a single GR8-sourced bit into an all-zeros v16i1: mask the GPR to
// its low bit with AND32ri8 and move it into a k-register with KMOVWkr.
2712. def : Pat<(insert_subvector (v16i1 immAllZerosV),
2713. (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2714. (KMOVWkr (AND32ri8
2715. (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2716. (i32 1)))>;
2717. }
2718. // Mask unary operation
2719. // - KNOT
// Single register-to-register unary mask instruction, gated on predicate prd.
2720. multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2721. RegisterClass KRC, SDPatternOperator OpNode,
2722. X86FoldableSchedWrite sched, Predicate prd> {
2723. let Predicates = [prd] in
2724. def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2725. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2726. [(set KRC:$dst, (OpNode KRC:$src))]>,
2727. Sched<[sched]>;
2728. }
// Instantiates the B/W/D/Q widths of a unary mask op with the appropriate
// feature predicate and prefix encoding for each width.
2729. multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2730. SDPatternOperator OpNode,
2731. X86FoldableSchedWrite sched> {
2732. defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2733. sched, HasDQI>, VEX, PD;
2734. defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2735. sched, HasAVX512>, VEX, PS;
2736. defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2737. sched, HasBWI>, VEX, PD, VEX_W;
2738. defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2739. sched, HasBWI>, VEX, PS, VEX_W;
2740. }
  2741. // TODO - do we need a X86SchedWriteWidths::KMASK type?
  2742. defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
  2743. // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
  2744. let Predicates = [HasAVX512, NoDQI] in
  2745. def : Pat<(vnot VK8:$src),
  2746. (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
  2747. def : Pat<(vnot VK4:$src),
  2748. (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
  2749. def : Pat<(vnot VK2:$src),
  2750. (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
  2751. def : Pat<(vnot VK1:$src),
  2752. (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
2753. // Mask binary operation
2754. // - KAND, KANDN, KOR, KXNOR, KXOR
// Single two-operand mask instruction; IsCommutable feeds the isCommutable
// flag so the register allocator/ISel may swap operands.
2755. multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2756. RegisterClass KRC, SDPatternOperator OpNode,
2757. X86FoldableSchedWrite sched, Predicate prd,
2758. bit IsCommutable> {
2759. let Predicates = [prd], isCommutable = IsCommutable in
2760. def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2761. !strconcat(OpcodeStr,
2762. "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2763. [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2764. Sched<[sched]>;
2765. }
// Instantiates all four widths; prdW lets the W form require a different
// predicate (used by KADD, which needs DQI even for the word width).
2766. multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2767. SDPatternOperator OpNode,
2768. X86FoldableSchedWrite sched, bit IsCommutable,
2769. Predicate prdW = HasAVX512> {
2770. defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2771. sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2772. defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2773. sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2774. defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2775. sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2776. defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2777. sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2778. }
2779. // These nodes use 'vnot' instead of 'not' to support vectors.
2780. def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2781. def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2782. // TODO - do we need a X86SchedWriteWidths::KMASK type?
// Binary mask op instantiations; KANDN is non-commutative (operand roles
// differ), KADD is DQI-only for all widths via prdW.
2783. defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2784. defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2785. defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2786. defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2787. defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2788. defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lowers binary ops on narrow mask types by promoting both operands to VK16,
// using the word-width instruction, and copying the result back.
2789. multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2790. Instruction Inst> {
2791. // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2792. // for the DQI set, this type is legal and KxxxB instruction is used
2793. let Predicates = [NoDQI] in
2794. def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2795. (COPY_TO_REGCLASS
2796. (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2797. (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2798. // All types smaller than 8 bits require conversion anyway
2799. def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2800. (COPY_TO_REGCLASS (Inst
2801. (COPY_TO_REGCLASS VK1:$src1, VK16),
2802. (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2803. def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2804. (COPY_TO_REGCLASS (Inst
2805. (COPY_TO_REGCLASS VK2:$src1, VK16),
2806. (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2807. def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2808. (COPY_TO_REGCLASS (Inst
2809. (COPY_TO_REGCLASS VK4:$src1, VK16),
2810. (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2811. }
// One instantiation per logical op, all mapped to the W-width instruction.
2812. defm : avx512_binop_pat<and, KANDWrr>;
2813. defm : avx512_binop_pat<vandn, KANDNWrr>;
2814. defm : avx512_binop_pat<or, KORWrr>;
2815. defm : avx512_binop_pat<vxnor, KXNORWrr>;
2816. defm : avx512_binop_pat<xor, KXORWrr>;
2817. // Mask unpacking
// KUNPCK: concatenates two source masks into a double-width mask. The
// concat_vectors pattern passes $src2 as the instruction's first operand —
// the operands are deliberately swapped relative to the DAG node; presumably
// KUNPCK places its first source in the upper half (verify against the ISA
// reference if changing this).
2818. multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2819. X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2820. Predicate prd> {
2821. let Predicates = [prd] in {
2822. let hasSideEffects = 0 in
2823. def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2824. (ins Src.KRC:$src1, Src.KRC:$src2),
2825. "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2826. VEX_4V, VEX_L, Sched<[sched]>;
2827. def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2828. (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2829. }
2830. }
2831. defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
2832. defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
2833. defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
2834. // Mask bit testing
// Flag-setting mask comparisons (no register result): the instruction only
// defines EFLAGS via the OpNode.
2835. multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2836. SDNode OpNode, X86FoldableSchedWrite sched,
2837. Predicate prd> {
2838. let Predicates = [prd], Defs = [EFLAGS] in
2839. def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2840. !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2841. [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2842. Sched<[sched]>;
2843. }
// All widths of a test op; prdW allows KTEST to require DQI for its W form.
2844. multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2845. X86FoldableSchedWrite sched,
2846. Predicate prdW = HasAVX512> {
2847. defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2848. VEX, PD;
2849. defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2850. VEX, PS;
2851. defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2852. VEX, PS, VEX_W;
2853. defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2854. VEX, PD, VEX_W;
2855. }
2856. // TODO - do we need a X86SchedWriteWidths::KMASK type?
2857. defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2858. defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
2859. // Mask shift
// Immediate-count mask shift (k-reg, u8 immediate).
2860. multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2861. SDNode OpNode, X86FoldableSchedWrite sched> {
2862. let Predicates = [HasAVX512] in
2863. def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2864. !strconcat(OpcodeStr,
2865. "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2866. [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2867. Sched<[sched]>;
2868. }
// B/W use opcode opc1, D/Q use opc2; B requires DQI, D/Q require BWI.
2869. multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2870. SDNode OpNode, X86FoldableSchedWrite sched> {
2871. defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2872. sched>, VEX, TAPD, VEX_W;
2873. let Predicates = [HasDQI] in
2874. defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2875. sched>, VEX, TAPD;
2876. let Predicates = [HasBWI] in {
2877. defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2878. sched>, VEX, TAPD, VEX_W;
2879. defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2880. sched>, VEX, TAPD;
2881. }
2882. }
2883. defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2884. defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2885. // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Without VLX there are no 128/256-bit VPCMP forms, so the narrow operands
// are widened with INSERT_SUBREG into an undefined 512-bit register and the
// 512-bit compare is used; the resulting wide mask is retagged to the narrow
// mask class. The masked variant routes the narrow mask through the wide
// mask class for the "k"-suffixed instruction.
2886. multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2887. string InstStr,
2888. X86VectorVTInfo Narrow,
2889. X86VectorVTInfo Wide> {
2890. def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2891. (Narrow.VT Narrow.RC:$src2), cond)),
2892. (COPY_TO_REGCLASS
2893. (!cast<Instruction>(InstStr#"Zrri")
2894. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2895. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2896. (X86pcmpm_imm $cc)), Narrow.KRC)>;
2897. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2898. (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2899. (Narrow.VT Narrow.RC:$src2),
2900. cond)))),
2901. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2902. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2903. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2904. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2905. (X86pcmpm_imm $cc)), Narrow.KRC)>;
2906. }
// Broadcast-memory variants of the above widening lowering; commuted forms
// use X86pcmpm_imm_commute to adjust the condition-code immediate when the
// broadcast operand appears first in the DAG.
2907. multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2908. string InstStr,
2909. X86VectorVTInfo Narrow,
2910. X86VectorVTInfo Wide> {
2911. // Broadcast load.
2912. def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2913. (Narrow.BroadcastLdFrag addr:$src2), cond)),
2914. (COPY_TO_REGCLASS
2915. (!cast<Instruction>(InstStr#"Zrmib")
2916. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2917. addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2918. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2919. (Narrow.KVT
2920. (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2921. (Narrow.BroadcastLdFrag addr:$src2),
2922. cond)))),
2923. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
2924. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2925. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2926. addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2927. // Commuted with broadcast load.
2928. def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
2929. (Narrow.VT Narrow.RC:$src1),
2930. cond)),
2931. (COPY_TO_REGCLASS
2932. (!cast<Instruction>(InstStr#"Zrmib")
2933. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2934. addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
2935. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2936. (Narrow.KVT
2937. (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
2938. (Narrow.VT Narrow.RC:$src1),
2939. cond)))),
2940. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
2941. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2942. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2943. addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
2944. }
2945. // Same as above, but for fp types which don't use PatFrags.
// FP comparisons carry their condition code as a timm:$cc operand on the
// X86cmpm node directly; X86cmpm_imm_commute fixes the immediate when the
// broadcast operand is commuted to the first position.
2946. multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
2947. X86VectorVTInfo Narrow,
2948. X86VectorVTInfo Wide> {
2949. def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
2950. (Narrow.VT Narrow.RC:$src2), timm:$cc)),
2951. (COPY_TO_REGCLASS
2952. (!cast<Instruction>(InstStr#"Zrri")
2953. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2954. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2955. timm:$cc), Narrow.KRC)>;
2956. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2957. (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
2958. (Narrow.VT Narrow.RC:$src2), timm:$cc))),
2959. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2960. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2961. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2962. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2963. timm:$cc), Narrow.KRC)>;
2964. // Broadcast load.
2965. def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
2966. (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
2967. (COPY_TO_REGCLASS
2968. (!cast<Instruction>(InstStr#"Zrmbi")
2969. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2970. addr:$src2, timm:$cc), Narrow.KRC)>;
2971. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2972. (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
2973. (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
2974. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
2975. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2976. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2977. addr:$src2, timm:$cc), Narrow.KRC)>;
2978. // Commuted with broadcast load.
2979. def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
2980. (Narrow.VT Narrow.RC:$src1), timm:$cc)),
2981. (COPY_TO_REGCLASS
2982. (!cast<Instruction>(InstStr#"Zrmbi")
2983. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2984. addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
2985. def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2986. (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
2987. (Narrow.VT Narrow.RC:$src1), timm:$cc))),
2988. (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
2989. (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2990. (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2991. addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
2992. }
// Instantiations of the widening compare lowerings: integer D/Q and FP
// PS/PD widths under AVX512F+NoVLX, byte/word widths under BWI+NoVLX.
2993. let Predicates = [HasAVX512, NoVLX] in {
2994. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
2995. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
2996. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
2997. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
2998. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
2999. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3000. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3001. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3002. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3003. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3004. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3005. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3006. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3007. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3008. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3009. defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3010. defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3011. defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3012. defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3013. defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3014. }
3015. let Predicates = [HasBWI, NoVLX] in {
3016. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3017. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3018. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3019. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3020. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3021. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3022. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3023. defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3024. }
3025. // Mask setting all 0s or 1s
// Rematerializable pseudo that sets a whole mask register to a constant
// (all-zeros or all-ones); expanded post-ISel.
3026. multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3027. let Predicates = [HasAVX512] in
3028. let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3029. SchedRW = [WriteZero] in
3030. def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3031. [(set KRC:$dst, (VT Val))]>;
3032. }
3033. multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3034. defm W : avx512_mask_setop<VK16, v16i1, Val>;
3035. defm D : avx512_mask_setop<VK32, v32i1, Val>;
3036. defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3037. }
3038. defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3039. defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3040. // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow constant masks reuse the W-width pseudo and retag the result class.
3041. let Predicates = [HasAVX512] in {
3042. def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3043. def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3044. def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3045. def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3046. def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3047. def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3048. def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3049. def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3050. }
3051. // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both operations are pure register-class retags (no bit
// movement), so they lower to COPY_TO_REGCLASS in each direction. One defm
// per (narrow, wide) mask-class pair.
3052. multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3053. RegisterClass RC, ValueType VT> {
3054. def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3055. (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3056. def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3057. (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3058. }
3059. defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3060. defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3061. defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3062. defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3063. defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3064. defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3065. defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3066. defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3067. defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3068. defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3069. defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3070. defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3071. defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3072. defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3073. defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3074. defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3075. defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3076. defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3077. defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3078. defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3079. defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3080. //===----------------------------------------------------------------------===//
3081. // AVX-512 - Aligned and unaligned load and store
3082. //
// One EVEX vector load with its full set of forms: rr (move), rrk/rrkz
// (merge-/zero-masked moves), rm/rmk/rmkz (memory loads), plus masked_load
// patterns mapped onto the masked memory forms. SelectOprr lets callers
// choose the select node used for the masked register forms; NoRMPattern
// suppresses the plain-load pattern (used when another instruction owns it).
3083. multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3084. X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3085. X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3086. bit NoRMPattern = 0,
3087. SDPatternOperator SelectOprr = vselect> {
3088. let hasSideEffects = 0 in {
3089. let isMoveReg = 1 in
3090. def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3091. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3092. _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3093. EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3094. def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3095. (ins _.KRCWM:$mask, _.RC:$src),
3096. !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3097. "${dst} {${mask}} {z}, $src}"),
3098. [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3099. (_.VT _.RC:$src),
3100. _.ImmAllZerosV)))], _.ExeDomain>,
3101. EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3102. let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3103. def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3104. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3105. !if(NoRMPattern, [],
3106. [(set _.RC:$dst,
3107. (_.VT (ld_frag addr:$src)))]),
3108. _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3109. EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masked forms tie $src0 to $dst (pass-through value).
3110. let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3111. def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3112. (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3113. !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3114. "${dst} {${mask}}, $src1}"),
3115. [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3116. (_.VT _.RC:$src1),
3117. (_.VT _.RC:$src0))))], _.ExeDomain>,
3118. EVEX, EVEX_K, Sched<[Sched.RR]>;
3119. def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3120. (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3121. !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3122. "${dst} {${mask}}, $src1}"),
3123. [(set _.RC:$dst, (_.VT
3124. (vselect_mask _.KRCWM:$mask,
3125. (_.VT (ld_frag addr:$src1)),
3126. (_.VT _.RC:$src0))))], _.ExeDomain>,
3127. EVEX, EVEX_K, Sched<[Sched.RM]>;
3128. }
3129. def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3130. (ins _.KRCWM:$mask, _.MemOp:$src),
3131. OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3132. "${dst} {${mask}} {z}, $src}",
3133. [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3134. (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3135. _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3136. }
// Masked-load DAG nodes: undef/zero pass-through selects the zeroing form,
// a register pass-through selects the merging form.
3137. def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3138. (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3139. def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3140. (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3141. def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3142. (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3143. _.KRCWM:$mask, addr:$ptr)>;
3144. }
// Aligned-load variant across vector lengths: Z (512-bit) under prd, and
// Z256/Z128 additionally under HasVLX. Uses AlignedLdFrag and the aligned
// masked-load fragment.
3145. multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3146. AVX512VLVectorVTInfo _, Predicate prd,
3147. X86SchedWriteMoveLSWidths Sched,
3148. string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3149. let Predicates = [prd] in
3150. defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3151. _.info512.AlignedLdFrag, masked_load_aligned,
3152. Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3153. let Predicates = [prd, HasVLX] in {
3154. defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3155. _.info256.AlignedLdFrag, masked_load_aligned,
3156. Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3157. defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3158. _.info128.AlignedLdFrag, masked_load_aligned,
3159. Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3160. }
3161. }
// Unaligned-load variant across vector lengths; forwards SelectOprr so
// callers can customize the masked-select node.
3162. multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3163. AVX512VLVectorVTInfo _, Predicate prd,
3164. X86SchedWriteMoveLSWidths Sched,
3165. string EVEX2VEXOvrd, bit NoRMPattern = 0,
3166. SDPatternOperator SelectOprr = vselect> {
3167. let Predicates = [prd] in
3168. defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3169. masked_load, Sched.ZMM, "",
3170. NoRMPattern, SelectOprr>, EVEX_V512;
3171. let Predicates = [prd, HasVLX] in {
3172. defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3173. masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3174. NoRMPattern, SelectOprr>, EVEX_V256;
3175. defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3176. masked_load, Sched.XMM, EVEX2VEXOvrd,
3177. NoRMPattern, SelectOprr>, EVEX_V128;
3178. }
3179. }
// One EVEX vector store with its full set of forms: rr_REV/rrk_REV/rrkz_REV
// (disassembly-only reversed-encoding register moves, linked to the load
// forms via FoldGenData), mr/mrk (memory stores), a masked_store pattern,
// and ".s" mnemonic aliases that force the reversed encodings.
3180. multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3181. X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3182. X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3183. bit NoMRPattern = 0> {
3184. let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3185. let isMoveReg = 1 in
3186. def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3187. OpcodeStr # "\t{$src, $dst|$dst, $src}",
3188. [], _.ExeDomain>, EVEX,
3189. FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3190. EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3191. def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3192. (ins _.KRCWM:$mask, _.RC:$src),
3193. OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3194. "${dst} {${mask}}, $src}",
3195. [], _.ExeDomain>, EVEX, EVEX_K,
3196. FoldGenData<BaseName#_.ZSuffix#rrk>,
3197. Sched<[Sched.RR]>;
3198. def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3199. (ins _.KRCWM:$mask, _.RC:$src),
3200. OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3201. "${dst} {${mask}} {z}, $src}",
3202. [], _.ExeDomain>, EVEX, EVEX_KZ,
3203. FoldGenData<BaseName#_.ZSuffix#rrkz>,
3204. Sched<[Sched.RR]>;
3205. }
3206. let hasSideEffects = 0, mayStore = 1 in
3207. def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3208. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3209. !if(NoMRPattern, [],
3210. [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3211. _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3212. EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3213. def mrk : AVX512PI<opc, MRMDestMem, (outs),
3214. (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3215. OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3216. [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3217. NotMemoryFoldable;
// Masked stores select the k-masked memory form.
3218. def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3219. (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3220. _.KRCWM:$mask, _.RC:$src)>;
3221. def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3222. (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3223. _.RC:$dst, _.RC:$src), 0>;
3224. def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3225. (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3226. _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3227. def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3228. (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3229. _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3230. }
// Unaligned-store variants across all three vector widths. The 512-bit (Z)
// form is gated only on 'prd'; the 256/128-bit forms additionally require
// VLX. The ZMM form passes "" for the EVEX2VEX override since there is no
// VEX-encoded 512-bit equivalent; the YMM form appends "Y" to the override.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
// Aligned-store variants: same structure as avx512_store_vl, but uses the
// 'alignedstore' / 'masked_store_aligned' pattern fragments so only
// alignment-guaranteed stores select these instructions.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
// Instantiate the full AVX-512 vector move families. Each defm combines a
// load multiclass (opcode 0x28/0x10/0x6F) with the matching store multiclass
// (0x29/0x11/0x7F). FP moves use SchedWriteFMoveLS, integer moves use
// SchedWriteVecMoveLS. Byte/word element forms (VMOVDQU8/16) require BWI.

// Aligned FP moves.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned FP moves; loads pass null_frag to suppress the plain rm pattern.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. Both element widths share the "VMOVDQA" EVEX2VEX
// override name.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves. VMOVDQU8/16 are BWI-only.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
// Note: opcode 0 / empty asm string because these never reach the encoder;
// expandPostRAPseudos rewrites them before emission.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store counterparts of the spill pseudos above.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
// Select-with-zero-on-true: (vselect M, 0, X) has the zero vector in the
// "true" slot, so it is lowered as a zeroing masked move with the mask
// inverted via KNOT. The v8i1 mask is first widened to VK16 because KNOTWrr
// operates on 16-bit mask registers.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lower a masked select on a narrow (128/256-bit) vector by widening both
// operands into a wide (512-bit) register with INSERT_SUBREG, performing the
// masked move there (merge-masking "rrk" or zero-masking "rrkz"), and
// extracting the narrow result back out. Used when VLX is unavailable.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Merge-masked form: false lanes come from $src0.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  // Zero-masked form: false lanes are zeroed.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word element selects widen through the BWI masked moves; f16/bf16
// vectors reuse VMOVDQU16 since they share the 16-bit element masking.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}
// Full-vector 512-bit loads/stores for types that have no dedicated move
// instruction under plain AVX512F. Integer vectors map onto the 64-bit
// element moves; f16/bf16 vectors map onto VMOVAPS/VMOVUPS (these patterns
// are guarded only by HasAVX512, so BWI-only moves are not used here).
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv32bf16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv32bf16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
// 128/256-bit counterparts of the patterns above, using the EVEX VLX-encoded
// Z128/Z256 move forms. Same type-to-instruction mapping: integer vectors use
// the 64-bit element moves, f16/bf16 use VMOVAPS/VMOVUPS.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
// Masked select / masked load / masked store lowering for f16 and bf16
// vectors via the same-element-size integer moves (VMOVDQU16*). Covers all
// three widths: 512-bit needs only BWI, 256/128-bit additionally need VLX.
// For each width: register select (rrk/rrkz), aligned and unaligned masked
// loads (rmk/rmkz), masked_load with merge/undef/zero passthru, and
// masked_store (mrk).
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
              (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
    def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
              (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
    def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                             (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
              (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                             (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
              (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                             (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
              (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                             (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
              (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
              (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
              (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
    def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
              (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

    def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
              (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
  }
  let Predicates = [HasBWI, HasVLX] in {
    // 256-bit forms (16-lane masks).
    def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
              (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
    def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
              (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
    def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                             (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
              (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                             (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
              (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                             (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
              (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                             (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
              (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
              (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
              (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
    def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
              (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

    def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
              (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

    // 128-bit forms (8-lane masks).
    def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
              (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
    def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
              (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
    def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                             (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
              (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                             (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
              (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                             (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
              (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                             (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
              (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
              (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
              (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
    def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
              (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

    def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
              (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
  }
}
// Instantiate the f16/bf16 masked-move lowering for both half-float flavors.
defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
  3612. // Move Int Doubleword to Packed Double Int
  3613. //
  3614. let ExeDomain = SSEPackedInt in {
  3615. def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
  3616. "vmovd\t{$src, $dst|$dst, $src}",
  3617. [(set VR128X:$dst,
  3618. (v4i32 (scalar_to_vector GR32:$src)))]>,
  3619. EVEX, Sched<[WriteVecMoveFromGpr]>;
  3620. def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
  3621. "vmovd\t{$src, $dst|$dst, $src}",
  3622. [(set VR128X:$dst,
  3623. (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
  3624. EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
  3625. def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
  3626. "vmovq\t{$src, $dst|$dst, $src}",
  3627. [(set VR128X:$dst,
  3628. (v2i64 (scalar_to_vector GR64:$src)))]>,
  3629. EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  3630. let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  3631. def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
  3632. (ins i64mem:$src),
  3633. "vmovq\t{$src, $dst|$dst, $src}", []>,
  3634. EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
  3635. let isCodeGenOnly = 1 in {
  3636. def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
  3637. "vmovq\t{$src, $dst|$dst, $src}",
  3638. [(set FR64X:$dst, (bitconvert GR64:$src))]>,
  3639. EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  3640. def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
  3641. "vmovq\t{$src, $dst|$dst, $src}",
  3642. [(set GR64:$dst, (bitconvert FR64X:$src))]>,
  3643. EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  3644. }
  3645. } // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
//
// Bitcast GR32 -> FR32X; codegen-only (the assembler form is VMOVDI2PDIZrr).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v4i32 into GR32.
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                   (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
// Store element 0 of a v4i32 to memory.
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                               (iPTR 0))), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v2i64 into GR64 (opcode 0x7E form).
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))]>,
                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

// Store form of the 0x7E encoding; disassembler-only (no ISel pattern).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, VEX_W, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

// Store element 0 of a v2i64 to memory (0xD6 encoding).
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-register 0xD6 form; codegen/disassembler-only.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// "vmovq.s" selects the store-style (0xD6) encoding explicitly.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
// Move Scalar Single to Double Int
//
// Bitcast FR32X -> GR32; codegen-only counterpart of VMOVDI2SSZrr.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
// i64 memory -> low qword of an xmm register (XS-prefixed vmovq load).
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
// Route the bits through a GPR: fp reg -> GPR (VMOVSS2DI/VMOVSDto64), then
// GPR -> mask (KMOVDkr/KMOVQkr), and the reverse for mask -> fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
//===----------------------------------------------------------------------===//
// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Scalar move family: register merge (rr, plus masked rrk/rrkz), scalar load
// (rm, masked rmk/rmkz, FRC-based rm_alt), and scalar store (mr, masked mrk).
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  // The unmasked rr pattern is restricted to OptForSize for non-FP16 types;
  // FP16 has no such restriction.
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  // Zero-masking select of the scalar result.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking select; $src0 is tied to $dst for the passthru lanes.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Zero-extending scalar load into the vector register class.
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked load forms have no patterns; selected by lowering code.
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
              "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Scalar store from the FRC class.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store; no pattern, used by the *_lowering multiclasses.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
  }
}
// Instantiate scalar moves: f32 (XS), f64 (XD + W), and FP16 (MAP5 XS,
// gated on HasFP16 rather than the default HasAVX512).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
               VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
// Fold a scalar X86selects into the masked scalar-move instructions:
// (OpNode src0, (scalar_to_vector (select mask, a, b))) becomes a
// merge-masked (rrk) or zero-masked (rrkz, when b is the FP zero) move.
// FRC operands are copied into the vector register class for the masked
// instruction forms.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
  def : Pat<(_.VT (OpNode _.RC:$src0,
                          (_.VT (scalar_to_vector
                                 (_.EltVT (X86selects VK1WM:$mask,
                                                      (_.EltVT _.FRC:$src1),
                                                      (_.EltVT _.FRC:$src2))))))),
            (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

  def : Pat<(_.VT (OpNode _.RC:$src0,
                          (_.VT (scalar_to_vector
                                 (_.EltVT (X86selects VK1WM:$mask,
                                                      (_.EltVT _.FRC:$src1),
                                                      (_.EltVT ZeroFP))))))),
            (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
// Match a 512-bit masked store whose value is a 128-bit vector widened with
// insert_subvector at index 0, and emit the masked scalar store (mrk) with
// the mask copied into VK1WM. 'Mask' is the exact mask dag to match.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}
  3840. multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
  3841. AVX512VLVectorVTInfo _,
  3842. dag Mask, RegisterClass MaskRC,
  3843. SubRegIndex subreg> {
  3844. def : Pat<(masked_store
  3845. (_.info512.VT (insert_subvector undef,
  3846. (_.info128.VT _.info128.RC:$src),
  3847. (iPTR 0))), addr:$dst, Mask),
  3848. (!cast<Instruction>(InstrStr#mrk) addr:$dst,
  3849. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
  3850. _.info128.RC:$src)>;
  3851. }
  3852. // This matches the more recent codegen from clang that avoids emitting a 512
  3853. // bit masked store directly. Codegen will widen 128-bit masked store to 512
  3854. // bits on AVX512F only targets.
  3855. multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
  3856. AVX512VLVectorVTInfo _,
  3857. dag Mask512, dag Mask128,
  3858. RegisterClass MaskRC,
  3859. SubRegIndex subreg> {
  3860. // AVX512F pattern.
  3861. def : Pat<(masked_store
  3862. (_.info512.VT (insert_subvector undef,
  3863. (_.info128.VT _.info128.RC:$src),
  3864. (iPTR 0))), addr:$dst, Mask512),
  3865. (!cast<Instruction>(InstrStr#mrk) addr:$dst,
  3866. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
  3867. _.info128.RC:$src)>;
  3868. // AVX512VL pattern.
  3869. def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
  3870. (!cast<Instruction>(InstrStr#mrk) addr:$dst,
  3871. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
  3872. _.info128.RC:$src)>;
  3873. }
// Load-side counterparts of the store-lowering multiclasses above.
// Each lowers extract_subvector(masked_load(...), 0) — i.e. a widened
// 512-bit masked load whose low 128 bits are then extracted — onto the
// masked scalar load forms:
//   - rmkz when the pass-through is all-zeros (zero-masking),
//   - rmk  when the pass-through is X86vzmovl of an existing register
//     (merge-masking into the low element).
3874. multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3875. dag Mask, RegisterClass MaskRC> {
3876. def : Pat<(_.info128.VT (extract_subvector
3877. (_.info512.VT (masked_load addr:$srcAddr, Mask,
3878. _.info512.ImmAllZerosV)),
3879. (iPTR 0))),
3880. (!cast<Instruction>(InstrStr#rmkz)
3881. (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3882. addr:$srcAddr)>;
3883. def : Pat<(_.info128.VT (extract_subvector
3884. (_.info512.VT (masked_load addr:$srcAddr, Mask,
3885. (_.info512.VT (insert_subvector undef,
3886. (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3887. (iPTR 0))))),
3888. (iPTR 0))),
3889. (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3890. (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3891. addr:$srcAddr)>;
3892. }
// Variant for masks held in a sub-32-bit GPR: widen with
// INSERT_SUBREG(IMPLICIT_DEF, mask, subreg) before copying to VK1WM.
3893. multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3894. AVX512VLVectorVTInfo _,
3895. dag Mask, RegisterClass MaskRC,
3896. SubRegIndex subreg> {
3897. def : Pat<(_.info128.VT (extract_subvector
3898. (_.info512.VT (masked_load addr:$srcAddr, Mask,
3899. _.info512.ImmAllZerosV)),
3900. (iPTR 0))),
3901. (!cast<Instruction>(InstrStr#rmkz)
3902. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3903. addr:$srcAddr)>;
3904. def : Pat<(_.info128.VT (extract_subvector
3905. (_.info512.VT (masked_load addr:$srcAddr, Mask,
3906. (_.info512.VT (insert_subvector undef,
3907. (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3908. (iPTR 0))))),
3909. (iPTR 0))),
3910. (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3911. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3912. addr:$srcAddr)>;
3913. }
3914. // This matches the more recent codegen from clang that avoids emitting a 512
3915. // bit masked load directly. Codegen will widen 128-bit masked load to 512
3916. // bits on AVX512F only targets.
// Two mask shapes as in the store variant: Mask512 for the widened
// AVX512F-only form, Mask128 for the native 128-bit masked_load on
// AVX512VL targets. Four patterns cover {widened, native} x {rmkz, rmk}.
3917. multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
3918. AVX512VLVectorVTInfo _,
3919. dag Mask512, dag Mask128,
3920. RegisterClass MaskRC,
3921. SubRegIndex subreg> {
3922. // AVX512F patterns.
3923. def : Pat<(_.info128.VT (extract_subvector
3924. (_.info512.VT (masked_load addr:$srcAddr, Mask512,
3925. _.info512.ImmAllZerosV)),
3926. (iPTR 0))),
3927. (!cast<Instruction>(InstrStr#rmkz)
3928. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3929. addr:$srcAddr)>;
3930. def : Pat<(_.info128.VT (extract_subvector
3931. (_.info512.VT (masked_load addr:$srcAddr, Mask512,
3932. (_.info512.VT (insert_subvector undef,
3933. (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3934. (iPTR 0))))),
3935. (iPTR 0))),
3936. (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3937. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3938. addr:$srcAddr)>;
3939. // AVX512VL patterns.
3940. def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
3941. _.info128.ImmAllZerosV)),
3942. (!cast<Instruction>(InstrStr#rmkz)
3943. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3944. addr:$srcAddr)>;
3945. def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
3946. (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
3947. (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3948. (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3949. addr:$srcAddr)>;
3950. }
// Instantiations of the scalar move/store/load lowering multiclasses for
// f32 (VMOVSS), f64 (VMOVSD) and — under HasFP16 — f16 (VMOVSH). The Mask
// dag arguments spell out the exact mask DAG shapes produced by
// legalization: an (and mask, 1) isolating bit 0, bitconverted to the
// vXi1 type matching each element count, sometimes via trunc and/or
// insert/extract_subvector when the mask was widened or narrowed.
3951. defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3952. defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
3953. defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3954. (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3955. defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3956. (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3957. defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3958. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// FP16 (vmovsh) lowerings; v8f16 has a 32-lane mask type at 512 bits.
3959. let Predicates = [HasFP16] in {
3960. defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
3961. defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
3962. (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
3963. defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
3964. (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
3965. defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
3966. (v32i1 (insert_subvector
3967. (v32i1 immAllZerosV),
3968. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
3969. (iPTR 0))),
3970. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
3971. GR8, sub_8bit>;
3972. defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
3973. (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
3974. defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
3975. (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
3976. defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
3977. (v32i1 (insert_subvector
3978. (v32i1 immAllZerosV),
3979. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
3980. (iPTR 0))),
3981. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
3982. GR8, sub_8bit>;
// Masked scalar f16 select on FR16X registers: bounce through VR128X so
// the masked VMOVSH rrk/rrkz forms can be used.
3983. def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
3984. (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
3985. (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
3986. VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
3987. (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
3988. def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
3989. (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
3990. (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
3991. }
// f32/f64 subreg2 instantiations: the Mask dags re-create the
// widen/narrow subvector dance the legalizer performs for 4- and 2-lane
// masks (insert into a zeroed v16i1, then extract the needed width).
3992. defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
3993. (v16i1 (insert_subvector
3994. (v16i1 immAllZerosV),
3995. (v4i1 (extract_subvector
3996. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
3997. (iPTR 0))),
3998. (iPTR 0))),
3999. (v4i1 (extract_subvector
4000. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4001. (iPTR 0))), GR8, sub_8bit>;
4002. defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4003. (v8i1
4004. (extract_subvector
4005. (v16i1
4006. (insert_subvector
4007. (v16i1 immAllZerosV),
4008. (v2i1 (extract_subvector
4009. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4010. (iPTR 0))),
4011. (iPTR 0))),
4012. (iPTR 0))),
4013. (v2i1 (extract_subvector
4014. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4015. (iPTR 0))), GR8, sub_8bit>;
4016. defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4017. (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4018. defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4019. (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4020. defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4021. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4022. defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4023. (v16i1 (insert_subvector
4024. (v16i1 immAllZerosV),
4025. (v4i1 (extract_subvector
4026. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4027. (iPTR 0))),
4028. (iPTR 0))),
4029. (v4i1 (extract_subvector
4030. (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4031. (iPTR 0))), GR8, sub_8bit>;
4032. defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4033. (v8i1
4034. (extract_subvector
4035. (v16i1
4036. (insert_subvector
4037. (v16i1 immAllZerosV),
4038. (v2i1 (extract_subvector
4039. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4040. (iPTR 0))),
4041. (iPTR 0))),
4042. (iPTR 0))),
4043. (v2i1 (extract_subvector
4044. (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4045. (iPTR 0))), GR8, sub_8bit>;
// Masked scalar selects on f32/f64 register and load operands, lowered
// onto the masked VMOVSS/VMOVSD forms (rrk/rrkz for registers,
// rmk/rmkz for loads), mirroring the f16 patterns above. Scalar FR
// registers are re-classed into VR128X around the instruction.
4046. def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4047. (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4048. (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4049. VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4050. (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4051. def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4052. (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4053. (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Load forms: select(mask, load, src0/zero) -> masked scalar load.
4054. def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4055. (COPY_TO_REGCLASS
4056. (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4057. VK1WM:$mask, addr:$src)),
4058. FR32X)>;
4059. def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4060. (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4061. def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4062. (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4063. (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4064. VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4065. (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4066. def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4067. (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4068. (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4069. def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4070. (COPY_TO_REGCLASS
4071. (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4072. VK1WM:$mask, addr:$src)),
4073. FR64X)>;
4074. def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4075. (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Whole-vector selects where only lane 0 differs: the true-side vector
// is passed as both the merge source and the move source.
4076. def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4077. (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4078. def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4079. (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4080. def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4081. (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4082. def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4083. (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Reversed-operand (store-form, MRMDestReg) register-register encodings
// of vmovsh/vmovss/vmovsd. These carry no patterns (isCodeGenOnly +
// hasSideEffects = 0) and exist so the disassembler can represent the
// 0x11 register encodings; FoldGenData links each to its canonical
// load-form twin for MC-level unfolding.
4084. let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4085. let Predicates = [HasFP16] in {
4086. def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4087. (ins VR128X:$src1, VR128X:$src2),
4088. "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4089. []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4090. FoldGenData<"VMOVSHZrr">,
4091. Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked form ties the pass-through operand to the destination.
4092. let Constraints = "$src0 = $dst" in
4093. def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4094. (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4095. VR128X:$src1, VR128X:$src2),
4096. "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4097. "$dst {${mask}}, $src1, $src2}",
4098. []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4099. FoldGenData<"VMOVSHZrrk">,
4100. Sched<[SchedWriteFShuffle.XMM]>;
4101. def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4102. (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4103. "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4104. "$dst {${mask}} {z}, $src1, $src2}",
4105. []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4106. FoldGenData<"VMOVSHZrrkz">,
4107. Sched<[SchedWriteFShuffle.XMM]>;
4108. }
4109. def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4110. (ins VR128X:$src1, VR128X:$src2),
4111. "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4112. []>, XS, EVEX_4V, VEX_LIG,
4113. FoldGenData<"VMOVSSZrr">,
4114. Sched<[SchedWriteFShuffle.XMM]>;
4115. let Constraints = "$src0 = $dst" in
4116. def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4117. (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4118. VR128X:$src1, VR128X:$src2),
4119. "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4120. "$dst {${mask}}, $src1, $src2}",
4121. []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4122. FoldGenData<"VMOVSSZrrk">,
4123. Sched<[SchedWriteFShuffle.XMM]>;
4124. def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4125. (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4126. "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4127. "$dst {${mask}} {z}, $src1, $src2}",
4128. []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4129. FoldGenData<"VMOVSSZrrkz">,
4130. Sched<[SchedWriteFShuffle.XMM]>;
4131. def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4132. (ins VR128X:$src1, VR128X:$src2),
4133. "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4134. []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4135. FoldGenData<"VMOVSDZrr">,
4136. Sched<[SchedWriteFShuffle.XMM]>;
4137. let Constraints = "$src0 = $dst" in
4138. def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4139. (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4140. VR128X:$src1, VR128X:$src2),
4141. "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4142. "$dst {${mask}}, $src1, $src2}",
4143. []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4144. VEX_W, FoldGenData<"VMOVSDZrrk">,
4145. Sched<[SchedWriteFShuffle.XMM]>;
4146. def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4147. (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4148. VR128X:$src2),
4149. "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4150. "$dst {${mask}} {z}, $src1, $src2}",
4151. []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4152. VEX_W, FoldGenData<"VMOVSDZrrkz">,
4153. Sched<[SchedWriteFShuffle.XMM]>;
4154. }
// Assembler aliases: a ".s" suffix on vmovsh/vmovss/vmovsd selects the
// reversed (MRMDestReg) register encodings defined above. The trailing 0
// marks these as parse-only (never used for printing).
4155. def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4156. (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4157. def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4158. "$dst {${mask}}, $src1, $src2}",
4159. (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4160. VR128X:$src1, VR128X:$src2), 0>;
4161. def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4162. "$dst {${mask}} {z}, $src1, $src2}",
4163. (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4164. VR128X:$src1, VR128X:$src2), 0>;
4165. def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4166. (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4167. def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4168. "$dst {${mask}}, $src1, $src2}",
4169. (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4170. VR128X:$src1, VR128X:$src2), 0>;
4171. def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4172. "$dst {${mask}} {z}, $src1, $src2}",
4173. (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4174. VR128X:$src1, VR128X:$src2), 0>;
4175. def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4176. (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4177. def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4178. "$dst {${mask}}, $src1, $src2}",
4179. (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4180. VR128X:$src1, VR128X:$src2), 0>;
4181. def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4182. "$dst {${mask}} {z}, $src1, $src2}",
4183. (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4184. VR128X:$src1, VR128X:$src2), 0>;
// X86vzmovl (move low element, zero the rest) lowering. When optimizing
// for size, use VMOVSS against a zeroed register; wider types are
// handled by operating on the low XMM subregister and re-inserting with
// SUBREG_TO_REG (upper bits are already zeroed by the 128-bit op).
4185. let Predicates = [HasAVX512, OptForSize] in {
4186. def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4187. (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4188. def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4189. (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4190. // Move low f32 and clear high bits.
4191. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4192. (SUBREG_TO_REG (i32 0),
4193. (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4194. (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4195. def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4196. (SUBREG_TO_REG (i32 0),
4197. (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4198. (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4199. def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4200. (SUBREG_TO_REG (i32 0),
4201. (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4202. (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4203. def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4204. (SUBREG_TO_REG (i32 0),
4205. (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4206. (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4207. }
4208. // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4209. // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
4210. let Predicates = [HasAVX512, OptForSpeed] in {
// Blend immediate selects lane 0 from the source: 0x1 covers one f32
// lane for VBLENDPS, 0x3 covers two i16 lanes (= one i32) for VPBLENDW.
4211. def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4212. (SUBREG_TO_REG (i32 0),
4213. (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4214. (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4215. (i8 1))), sub_xmm)>;
4216. def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4217. (SUBREG_TO_REG (i32 0),
4218. (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4219. (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4220. (i8 3))), sub_xmm)>;
4221. }
// Scalar loads into vector registers, and X86vzload (load + zero-extend
// to full vector width). Wider result types reuse the 128-bit load and
// re-insert via SUBREG_TO_REG since the load already zeroes the rest.
4222. let Predicates = [HasAVX512] in {
4223. def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4224. (VMOVSSZrm addr:$src)>;
4225. def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4226. (VMOVSDZrm addr:$src)>;
4227. // Represent the same patterns above but in the form they appear for
4228. // 256-bit types
4229. def : Pat<(v8f32 (X86vzload32 addr:$src)),
4230. (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4231. def : Pat<(v4f64 (X86vzload64 addr:$src)),
4232. (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4233. // Represent the same patterns above but in the form they appear for
4234. // 512-bit types
4235. def : Pat<(v16f32 (X86vzload32 addr:$src)),
4236. (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4237. def : Pat<(v8f64 (X86vzload64 addr:$src)),
4238. (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4239. }
// f16 equivalents of the X86vzmovl and X86vzload patterns above.
4240. let Predicates = [HasFP16] in {
4241. def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4242. (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4243. def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4244. (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4245. // FIXME we need better canonicalization in dag combine
4246. def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4247. (SUBREG_TO_REG (i32 0),
4248. (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4249. (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4250. def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4251. (SUBREG_TO_REG (i32 0),
4252. (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4253. (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4254. // FIXME we need better canonicalization in dag combine
4255. def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4256. (SUBREG_TO_REG (i32 0),
4257. (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4258. (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4259. def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4260. (SUBREG_TO_REG (i32 0),
4261. (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4262. (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4263. def : Pat<(v8f16 (X86vzload16 addr:$src)),
4264. (VMOVSHZrm addr:$src)>;
4265. def : Pat<(v16f16 (X86vzload16 addr:$src)),
4266. (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4267. def : Pat<(v32f16 (X86vzload16 addr:$src)),
4268. (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4269. }
// vmovq xmm, xmm: move the low 64 bits and zero the upper bits
// (X86vzmovl of v2i64). Integer domain, EVEX-encoded with REX.W.
4270. let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4271. def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4272. (ins VR128X:$src),
4273. "vmovq\t{$src, $dst|$dst, $src}",
4274. [(set VR128X:$dst, (v2i64 (X86vzmovl
4275. (v2i64 VR128X:$src))))]>,
4276. EVEX, VEX_W;
4277. }
// GPR-to-vector moves and zero-extending loads/moves for integer and
// f64/i64 types, built from VMOVD/VMOVQ (DI2PDI / 64toPQI / QI2PQI) and
// the VMOVZPQILo2PQIZrr def above. Wider types again go through the
// low-XMM subregister + SUBREG_TO_REG.
4278. let Predicates = [HasAVX512] in {
// anyext of a GR8 is materialized by inserting the byte into an
// undefined i32 before the GPR->XMM move.
4279. def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4280. (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4281. GR8:$src, sub_8bit)))>;
4282. def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4283. (VMOVDI2PDIZrr GR32:$src)>;
4284. def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4285. (VMOV64toPQIZrr GR64:$src)>;
4286. // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4287. def : Pat<(v4i32 (X86vzload32 addr:$src)),
4288. (VMOVDI2PDIZrm addr:$src)>;
4289. def : Pat<(v8i32 (X86vzload32 addr:$src)),
4290. (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4291. def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4292. (VMOVZPQILo2PQIZrr VR128X:$src)>;
4293. def : Pat<(v2i64 (X86vzload64 addr:$src)),
4294. (VMOVQI2PQIZrm addr:$src)>;
4295. def : Pat<(v4i64 (X86vzload64 addr:$src)),
4296. (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4297. // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4298. def : Pat<(v16i32 (X86vzload32 addr:$src)),
4299. (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4300. def : Pat<(v8i64 (X86vzload64 addr:$src)),
4301. (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// 256/512-bit X86vzmovl of 64-bit elements: vmovq on the low XMM.
4302. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4303. (SUBREG_TO_REG (i32 0),
4304. (v2f64 (VMOVZPQILo2PQIZrr
4305. (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4306. sub_xmm)>;
4307. def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4308. (SUBREG_TO_REG (i32 0),
4309. (v2i64 (VMOVZPQILo2PQIZrr
4310. (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4311. sub_xmm)>;
4312. def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4313. (SUBREG_TO_REG (i32 0),
4314. (v2f64 (VMOVZPQILo2PQIZrr
4315. (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4316. sub_xmm)>;
4317. def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4318. (SUBREG_TO_REG (i32 0),
4319. (v2i64 (VMOVZPQILo2PQIZrr
4320. (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4321. sub_xmm)>;
4322. }
4323. //===----------------------------------------------------------------------===//
4324. // AVX-512 - Non-temporals
4325. //===----------------------------------------------------------------------===//
// Non-temporal loads (vmovntdqa). The 512-bit form requires only
// AVX512F; the 256/128-bit forms need VLX. All are pattern-less here;
// loads are matched by the alignednontemporalload patterns below.
4326. def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4327. (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4328. [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4329. EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4330. let Predicates = [HasVLX] in {
4331. def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4332. (ins i256mem:$src),
4333. "vmovntdqa\t{$src, $dst|$dst, $src}",
4334. [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4335. EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4336. def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4337. (ins i128mem:$src),
4338. "vmovntdqa\t{$src, $dst|$dst, $src}",
4339. [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4340. EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4341. }
// One non-temporal store (MRMDestMem) instruction, matched against the
// given st_frag. High AddedComplexity prefers it over regular stores.
4342. multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4343. X86SchedWriteMoveLS Sched,
4344. PatFrag st_frag = alignednontemporalstore> {
4345. let SchedRW = [Sched.MR], AddedComplexity = 400 in
4346. def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4347. !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4348. [(st_frag (_.VT _.RC:$src), addr:$dst)],
4349. _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4350. }
// Instantiate the non-temporal store at all three vector lengths; the
// 256/128-bit variants additionally require VLX.
4351. multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4352. AVX512VLVectorVTInfo VTInfo,
4353. X86SchedWriteMoveLSWidths Sched> {
4354. let Predicates = [HasAVX512] in
4355. defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4356. let Predicates = [HasAVX512, HasVLX] in {
4357. defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4358. defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4359. }
4360. }
4361. defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4362. SchedWriteVecMoveLSNT>, PD;
4363. defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4364. SchedWriteFMoveLSNT>, PD, VEX_W;
4365. defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4366. SchedWriteFMoveLSNT>, PS;
// Map the remaining element types onto the i64-typed vmovntdq store and
// the vmovntdqa load, since non-temporal semantics are type-agnostic.
4367. let Predicates = [HasAVX512], AddedComplexity = 400 in {
4368. def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4369. (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4370. def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4371. (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4372. def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4373. (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4374. def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4375. (VMOVNTDQAZrm addr:$src)>;
4376. def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4377. (VMOVNTDQAZrm addr:$src)>;
4378. def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4379. (VMOVNTDQAZrm addr:$src)>;
4380. def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4381. (VMOVNTDQAZrm addr:$src)>;
4382. def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4383. (VMOVNTDQAZrm addr:$src)>;
4384. def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4385. (VMOVNTDQAZrm addr:$src)>;
4386. }
// Same type-remapping for the 256/128-bit forms under VLX.
4387. let Predicates = [HasVLX], AddedComplexity = 400 in {
4388. def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4389. (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4390. def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4391. (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4392. def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4393. (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4394. def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4395. (VMOVNTDQAZ256rm addr:$src)>;
4396. def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4397. (VMOVNTDQAZ256rm addr:$src)>;
4398. def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4399. (VMOVNTDQAZ256rm addr:$src)>;
4400. def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4401. (VMOVNTDQAZ256rm addr:$src)>;
4402. def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4403. (VMOVNTDQAZ256rm addr:$src)>;
4404. def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4405. (VMOVNTDQAZ256rm addr:$src)>;
4406. def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4407. (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4408. def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4409. (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4410. def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4411. (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4412. def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4413. (VMOVNTDQAZ128rm addr:$src)>;
4414. def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4415. (VMOVNTDQAZ128rm addr:$src)>;
4416. def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4417. (VMOVNTDQAZ128rm addr:$src)>;
4418. def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4419. (VMOVNTDQAZ128rm addr:$src)>;
4420. def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4421. (VMOVNTDQAZ128rm addr:$src)>;
4422. def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4423. (VMOVNTDQAZ128rm addr:$src)>;
4424. }
  4425. //===----------------------------------------------------------------------===//
  4426. // AVX-512 - Integer arithmetic
  4427. //
  4428. multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
  4429. X86VectorVTInfo _, X86FoldableSchedWrite sched,
  4430. bit IsCommutable = 0> {
  4431. defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
  4432. (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
  4433. "$src2, $src1", "$src1, $src2",
  4434. (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
  4435. IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
  4436. Sched<[sched]>;
  4437. defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
  4438. (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
  4439. "$src2, $src1", "$src1, $src2",
  4440. (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
  4441. AVX512BIBase, EVEX_4V,
  4442. Sched<[sched.Folded, sched.ReadAfterFold]>;
  4443. }
  4444. multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
  4445. X86VectorVTInfo _, X86FoldableSchedWrite sched,
  4446. bit IsCommutable = 0> :
  4447. avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  4448. defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
  4449. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
  4450. "${src2}"#_.BroadcastStr#", $src1",
  4451. "$src1, ${src2}"#_.BroadcastStr,
  4452. (_.VT (OpNode _.RC:$src1,
  4453. (_.BroadcastLdFrag addr:$src2)))>,
  4454. AVX512BIBase, EVEX_4V, EVEX_B,
  4455. Sched<[sched.Folded, sched.ReadAfterFold]>;
  4456. }
// Instantiates avx512_binop_rm at all three vector lengths. The 512-bit (Z)
// form requires only `prd`; the 256/128-bit forms additionally need HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// Same length expansion as avx512_binop_rm_vl, but using avx512_binop_rmb so
// each width also gets the embedded-broadcast memory form.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                              IsCommutable>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// Qword-element wrapper: i64 vector infos, REX.W prefix, 64-bit
// compressed-displacement scaling, with broadcast forms.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
// Dword-element wrapper: i32 vector infos with 32-bit compressed-displacement
// scaling, with broadcast forms.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
// Word-element wrapper: i16 vector infos. Uses the non-broadcast
// avx512_binop_rm_vl (word-element ops have no embedded-broadcast form here).
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
// Byte-element wrapper: i8 vector infos. Like the word wrapper, no
// embedded-broadcast form.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
// Emits both dword ("d"-suffixed) and qword ("q"-suffixed) variants of a
// binop, each with its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;
  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
// Emits both byte ("b"-suffixed) and word ("w"-suffixed) variants of a
// binop, each with its own opcode.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;
  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}
// Emits all four element-size variants (b/w/d/q). The d/q forms need only
// HasAVX512; the b/w forms require HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
// Binary op whose source and destination vector types differ (_Src vs _Dst).
// _Brdct supplies the element type used by the embedded-broadcast form; the
// broadcast load is bitconverted to the source type before the op.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
            AVX512BIBase, EVEX_4V,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Integer add/sub (wrapping and saturating), average, and multiply families.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
// Signed/unsigned saturating add/sub exist only for byte/word elements.
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
// VPMULLQ is DQI-only and has no VEX equivalent to compress to.
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
// Length expansion for avx512_binop_rm2: all widths use a qword-element
// broadcast info (v8i64/v4i64/v2i64) and 64-bit CD8 scaling with REX.W.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
// VPMULTISHIFTQB (VBMI): byte-element source/dest with qword broadcast.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
// Embedded-broadcast form for pack-style ops (source and destination have
// different types); the broadcast element width comes from _Src.EltSize.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Src.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Register-register and register-memory forms for pack-style ops with
// distinct source/destination vector types.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// i32 -> i16 packs (e.g. vpackssdw/vpackusdw) at all widths; the dword
// source allows an embedded-broadcast form, so avx512_packs_rmb is included.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                  v32i16_info, SchedWriteShuffle.ZMM>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                   v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128;
  }
}
// i16 -> i8 packs (e.g. vpacksswb/vpackuswb) at all widths; word-element
// sources get no broadcast form, so only avx512_packs_rm is used.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                  SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128, VEX_WIG;
  }
}
// Multiply-add family (vpmaddubsw/vpmaddwd): source and destination element
// types differ, instantiated at all widths under HasBWI(/HasVLX).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                  _Dst.info512, SchedWriteVecIMul.ZMM,
                                  IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
// Pack, multiply-add, and signed/unsigned min/max instruction definitions.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
// The qword min/max forms are AVX-512-only; no VEX encoding to compress to.
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern widens the operands into a ZMM register via
// INSERT_SUBREG over IMPLICIT_DEF, runs the 512-bit instruction, and
// extracts the original-width subregister from the result.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit register-register form.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  // 256-bit broadcast form: the broadcast memory operand passes straight
  // through to the 512-bit rmb instruction.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;
  // 128-bit register-register form.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  // 128-bit broadcast form.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}
// Lowers 128/256-bit v2i64/v4i64 min/max through the 512-bit instruction
// named by `Instr` (used when VLX is unavailable): widen operands to ZMM,
// execute, then extract the original-width subregister.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit register-register form.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  // 256-bit broadcast form.
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;
  // 128-bit register-register form.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  // 128-bit broadcast form.
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}
// Qword min/max: fall back to the 512-bit instructions when VLX is absent.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

// Bitwise and/or/xor/andn exist only in dword and qword element forms.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
// X86andnp is not commutable (first operand is inverted).
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
// There are no byte/word-element logic instructions; select the qword (Q)
// forms for v16i8/v8i16 (128-bit) and v32i8/v16i16 (256-bit) operations.
let Predicates = [HasVLX] in {
  // 128-bit register-register forms.
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  // 128-bit register-memory forms.
  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  // 256-bit register-register forms.
  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  // 256-bit register-memory forms.
  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
// Same mapping for 512-bit vectors: byte/word logic ops select the qword
// (Q) 512-bit instructions.
let Predicates = [HasAVX512] in {
  // Register-register forms.
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  // Register-memory forms.
  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with different type than logic op.
// The logic op runs in IntInfo's type while the mask select (merge or
// zeroing) is in _'s type; the bitconvert bridges the two so the masked
// instruction forms (rrk/rrkz/rmk/rmkz) can be selected directly.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
// Broadcast variant of avx512_logical_lowering: the second operand is an
// embedded broadcast load in IntInfo's type, selected into rmbk/rmbkz forms.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
// Instantiates avx512_logical_lowering at 128/256-bit (HasVLX) and 512-bit
// (HasAVX512) widths for a given select-type/logic-type pairing.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
  let Predicates = [HasVLX] in {
    defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                   IntInfo.info128>;
    defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                   IntInfo.info256>;
  }
  let Predicates = [HasAVX512] in {
    defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                   IntInfo.info512>;
  }
}
// Same width expansion as avx512_logical_lowering_sizes, for the
// embedded-broadcast pattern variant.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
  let Predicates = [HasVLX] in {
    defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                         SelectInfo.info128, IntInfo.info128>;
    defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                         SelectInfo.info256, IntInfo.info256>;
  }
  let Predicates = [HasAVX512] in {
    defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                         SelectInfo.info512, IntInfo.info512>;
  }
}
// Cross-product of select types and logic-op types: integer selects map to
// the matching-width D or Q instruction; FP selects (f32 -> D, f64 -> Q)
// additionally get the broadcast variants.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;
  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;
  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;
  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;
  // FP selects with same-width broadcast logic ops.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
// Instantiate the mixed-type masked-select lowerings for each logic op.
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop: masked intrinsic (_Int) forms using VecNode, plus
// isCodeGenOnly FRC-register forms using OpNode for plain scalar ISel.
// All forms read MXCSR and may raise FP exceptions.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Plain scalar-register forms; only the rr form can commute.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
// Static-rounding (rrb) form: adds an AVX512RC rounding-mode operand,
// encoded with EVEX.b + EVEX.RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binop with a suppress-all-exceptions ({sae}) variant: masked
// intrinsic forms (VecNode), isCodeGenOnly FRC forms (OpNode) that carry
// EVEX->VEX override names, and an EVEX.b-encoded rrb form (SaeNode).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  // Plain scalar-register forms; mapped back to the named VEX instruction
  // by the EVEX->VEX pass via EVEX2VEXOverride.
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }
  // {sae} form: exceptions suppressed, encoded with EVEX.b.
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_B, Sched<[sched]>;
  }
}
// Instantiates the ss/sd (and, under HasFP16, sh) scalar forms of one opcode
// together with their static-rounding (RndNode) variants.
5127. multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5128. SDNode VecNode, SDNode RndNode,
5129. X86SchedWriteSizes sched, bit IsCommutable> {
5130. defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5131. sched.PS.Scl, IsCommutable>,
5132. avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5133. sched.PS.Scl>,
5134. XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5135. defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5136. sched.PD.Scl, IsCommutable>,
5137. avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5138. sched.PD.Scl>,
5139. XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// fp16 scalar form is gated on the AVX512-FP16 feature.
5140. let Predicates = [HasFP16] in
5141. defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5142. VecNode, sched.PH.Scl, IsCommutable>,
5143. avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5144. sched.PH.Scl>,
5145. T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5146. }
// Instantiates ss/sd (and, under HasFP16, sh) scalar forms with a {sae}
// variant instead of a rounding-control variant (used for min/max).
5147. multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5148. SDNode VecNode, SDNode SaeNode,
5149. X86SchedWriteSizes sched, bit IsCommutable> {
5150. defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5151. VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5152. NAME#"SS">,
5153. XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5154. defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5155. VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5156. NAME#"SD">,
5157. XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// fp16 variant has no VEX counterpart, hence NotEVEX2VEXConvertible.
5158. let Predicates = [HasFP16] in {
5159. defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5160. VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5161. NAME#"SH">,
5162. T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5163. NotEVEX2VEXConvertible;
5164. }
5165. }
// Scalar FP arithmetic: add/mul/sub/div get rounding-control variants;
// min/max get {sae} variants. Trailing 1/0 is IsCommutable.
5166. defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5167. SchedWriteFAddSizes, 1>;
5168. defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5169. SchedWriteFMulSizes, 1>;
5170. defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5171. SchedWriteFAddSizes, 0>;
5172. defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5173. SchedWriteFDivSizes, 0>;
5174. defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5175. SchedWriteFCmpSizes, 0>;
5176. defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5177. SchedWriteFCmpSizes, 0>;
5178. // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5179. // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Codegen-only commutable scalar binop on FRC registers (reg/reg and
// reg/mem forms only; no masking, no rounding).
// NOTE(review): the multiclass name misspells "commutable"; kept as-is
// since renaming would break every instantiation site.
5180. multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5181. X86VectorVTInfo _, SDNode OpNode,
5182. X86FoldableSchedWrite sched,
5183. string EVEX2VEXOvrd> {
5184. let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5185. def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5186. (ins _.FRC:$src1, _.FRC:$src2),
5187. OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5188. [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5189. Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5190. let isCommutable = 1;
5191. }
5192. def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5193. (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5194. OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5195. [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5196. (_.ScalarLdFrag addr:$src2)))]>,
5197. Sched<[sched.Folded, sched.ReadAfterFold]>,
5198. EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5199. }
5200. }
// Commutable (fast-math) scalar min/max instantiations for f32/f64/f16.
// The fp16 forms have no VEX equivalent (NotEVEX2VEXConvertible).
5201. defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5202. SchedWriteFCmp.Scl, "VMINCSS">, XS,
5203. EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5204. defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5205. SchedWriteFCmp.Scl, "VMINCSD">, XD,
5206. VEX_W, EVEX_4V, VEX_LIG,
5207. EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5208. defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5209. SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5210. EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5211. defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5212. SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5213. VEX_W, EVEX_4V, VEX_LIG,
5214. EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5215. defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5216. SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5217. EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5218. NotEVEX2VEXConvertible;
5219. defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5220. SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5221. EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5222. NotEVEX2VEXConvertible;
// Packed FP binop: reg/reg (rr), reg/mem (rm), and reg/broadcast-mem (rmb)
// forms, each maskable. OpNode is used for the unmasked pattern and
// MaskOpNode for the masked one (AVX512_maskable_split keeps them separate
// so e.g. strict-FP nodes can differ from their masked counterparts).
5223. multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5224. SDPatternOperator MaskOpNode,
5225. X86VectorVTInfo _, X86FoldableSchedWrite sched,
5226. bit IsCommutable,
5227. bit IsKCommutable = IsCommutable,
5228. string suffix = _.Suffix,
5229. string ClobberConstraint = "",
5230. bit MayRaiseFPException = 1> {
5231. let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5232. Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5233. defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5234. (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5235. "$src2, $src1", "$src1, $src2",
5236. (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5237. (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5238. IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5239. let mayLoad = 1 in {
5240. defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5241. (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5242. "$src2, $src1", "$src1, $src2",
5243. (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5244. (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5245. ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// EVEX_B here selects the embedded-broadcast memory form.
5246. defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5247. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5248. "${src2}"#_.BroadcastStr#", $src1",
5249. "$src1, ${src2}"#_.BroadcastStr,
5250. (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5251. (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5252. ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5253. }
5254. }
5255. }
// Packed FP binop with static rounding control: register-only "rrb" form,
// EVEX_B + EVEX_RC, rounding mode passed as (i32 timm:$rc).
5256. multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5257. SDPatternOperator OpNodeRnd,
5258. X86FoldableSchedWrite sched, X86VectorVTInfo _,
5259. string suffix = _.Suffix,
5260. string ClobberConstraint = ""> {
5261. let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5262. defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5263. (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5264. "$rc, $src2, $src1", "$src1, $src2, $rc",
5265. (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5266. 0, 0, 0, vselect_mask, ClobberConstraint>,
5267. EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5268. }
// Packed FP binop with {sae}: register-only "rrb" form, EVEX_B encoded,
// no rounding-control operand.
5269. multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5270. SDPatternOperator OpNodeSAE,
5271. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5272. let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5273. defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5274. (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5275. "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5276. (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5277. EVEX_4V, EVEX_B, Sched<[sched]>;
5278. }
// Packed f32/f64 binop across vector widths: ZMM forms under `prd`,
// XMM/YMM forms additionally gated on HasVLX. IsPD128Commutable lets the
// 128-bit PD form differ in commutability from the rest.
5279. multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5280. SDPatternOperator MaskOpNode,
5281. Predicate prd, X86SchedWriteSizes sched,
5282. bit IsCommutable = 0,
5283. bit IsPD128Commutable = IsCommutable> {
5284. let Predicates = [prd] in {
5285. defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5286. sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5287. EVEX_CD8<32, CD8VF>;
5288. defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5289. sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5290. EVEX_CD8<64, CD8VF>;
5291. }
5292. // Define only if AVX512VL feature is present.
5293. let Predicates = [prd, HasVLX] in {
5294. defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5295. sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5296. EVEX_CD8<32, CD8VF>;
5297. defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5298. sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5299. EVEX_CD8<32, CD8VF>;
5300. defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5301. sched.PD.XMM, IsPD128Commutable,
5302. IsCommutable>, EVEX_V128, PD, VEX_W,
5303. EVEX_CD8<64, CD8VF>;
5304. defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5305. sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5306. EVEX_CD8<64, CD8VF>;
5307. }
5308. }
// Packed f16 binop across vector widths: ZMM under HasFP16, XMM/YMM under
// HasVLX+HasFP16. Parallels avx512_fp_binop_p for the fp16 element type.
5309. multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5310. SDPatternOperator MaskOpNode,
5311. X86SchedWriteSizes sched, bit IsCommutable = 0> {
5312. let Predicates = [HasFP16] in {
5313. defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5314. sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5315. EVEX_CD8<16, CD8VF>;
5316. }
5317. let Predicates = [HasVLX, HasFP16] in {
5318. defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5319. sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5320. EVEX_CD8<16, CD8VF>;
5321. defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5322. sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5323. EVEX_CD8<16, CD8VF>;
5324. }
5325. }
// Static-rounding packed forms; only 512-bit vectors support embedded
// rounding, so only ZMM variants are emitted (ph gated on HasFP16).
5326. let Uses = [MXCSR] in
5327. multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5328. X86SchedWriteSizes sched> {
5329. let Predicates = [HasFP16] in {
5330. defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5331. v32f16_info>,
5332. EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5333. }
5334. defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5335. v16f32_info>,
5336. EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5337. defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5338. v8f64_info>,
5339. EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5340. }
// {sae} packed forms; like the rounding variants above, ZMM-only
// (ph gated on HasFP16).
5341. let Uses = [MXCSR] in
5342. multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5343. X86SchedWriteSizes sched> {
5344. let Predicates = [HasFP16] in {
5345. defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5346. v32f16_info>,
5347. EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5348. }
5349. defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5350. v16f32_info>,
5351. EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5352. defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5353. v8f64_info>,
5354. EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5355. }
// Packed FP arithmetic instantiations. add/mul/sub/div get rounding-control
// variants; min/max get {sae} variants. VMINC/VMAXC are codegen-only
// commutable flavors selected under fast-math.
5356. defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5357. SchedWriteFAddSizes, 1>,
5358. avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5359. avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5360. defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5361. SchedWriteFMulSizes, 1>,
5362. avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5363. avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5364. defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5365. SchedWriteFAddSizes>,
5366. avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5367. avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5368. defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5369. SchedWriteFDivSizes>,
5370. avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5371. avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5372. defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5373. SchedWriteFCmpSizes, 0>,
5374. avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5375. avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5376. defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5377. SchedWriteFCmpSizes, 0>,
5378. avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5379. avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5380. let isCodeGenOnly = 1 in {
5381. defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5382. SchedWriteFCmpSizes, 1>,
5383. avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5384. SchedWriteFCmpSizes, 1>;
5385. defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5386. SchedWriteFCmpSizes, 1>,
5387. avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5388. SchedWriteFCmpSizes, 1>;
5389. }
// FP-domain logical ops (DQ extension). Bitwise ops never touch MXCSR and
// cannot raise FP exceptions, hence the empty Uses list. Patterns are
// null_frag; selection is handled elsewhere.
5390. let Uses = []<Register>, mayRaiseFPException = 0 in {
5391. defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5392. SchedWriteFLogicSizes, 1>;
5393. defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5394. SchedWriteFLogicSizes, 0>;
5395. defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5396. SchedWriteFLogicSizes, 1>;
5397. defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5398. SchedWriteFLogicSizes, 1>;
5399. }
// Packed VSCALEF-style op: rr, rm and broadcast rmb maskable forms.
5400. multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5401. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5402. let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5403. defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5404. (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5405. "$src2, $src1", "$src1, $src2",
5406. (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5407. EVEX_4V, Sched<[sched]>;
5408. defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5409. (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5410. "$src2, $src1", "$src1, $src2",
5411. (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5412. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast memory form (EVEX_B).
5413. defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5414. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5415. "${src2}"#_.BroadcastStr#", $src1",
5416. "$src1, ${src2}"#_.BroadcastStr,
5417. (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5418. EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5419. }
5420. }
// Scalar VSCALEF-style op: rr and rm intrinsic forms (no broadcast form
// for scalars).
5421. multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5422. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5423. let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5424. defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5425. (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5426. "$src2, $src1", "$src1, $src2",
5427. (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5428. Sched<[sched]>;
5429. defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5430. (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5431. "$src2, $src1", "$src1, $src2",
5432. (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5433. Sched<[sched.Folded, sched.ReadAfterFold]>;
5434. }
5435. }
// Full VSCALEF family: packed ps/pd/ph (ZMM with rounding; XMM/YMM under
// VLX without rounding) plus scalar ss/sd/sh with rounding variants.
// `opc` is the packed opcode, `opcScaler` the scalar one.
5436. multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5437. X86SchedWriteWidths sched> {
5438. let Predicates = [HasFP16] in {
5439. defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5440. avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5441. EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5442. defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5443. avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5444. EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5445. }
5446. defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5447. avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5448. EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5449. defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5450. avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5451. EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5452. defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5453. avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5454. X86scalefsRnd, sched.Scl>,
5455. EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5456. defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5457. avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5458. X86scalefsRnd, sched.Scl>,
5459. EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5460. // Define only if AVX512VL feature is present.
5461. let Predicates = [HasVLX] in {
5462. defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5463. EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5464. defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5465. EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
5466. defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5467. EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5468. defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5469. EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5470. }
5471. let Predicates = [HasFP16, HasVLX] in {
5472. defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5473. EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
5474. defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5475. EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
5476. }
5477. }
// VSCALEF has no VEX encoding, hence NotEVEX2VEXConvertible.
5478. defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5479. SchedWriteFAdd>, NotEVEX2VEXConvertible;
  5480. //===----------------------------------------------------------------------===//
  5481. // AVX-512 VPTESTM instructions
  5482. //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM core: rr and rm forms writing a mask register (KRC).
5483. multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5484. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5485. // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5486. // There are just too many permutations due to commutability and bitcasts.
5487. let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5488. defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5489. (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5490. "$src2, $src1", "$src1, $src2",
5491. (null_frag), (null_frag), 1>,
5492. EVEX_4V, Sched<[sched]>;
5493. let mayLoad = 1 in
5494. defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5495. (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5496. "$src2, $src1", "$src1, $src2",
5497. (null_frag), (null_frag)>,
5498. EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5499. Sched<[sched.Folded, sched.ReadAfterFold]>;
5500. }
5501. }
// Broadcast-memory (rmb) form of VPTESTM/VPTESTNM; patterns are null_frag
// for the same manual-selection reason as avx512_vptest.
5502. multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5503. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5504. let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5505. defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5506. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5507. "${src2}"#_.BroadcastStr#", $src1",
5508. "$src1, ${src2}"#_.BroadcastStr,
5509. (null_frag), (null_frag)>,
5510. EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5511. Sched<[sched.Folded, sched.ReadAfterFold]>;
5512. }
// d/q element VPTEST* across widths: ZMM plus broadcast under HasAVX512,
// YMM/XMM plus broadcast additionally under HasVLX.
5513. multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5514. X86SchedWriteWidths sched,
5515. AVX512VLVectorVTInfo _> {
5516. let Predicates = [HasAVX512] in
5517. defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5518. avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5519. let Predicates = [HasAVX512, HasVLX] in {
5520. defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5521. avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5522. defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5523. avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5524. }
5525. }
// Instantiates the dword ("d") and qword ("q", VEX_W) element sizes.
5526. multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5527. X86SchedWriteWidths sched> {
5528. defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5529. avx512vl_i32_info>;
5530. defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5531. avx512vl_i64_info>, VEX_W;
5532. }
// Byte/word element VPTEST* (BWI extension); no broadcast forms exist for
// b/w element sizes, so only avx512_vptest is instantiated.
5533. multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5534. X86SchedWriteWidths sched> {
5535. let Predicates = [HasBWI] in {
5536. defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5537. v32i16_info>, EVEX_V512, VEX_W;
5538. defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5539. v64i8_info>, EVEX_V512;
5540. }
5541. let Predicates = [HasVLX, HasBWI] in {
5542. defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5543. v16i16x_info>, EVEX_V256, VEX_W;
5544. defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5545. v8i16x_info>, EVEX_V128, VEX_W;
5546. defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5547. v32i8x_info>, EVEX_V256;
5548. defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5549. v16i8x_info>, EVEX_V128;
5550. }
5551. }
// All element sizes of one VPTEST* flavor; b/w and d/q use distinct opcodes.
5552. multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5553. X86SchedWriteWidths sched> :
5554. avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5555. avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM and VPTESTNM share opcodes; the prefix (T8PD vs T8XS)
// distinguishes the two encodings.
5556. defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5557. SchedWriteVecLogic>, T8PD;
5558. defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5559. SchedWriteVecLogic>, T8XS;
  5560. //===----------------------------------------------------------------------===//
  5561. // AVX-512 Shift instructions
  5562. //===----------------------------------------------------------------------===//
// Shift/rotate by immediate: ri (register) and mi (memory) maskable forms;
// the shift amount is an 8-bit immediate (u8imm -> i8 timm in the pattern).
5563. multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5564. string OpcodeStr, SDNode OpNode,
5565. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5566. let ExeDomain = _.ExeDomain in {
5567. defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5568. (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5569. "$src2, $src1", "$src1, $src2",
5570. (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5571. Sched<[sched]>;
5572. defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5573. (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5574. "$src2, $src1", "$src1, $src2",
5575. (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5576. (i8 timm:$src2)))>,
5577. Sched<[sched.Folded]>;
5578. }
5579. }
// Broadcast-memory-by-immediate (mbi) form of the immediate shifts/rotates.
5580. multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5581. string OpcodeStr, SDNode OpNode,
5582. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5583. let ExeDomain = _.ExeDomain in
5584. defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5585. (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5586. "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5587. (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5588. EVEX_B, Sched<[sched.Folded]>;
5589. }
// Shift by a count held in an XMM register (or loaded from a 128-bit
// memory operand): rr and rm maskable forms.
5590. multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5591. X86FoldableSchedWrite sched, ValueType SrcVT,
5592. X86VectorVTInfo _> {
5593. // src2 is always 128-bit
5594. let ExeDomain = _.ExeDomain in {
5595. defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5596. (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5597. "$src2, $src1", "$src1, $src2",
5598. (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5599. AVX512BIBase, EVEX_4V, Sched<[sched]>;
5600. defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5601. (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5602. "$src2, $src1", "$src1, $src2",
5603. (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5604. AVX512BIBase,
5605. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5606. }
5607. }
// XMM-count shifts across widths. Note the CD8 tuple differs per width
// (CD8VQ/CD8VH/CD8VF) because the count operand is always 128-bit.
5608. multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5609. X86SchedWriteWidths sched, ValueType SrcVT,
5610. AVX512VLVectorVTInfo VTInfo,
5611. Predicate prd> {
5612. let Predicates = [prd] in
5613. defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5614. VTInfo.info512>, EVEX_V512,
5615. EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5616. let Predicates = [prd, HasVLX] in {
5617. defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5618. VTInfo.info256>, EVEX_V256,
5619. EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5620. defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5621. VTInfo.info128>, EVEX_V128,
5622. EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5623. }
5624. }
// d/q/w element XMM-count shifts. NotEVEX2VEXConvertibleQ marks the qword
// forms that have no VEX equivalent (e.g. arithmetic right shift of i64).
5625. multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5626. string OpcodeStr, SDNode OpNode,
5627. X86SchedWriteWidths sched,
5628. bit NotEVEX2VEXConvertibleQ = 0> {
5629. defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5630. avx512vl_i32_info, HasAVX512>;
5631. let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5632. defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5633. avx512vl_i64_info, HasAVX512>, VEX_W;
5634. defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5635. avx512vl_i16_info, HasBWI>;
5636. }
// Immediate shifts across widths: ri/mi plus broadcast mbi per width;
// ZMM under HasAVX512, YMM/XMM additionally under HasVLX.
5637. multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5638. string OpcodeStr, SDNode OpNode,
5639. X86SchedWriteWidths sched,
5640. AVX512VLVectorVTInfo VTInfo> {
5641. let Predicates = [HasAVX512] in
5642. defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5643. sched.ZMM, VTInfo.info512>,
5644. avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5645. VTInfo.info512>, EVEX_V512;
5646. let Predicates = [HasAVX512, HasVLX] in {
5647. defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5648. sched.YMM, VTInfo.info256>,
5649. avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5650. VTInfo.info256>, EVEX_V256;
5651. defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5652. sched.XMM, VTInfo.info128>,
5653. avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5654. VTInfo.info128>, EVEX_V128;
5655. }
5656. }
// Word-element immediate shifts (BWI); no broadcast form exists for words.
5657. multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5658. string OpcodeStr, SDNode OpNode,
5659. X86SchedWriteWidths sched> {
5660. let Predicates = [HasBWI] in
5661. defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5662. sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5663. let Predicates = [HasVLX, HasBWI] in {
5664. defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5665. sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5666. defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5667. sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5668. }
5669. }
// d/q element immediate shifts; qword forms optionally marked as having
// no VEX equivalent (NotEVEX2VEXConvertibleQ).
5670. multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5671. Format ImmFormR, Format ImmFormM,
5672. string OpcodeStr, SDNode OpNode,
5673. X86SchedWriteWidths sched,
5674. bit NotEVEX2VEXConvertibleQ = 0> {
5675. defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5676. sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5677. let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5678. defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5679. sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5680. }
// Immediate shifts/rotates (ModRM reg-field opcode extension selects the
// operation: /2 srl, /6 sll, /4 sra, /0 ror, /1 rol) and XMM-count shifts.
// VPSRA passes NotEVEX2VEXConvertibleQ=1: vpsraq has no VEX form.
5681. defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5682. SchedWriteVecShiftImm>,
5683. avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5684. SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5685. defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5686. SchedWriteVecShiftImm>,
5687. avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5688. SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5689. defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5690. SchedWriteVecShiftImm, 1>,
5691. avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5692. SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5693. defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5694. SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5695. defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5696. SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5697. defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5698. SchedWriteVecShift>;
5699. defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5700. SchedWriteVecShift, 1>;
5701. defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5702. SchedWriteVecShift>;
5703. // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Widen the 128/256-bit value into a ZMM register via INSERT_SUBREG,
// perform the 512-bit arithmetic shift, then extract the low subregister.
// The upper lanes are undef (IMPLICIT_DEF) but are discarded by the extract.
5704. let Predicates = [HasAVX512, NoVLX] in {
5705. def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5706. (EXTRACT_SUBREG (v8i64
5707. (VPSRAQZrr
5708. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5709. VR128X:$src2)), sub_ymm)>;
5710. def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5711. (EXTRACT_SUBREG (v8i64
5712. (VPSRAQZrr
5713. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5714. VR128X:$src2)), sub_xmm)>;
5715. def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5716. (EXTRACT_SUBREG (v8i64
5717. (VPSRAQZri
5718. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5719. timm:$src2)), sub_ymm)>;
5720. def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5721. (EXTRACT_SUBREG (v8i64
5722. (VPSRAQZri
5723. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5724. timm:$src2)), sub_xmm)>;
5725. }
5726. //===-------------------------------------------------------------------===//
5727. // Variable Bit Shifts
5728. //===-------------------------------------------------------------------===//
// Register/register and register/memory forms of a two-operand, per-element
// node OpNode (variable shifts here; also reused below for VPERMW/B and
// VPSHUFB, which have the same src1-op-src2 shape).  Masked variants come
// from AVX512_maskable.
5729. multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5730. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5731. let ExeDomain = _.ExeDomain in {
5732. defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5733. (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5734. "$src2, $src1", "$src1, $src2",
5735. (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5736. AVX5128IBase, EVEX_4V, Sched<[sched]>;
// Memory form: full-vector load of src2; EVEX_CD8 sets the disp8 scaling.
5737. defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5738. (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5739. "$src2, $src1", "$src1, $src2",
5740. (_.VT (OpNode _.RC:$src1,
5741. (_.VT (_.LdFrag addr:$src2))))>,
5742. AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5743. Sched<[sched.Folded, sched.ReadAfterFold]>;
5744. }
5745. }
// Broadcast-from-memory companion to avx512_var_shift: src2 is a single
// scalar element broadcast to the full vector (EVEX.b set via EVEX_B).
5746. multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5747. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5748. let ExeDomain = _.ExeDomain in
5749. defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5750. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5751. "${src2}"#_.BroadcastStr#", $src1",
5752. "$src1, ${src2}"#_.BroadcastStr,
5753. (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5754. AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5755. Sched<[sched.Folded, sched.ReadAfterFold]>;
5756. }
// Instantiates the 512-bit form unconditionally under AVX512 and the
// 256/128-bit forms only when VLX is also available.
5757. multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5758. X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5759. let Predicates = [HasAVX512] in
5760. defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5761. avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5762. let Predicates = [HasAVX512, HasVLX] in {
5763. defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5764. avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5765. defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5766. avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5767. }
5768. }
// Expands one variable-shift opcode into its dword (suffix "d") and qword
// (suffix "q") flavors; VEX_W selects the 64-bit-element encoding.
5769. multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5770. SDNode OpNode, X86SchedWriteWidths sched> {
5771. defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5772. avx512vl_i32_info>;
5773. defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5774. avx512vl_i64_info>, VEX_W;
5775. }
5776. // Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits patterns that widen a 256-bit or 128-bit OpNode to the 512-bit
// "Zrr" instruction named by OpcodeStr: both operands are inserted into
// undef ZMM registers, and the low subregister of the result is extracted.
5777. multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5778. SDNode OpNode, list<Predicate> p> {
5779. let Predicates = p in {
5780. def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5781. (_.info256.VT _.info256.RC:$src2))),
5782. (EXTRACT_SUBREG
5783. (!cast<Instruction>(OpcodeStr#"Zrr")
5784. (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5785. (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5786. sub_ymm)>;
5787. def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5788. (_.info128.VT _.info128.RC:$src2))),
5789. (EXTRACT_SUBREG
5790. (!cast<Instruction>(OpcodeStr#"Zrr")
5791. (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5792. (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5793. sub_xmm)>;
5794. }
5795. }
// Word-element (16-bit) variable shifts; these require BWI, and VLX on top
// of BWI for the 256/128-bit forms.  No broadcast form exists for words.
5796. multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5797. SDNode OpNode, X86SchedWriteWidths sched> {
5798. let Predicates = [HasBWI] in
5799. defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
5800. EVEX_V512, VEX_W;
5801. let Predicates = [HasVLX, HasBWI] in {
5802. defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
5803. EVEX_V256, VEX_W;
5804. defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
5805. EVEX_V128, VEX_W;
5806. }
5807. }
// Per-element variable shifts (count comes from the second vector) plus
// variable rotates, followed by NoVLX widening patterns for the forms that
// have no VEX/VLX encoding (vpsravq and the word-element shifts).
5808. defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
5809. avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
5810. defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
5811. avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
5812. defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
5813. avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
// Variable rotates map straight to the generic rotr/rotl ISD nodes.
5814. defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
5815. defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
5816. defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
5817. defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
5818. defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
5819. defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
5820. // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widening trick as the VPSRA patterns above: insert into undef ZMM,
// rotate at 512 bits, extract the low subregister.
5821. let Predicates = [HasAVX512, NoVLX] in {
// Variable-count rotate-left, qword then dword elements.
5822. def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5823. (EXTRACT_SUBREG (v8i64
5824. (VPROLVQZrr
5825. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5826. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5827. sub_xmm)>;
5828. def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5829. (EXTRACT_SUBREG (v8i64
5830. (VPROLVQZrr
5831. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5832. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5833. sub_ymm)>;
5834. def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5835. (EXTRACT_SUBREG (v16i32
5836. (VPROLVDZrr
5837. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5838. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5839. sub_xmm)>;
5840. def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5841. (EXTRACT_SUBREG (v16i32
5842. (VPROLVDZrr
5843. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5844. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5845. sub_ymm)>;
// Immediate-count rotate-left (X86vrotli) variants.
5846. def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
5847. (EXTRACT_SUBREG (v8i64
5848. (VPROLQZri
5849. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5850. timm:$src2)), sub_xmm)>;
5851. def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
5852. (EXTRACT_SUBREG (v8i64
5853. (VPROLQZri
5854. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5855. timm:$src2)), sub_ymm)>;
5856. def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
5857. (EXTRACT_SUBREG (v16i32
5858. (VPROLDZri
5859. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5860. timm:$src2)), sub_xmm)>;
5861. def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
5862. (EXTRACT_SUBREG (v16i32
5863. (VPROLDZri
5864. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5865. timm:$src2)), sub_ymm)>;
5866. }
5867. // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror image of the VPROL block above, for rotate-right.
5868. let Predicates = [HasAVX512, NoVLX] in {
// Variable-count rotate-right, qword then dword elements.
5869. def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5870. (EXTRACT_SUBREG (v8i64
5871. (VPRORVQZrr
5872. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5873. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5874. sub_xmm)>;
5875. def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5876. (EXTRACT_SUBREG (v8i64
5877. (VPRORVQZrr
5878. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5879. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5880. sub_ymm)>;
5881. def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5882. (EXTRACT_SUBREG (v16i32
5883. (VPRORVDZrr
5884. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5885. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5886. sub_xmm)>;
5887. def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5888. (EXTRACT_SUBREG (v16i32
5889. (VPRORVDZrr
5890. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5891. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5892. sub_ymm)>;
// Immediate-count rotate-right (X86vrotri) variants.
5893. def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
5894. (EXTRACT_SUBREG (v8i64
5895. (VPRORQZri
5896. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5897. timm:$src2)), sub_xmm)>;
5898. def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
5899. (EXTRACT_SUBREG (v8i64
5900. (VPRORQZri
5901. (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5902. timm:$src2)), sub_ymm)>;
5903. def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
5904. (EXTRACT_SUBREG (v16i32
5905. (VPRORDZri
5906. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5907. timm:$src2)), sub_xmm)>;
5908. def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
5909. (EXTRACT_SUBREG (v16i32
5910. (VPRORDZri
5911. (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5912. timm:$src2)), sub_ymm)>;
5913. }
5914. //===-------------------------------------------------------------------===//
5915. // 1-src variable permutation VPERMW/D/Q
5916. //===-------------------------------------------------------------------===//
// Reuses avx512_var_shift's src1-op-src2 shape for variable permutes.
// Only ZMM and YMM forms exist: a 128-bit lane-crossing permute is a no-op,
// so no Z128 instantiation is emitted.
5917. multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5918. X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
5919. let Predicates = [HasAVX512] in
5920. defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
5921. avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
5922. let Predicates = [HasAVX512, HasVLX] in
5923. defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
5924. avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
5925. }
// Immediate-control permutes (vpermq/vpermpd imm8).  As above, only 512-
// and 256-bit forms are meaningful; both get register, memory, and
// broadcast-memory variants.
5926. multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5927. string OpcodeStr, SDNode OpNode,
5928. X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
5929. let Predicates = [HasAVX512] in
5930. defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5931. sched, VTInfo.info512>,
5932. avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5933. sched, VTInfo.info512>, EVEX_V512;
5934. let Predicates = [HasAVX512, HasVLX] in
5935. defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5936. sched, VTInfo.info256>,
5937. avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5938. sched, VTInfo.info256>, EVEX_V256;
5939. }
// Byte/word variable permutes (VPERMB/VPERMW).  Feature predicate is a
// parameter since VPERMB needs VBMI while VPERMW needs BWI; no broadcast
// form exists for sub-dword elements.
5940. multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5941. Predicate prd, SDNode OpNode,
5942. X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
5943. let Predicates = [prd] in
5944. defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
5945. EVEX_V512 ;
5946. let Predicates = [HasVLX, prd] in {
5947. defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
5948. EVEX_V256 ;
5949. defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
5950. EVEX_V128 ;
5951. }
5952. }
// Variable permutes.  VPERMW and VPERMB share opcode 0x8D; the VEX_W bit on
// VPERMW distinguishes the two encodings (likewise 0x36 for VPERMD/VPERMQ
// and 0x16 for VPERMPS/VPERMPD).
5953. defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5954. WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
5955. defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
5956. WriteVarShuffle256, avx512vl_i8_info>;
5957. defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5958. WriteVarShuffle256, avx512vl_i32_info>;
5959. defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5960. WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
5961. defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5962. WriteFVarShuffle256, avx512vl_f32_info>;
5963. defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5964. WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
// Immediate-control forms; these extend the same VPERMQ/VPERMPD defm
// namespaces with Zri/Z256ri-style records.
5965. defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5966. X86VPermi, WriteShuffle256, avx512vl_i64_info>,
5967. EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5968. defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5969. X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
5970. EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5971. //===----------------------------------------------------------------------===//
5972. // AVX-512 - VPERMIL
5973. //===----------------------------------------------------------------------===//
// Vector-control VPERMILPS/PD: data operand uses the FP type info (_) while
// the control operand uses the matching integer type info (Ctrl).
5974. multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5975. X86FoldableSchedWrite sched, X86VectorVTInfo _,
5976. X86VectorVTInfo Ctrl> {
5977. defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5978. (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5979. "$src2, $src1", "$src1, $src2",
5980. (_.VT (OpNode _.RC:$src1,
5981. (Ctrl.VT Ctrl.RC:$src2)))>,
5982. T8PD, EVEX_4V, Sched<[sched]>;
// Full-vector load of the control vector.
5983. defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5984. (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5985. "$src2, $src1", "$src1, $src2",
5986. (_.VT (OpNode
5987. _.RC:$src1,
5988. (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
5989. T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5990. Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast load of a single control element (EVEX.b).
5991. defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5992. (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5993. "${src2}"#_.BroadcastStr#", $src1",
5994. "$src1, ${src2}"#_.BroadcastStr,
5995. (_.VT (OpNode
5996. _.RC:$src1,
5997. (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
5998. T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
5999. Sched<[sched.Folded, sched.ReadAfterFold]>;
6000. }
// Instantiates the vector-control VPERMIL forms at all three widths,
// hard-wiring the X86VPermilpv node.
6001. multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6002. X86SchedWriteWidths sched,
6003. AVX512VLVectorVTInfo _,
6004. AVX512VLVectorVTInfo Ctrl> {
6005. let Predicates = [HasAVX512] in {
6006. defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6007. _.info512, Ctrl.info512>, EVEX_V512;
6008. }
6009. let Predicates = [HasAVX512, HasVLX] in {
6010. defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6011. _.info128, Ctrl.info128>, EVEX_V128;
6012. defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6013. _.info256, Ctrl.info256>, EVEX_V256;
6014. }
6015. }
// Combines the vector-control (OpcVar) and immediate-control (OpcImm)
// VPERMIL flavors under one defm prefix.
6016. multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6017. AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6018. defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6019. _, Ctrl>;
6020. defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6021. X86VPermilpi, SchedWriteFShuffle, _>,
6022. EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6023. }
6024. let ExeDomain = SSEPackedSingle in
6025. defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6026. avx512vl_i32_info>;
6027. let ExeDomain = SSEPackedDouble in
6028. defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6029. avx512vl_i64_info>, VEX_W1X;
6030. //===----------------------------------------------------------------------===//
6031. // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6032. //===----------------------------------------------------------------------===//
// Immediate shuffles reuse the shift_rmi machinery (one source + imm8).
// The word forms share opcode 0x70 and are distinguished by mandatory
// prefix (XS for vpshufhw, XD for vpshuflw).
6033. defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6034. X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6035. EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6036. defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6037. X86PShufhw, SchedWriteShuffle>,
6038. EVEX, AVX512XSIi8Base;
6039. defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6040. X86PShuflw, SchedWriteShuffle>,
6041. EVEX, AVX512XDIi8Base;
6042. //===----------------------------------------------------------------------===//
6043. // AVX-512 - VPSHUFB
6044. //===----------------------------------------------------------------------===//
// VPSHUFB reuses avx512_var_shift's two-source shape (data, control mask).
// Requires BWI; 256/128-bit forms additionally require VLX.
6045. multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6046. X86SchedWriteWidths sched> {
6047. let Predicates = [HasBWI] in
6048. defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6049. EVEX_V512;
6050. let Predicates = [HasVLX, HasBWI] in {
6051. defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6052. EVEX_V256;
6053. defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6054. EVEX_V128;
6055. }
6056. }
6057. defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6058. SchedWriteVarShuffle>, VEX_WIG;
6059. //===----------------------------------------------------------------------===//
6060. // Move Low to High and High to Low packed FP Instructions
6061. //===----------------------------------------------------------------------===//
6062. def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6063. (ins VR128X:$src1, VR128X:$src2),
6064. "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6065. [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6066. Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// NOTE(review): isCommutable here presumably reflects that movhlps with
// both sources equal duplicates the high half either way; confirm against
// the commuting code in X86InstrInfo.
6067. let isCommutable = 1 in
6068. def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6069. (ins VR128X:$src1, VR128X:$src2),
6070. "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6071. [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6072. Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6073. //===----------------------------------------------------------------------===//
6074. // VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6076. //===----------------------------------------------------------------------===//
// Load half of a packed FP register from memory.  The memory operand is
// modeled as a scalar f64 load scalar_to_vector'd and bitcast to the
// destination type, so the same multiclass serves both PS and PD variants.
6077. multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6078. SDPatternOperator OpNode,
6079. X86VectorVTInfo _> {
6080. let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6081. def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6082. (ins _.RC:$src1, f64mem:$src2),
6083. !strconcat(OpcodeStr,
6084. "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6085. [(set _.RC:$dst,
6086. (OpNode _.RC:$src1,
6087. (_.VT (bitconvert
6088. (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6089. Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6090. }
6091. // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6092. // SSE1. And MOVLPS pattern is even more complex.
// PS forms use null_frag (selection handled elsewhere); PD forms select via
// X86Unpckl (high half) and X86Movsd (low half).
6093. defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6094. v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6095. defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6096. v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6097. defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6098. v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6099. defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6100. v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Fold zero-extending 64-bit loads into the PD load forms.
6101. let Predicates = [HasAVX512] in {
6102. // VMOVHPD patterns
6103. def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6104. (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6105. // VMOVLPD patterns
6106. def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6107. (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6108. }
// Store half of a packed FP register to memory.  PS forms have no ISel
// pattern (mayStore/hasSideEffects=0 keeps them well-behaved); PD forms
// store the high element (via X86Unpckh of a register with itself) or the
// low element (extractelt index 0) respectively.
6109. let SchedRW = [WriteFStore] in {
6110. let mayStore = 1, hasSideEffects = 0 in
6111. def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6112. (ins f64mem:$dst, VR128X:$src),
6113. "vmovhps\t{$src, $dst|$dst, $src}",
6114. []>, EVEX, EVEX_CD8<32, CD8VT2>;
6115. def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6116. (ins f64mem:$dst, VR128X:$src),
6117. "vmovhpd\t{$src, $dst|$dst, $src}",
6118. [(store (f64 (extractelt
6119. (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6120. (iPTR 0))), addr:$dst)]>,
6121. EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6122. let mayStore = 1, hasSideEffects = 0 in
6123. def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6124. (ins f64mem:$dst, VR128X:$src),
6125. "vmovlps\t{$src, $dst|$dst, $src}",
6126. []>, EVEX, EVEX_CD8<32, CD8VT2>;
6127. def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6128. (ins f64mem:$dst, VR128X:$src),
6129. "vmovlpd\t{$src, $dst|$dst, $src}",
6130. [(store (f64 (extractelt (v2f64 VR128X:$src),
6131. (iPTR 0))), addr:$dst)]>,
6132. EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6133. } // SchedRW
6134. let Predicates = [HasAVX512] in {
6135. // VMOVHPD patterns
// A vpermilpd with immediate 1 swaps the two doubles, so storing element 0
// of that shuffle is a store of the original high element — fold it into
// vmovhpd.
6136. def : Pat<(store (f64 (extractelt
6137. (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6138. (iPTR 0))), addr:$dst),
6139. (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6140. }
6141. //===----------------------------------------------------------------------===//
6142. // FMA - Fused Multiply Operations
6143. //
// 213 form: dst = src2 * src1 + src3, with $src1 tied to $dst (see
// Constraints).  Register, memory, and broadcast-memory variants; OpNode
// drives the unmasked pattern and MaskOpNode the masked ones.
6144. multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6145. SDNode MaskOpNode, X86FoldableSchedWrite sched,
6146. X86VectorVTInfo _> {
// FMA reads MXCSR (rounding mode) and can raise FP exceptions.
6147. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6148. Uses = [MXCSR], mayRaiseFPException = 1 in {
6149. defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6150. (ins _.RC:$src2, _.RC:$src3),
6151. OpcodeStr, "$src3, $src2", "$src2, $src3",
6152. (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6153. (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6154. EVEX_4V, Sched<[sched]>;
6155. defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6156. (ins _.RC:$src2, _.MemOp:$src3),
6157. OpcodeStr, "$src3, $src2", "$src2, $src3",
6158. (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6159. (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6160. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6161. defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6162. (ins _.RC:$src2, _.ScalarMemOp:$src3),
6163. OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6164. !strconcat("$src2, ${src3}", _.BroadcastStr ),
6165. (OpNode _.RC:$src2,
6166. _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6167. (MaskOpNode _.RC:$src2,
6168. _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6169. EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6170. }
6171. }
// Register-only 213 FMA with explicit static rounding control: EVEX_B plus
// EVEX_RC encode the {rn,rd,ru,rz}-sae mode carried in $rc.
6172. multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6173. X86FoldableSchedWrite sched,
6174. X86VectorVTInfo _> {
6175. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6176. Uses = [MXCSR] in
6177. defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6178. (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6179. OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6180. (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6181. (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6182. EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6183. }
// Instantiates the 213 FMA at all widths.  Rounding-control forms exist
// only at 512 bits; prd defaults to HasAVX512 but is overridden (e.g.
// HasFP16) by callers.
6184. multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6185. SDNode MaskOpNode, SDNode OpNodeRnd,
6186. X86SchedWriteWidths sched,
6187. AVX512VLVectorVTInfo _,
6188. Predicate prd = HasAVX512> {
6189. let Predicates = [prd] in {
6190. defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6191. sched.ZMM, _.info512>,
6192. avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6193. _.info512>,
6194. EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6195. }
6196. let Predicates = [HasVLX, prd] in {
6197. defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6198. sched.YMM, _.info256>,
6199. EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6200. defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6201. sched.XMM, _.info128>,
6202. EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6203. }
6204. }
// Element-type fan-out for the 213 form: half (FP16, MAP6 prefix), single,
// and double precision (VEX_W).
6205. multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6206. SDNode MaskOpNode, SDNode OpNodeRnd> {
6207. defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6208. OpNodeRnd, SchedWriteFMA,
6209. avx512vl_f16_info, HasFP16>, T_MAP6PD;
6210. defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6211. OpNodeRnd, SchedWriteFMA,
6212. avx512vl_f32_info>, T8PD;
6213. defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6214. OpNodeRnd, SchedWriteFMA,
6215. avx512vl_f64_info>, T8PD, VEX_W;
6216. }
// 213-form FMA family.  The "any_"/"X86any_" nodes match both strict and
// non-strict FP; the plain node is the masked counterpart.
6217. defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6218. fma, X86FmaddRnd>;
6219. defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6220. X86Fmsub, X86FmsubRnd>;
6221. defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6222. X86Fmaddsub, X86FmaddsubRnd>;
6223. defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6224. X86Fmsubadd, X86FmsubaddRnd>;
6225. defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6226. X86Fnmadd, X86FnmaddRnd>;
6227. defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6228. X86Fnmsub, X86FnmsubRnd>;
// 231 form: dst = src2 * src3 + src1 ($src1 tied to $dst).
6229. multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6230. SDNode MaskOpNode, X86FoldableSchedWrite sched,
6231. X86VectorVTInfo _> {
6232. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6233. Uses = [MXCSR], mayRaiseFPException = 1 in {
// NOTE(review): the unmasked register pattern is null_frag — presumably
// the register form is produced by commuting from the 213 form rather than
// matched directly; confirm against the FMA commuting logic.
6234. defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6235. (ins _.RC:$src2, _.RC:$src3),
6236. OpcodeStr, "$src3, $src2", "$src2, $src3",
6237. (null_frag),
6238. (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6239. EVEX_4V, Sched<[sched]>;
6240. defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6241. (ins _.RC:$src2, _.MemOp:$src3),
6242. OpcodeStr, "$src3, $src2", "$src2, $src3",
6243. (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6244. (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6245. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6246. defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6247. (ins _.RC:$src2, _.ScalarMemOp:$src3),
6248. OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6249. "$src2, ${src3}"#_.BroadcastStr,
6250. (_.VT (OpNode _.RC:$src2,
6251. (_.VT (_.BroadcastLdFrag addr:$src3)),
6252. _.RC:$src1)),
6253. (_.VT (MaskOpNode _.RC:$src2,
6254. (_.VT (_.BroadcastLdFrag addr:$src3)),
6255. _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6256. Sched<[sched.Folded, sched.ReadAfterFold]>;
6257. }
6258. }
// Register-only 231 FMA with explicit static rounding control; unmasked
// pattern is null_frag as in the rm multiclass above.
6259. multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6260. X86FoldableSchedWrite sched,
6261. X86VectorVTInfo _> {
6262. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6263. Uses = [MXCSR] in
6264. defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6265. (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6266. OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6267. (null_frag),
6268. (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6269. 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6270. }
// Width fan-out for the 231 form; mirrors avx512_fma3p_213_common (rounding
// forms only at 512 bits, narrow forms gated on VLX).
6271. multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6272. SDNode MaskOpNode, SDNode OpNodeRnd,
6273. X86SchedWriteWidths sched,
6274. AVX512VLVectorVTInfo _,
6275. Predicate prd = HasAVX512> {
6276. let Predicates = [prd] in {
6277. defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6278. sched.ZMM, _.info512>,
6279. avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6280. _.info512>,
6281. EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6282. }
6283. let Predicates = [HasVLX, prd] in {
6284. defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6285. sched.YMM, _.info256>,
6286. EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6287. defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6288. sched.XMM, _.info128>,
6289. EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6290. }
6291. }
// Element-type fan-out for the 231 form (ph/ps/pd), matching the 213 one.
6292. multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6293. SDNode MaskOpNode, SDNode OpNodeRnd > {
6294. defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6295. OpNodeRnd, SchedWriteFMA,
6296. avx512vl_f16_info, HasFP16>, T_MAP6PD;
6297. defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6298. OpNodeRnd, SchedWriteFMA,
6299. avx512vl_f32_info>, T8PD;
6300. defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6301. OpNodeRnd, SchedWriteFMA,
6302. avx512vl_f64_info>, T8PD, VEX_W;
6303. }
// 231-form FMA family (opcodes 0xB8-0xBE, parallel to the 0xA8-0xAE 213
// group above).
6304. defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6305. fma, X86FmaddRnd>;
6306. defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6307. X86Fmsub, X86FmsubRnd>;
6308. defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6309. X86Fmaddsub, X86FmaddsubRnd>;
6310. defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6311. X86Fmsubadd, X86FmsubaddRnd>;
6312. defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6313. X86Fnmadd, X86FnmaddRnd>;
6314. defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6315. X86Fnmsub, X86FnmsubRnd>;
// 132 form: dst = src1 * src3 + src2 ($src1 tied to $dst).  Unmasked
// register pattern is null_frag, as for the 231 form.
6316. multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6317. SDNode MaskOpNode, X86FoldableSchedWrite sched,
6318. X86VectorVTInfo _> {
6319. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6320. Uses = [MXCSR], mayRaiseFPException = 1 in {
6321. defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6322. (ins _.RC:$src2, _.RC:$src3),
6323. OpcodeStr, "$src3, $src2", "$src2, $src3",
6324. (null_frag),
6325. (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6326. EVEX_4V, Sched<[sched]>;
6327. // Pattern is 312 order so that the load is in a different place from the
6328. // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6329. defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6330. (ins _.RC:$src2, _.MemOp:$src3),
6331. OpcodeStr, "$src3, $src2", "$src2, $src3",
6332. (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6333. (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6334. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6335. // Pattern is 312 order so that the load is in a different place from the
6336. // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6337. defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6338. (ins _.RC:$src2, _.ScalarMemOp:$src3),
6339. OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6340. "$src2, ${src3}"#_.BroadcastStr,
6341. (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6342. _.RC:$src1, _.RC:$src2)),
6343. (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6344. _.RC:$src1, _.RC:$src2)), 1, 0>,
6345. EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6346. }
6347. }
  // Packed FMA, 132 form with explicit static rounding ({er}): register-only
  // variant taking an AVX512RC rounding-control operand (EVEX.b + EVEX_RC).
  // Only the masked pattern is supplied (unmasked is null_frag), matching the
  // non-rounding register form above.
  6348. multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
  6349. X86FoldableSchedWrite sched,
  6350. X86VectorVTInfo _> {
  6351. let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
  6352. Uses = [MXCSR] in
  6353. defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
  6354. (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
  6355. OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
  6356. (null_frag),
  6357. (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
  6358. 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
  6359. }
  // Instantiates the 132-form FMA across vector widths: the 512-bit (Z) forms,
  // including the rounding variant, are gated on `prd` (HasAVX512 by default);
  // the 256/128-bit (Z256/Z128) forms additionally require HasVLX.
  6360. multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
  6361. SDNode MaskOpNode, SDNode OpNodeRnd,
  6362. X86SchedWriteWidths sched,
  6363. AVX512VLVectorVTInfo _,
  6364. Predicate prd = HasAVX512> {
  6365. let Predicates = [prd] in {
  6366. defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
  6367. sched.ZMM, _.info512>,
  6368. avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
  6369. _.info512>,
  6370. EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  6371. }
  6372. let Predicates = [HasVLX, prd] in {
  6373. defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
  6374. sched.YMM, _.info256>,
  6375. EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  6376. defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
  6377. sched.XMM, _.info128>,
  6378. EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  6379. }
  6380. }
  // Expands one 132-form FMA opcode into its element-type variants:
  // PH (fp16, gated on HasFP16, map 6), PS (fp32, T8PD), and
  // PD (fp64, T8PD with VEX.W set).
  6381. multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
  6382. SDNode MaskOpNode, SDNode OpNodeRnd > {
  6383. defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
  6384. OpNodeRnd, SchedWriteFMA,
  6385. avx512vl_f16_info, HasFP16>, T_MAP6PD;
  6386. defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
  6387. OpNodeRnd, SchedWriteFMA,
  6388. avx512vl_f32_info>, T8PD;
  6389. defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
  6390. OpNodeRnd, SchedWriteFMA,
  6391. avx512vl_f64_info>, T8PD, VEX_W;
  6392. }
  // Packed FMA, 132 form: dst = dst * src3 +/- src2.
  // As with the 231 forms above, each instantiation supplies the unmasked
  // pattern node, the masked node, and the explicit-rounding node.
  6393. defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
  6394. fma, X86FmaddRnd>;
  6395. defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
  6396. X86Fmsub, X86FmsubRnd>;
  6397. defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
  6398. X86Fmaddsub, X86FmaddsubRnd>;
  6399. defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
  6400. X86Fmsubadd, X86FmsubaddRnd>;
  6401. defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
  6402. X86Fnmadd, X86FnmaddRnd>;
  6403. defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
  6404. X86Fnmsub, X86FnmsubRnd>;
  // Scalar FMA
  // Builds one scalar FMA opcode's instruction set:
  //  - *_Int forms operate on the full XMM register class (intrinsic-style),
  //    with register (r_Int), memory (m_Int), and static-rounding (rb_Int)
  //    variants; their selection patterns are added separately (see
  //    avx512_scalar_fma_patterns), hence null_frag here.
  //  - isCodeGenOnly FRC forms (r/m/rb) carry the caller-provided RHS_r/RHS_m/
  //    RHS_b patterns for plain scalar selection. MaskOnlyReg suppresses the
  //    register patterns (used by the 231/132 forms, whose reg-reg selection is
  //    covered by the 213 form).
  6406. multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
  6407. dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
  6408. let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  6409. defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  6410. (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
  6411. "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
  6412. EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
  6413. let mayLoad = 1 in
  6414. defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
  6415. (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
  6416. "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
  6417. EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
  6418. let Uses = [MXCSR] in
  6419. defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  6420. (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
  6421. OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
  6422. EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
  6423. let isCodeGenOnly = 1, isCommutable = 1 in {
  6424. def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
  6425. (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
  6426. !strconcat(OpcodeStr,
  6427. "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
  6428. !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
  6429. def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
  6430. (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
  6431. !strconcat(OpcodeStr,
  6432. "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
  6433. [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
  6434. let Uses = [MXCSR] in
  6435. def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
  6436. (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
  6437. !strconcat(OpcodeStr,
  6438. "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
  6439. !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
  6440. Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  6441. }// isCodeGenOnly = 1
  6442. }// Constraints = "$src1 = $dst"
  6443. }
  // Instantiates the 213, 231, and 132 scalar forms for one element type,
  // supplying the FRC selection patterns (RHS_r/RHS_m/RHS_b) for each form.
  // The 231 and 132 forms pass MaskOnlyReg=1 so only their memory patterns are
  // emitted; reg-reg selection goes through the 213 form.
  6444. multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
  6445. string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
  6446. X86VectorVTInfo _, string SUFF> {
  6447. let ExeDomain = _.ExeDomain in {
  6448. defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
  6449. // Operands for intrinsic are in 123 order to preserve passthu
  6450. // semantics.
  6451. (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
  6452. _.FRC:$src3))),
  6453. (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
  6454. (_.ScalarLdFrag addr:$src3)))),
  6455. (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
  6456. _.FRC:$src3, (i32 timm:$rc)))), 0>;
  6457. defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
  6458. (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
  6459. _.FRC:$src1))),
  6460. (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
  6461. (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
  6462. (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
  6463. _.FRC:$src1, (i32 timm:$rc)))), 1>;
  6464. // One pattern is 312 order so that the load is in a different place from the
  6465. // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  6466. defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
  6467. (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
  6468. _.FRC:$src2))),
  6469. (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
  6470. _.FRC:$src1, _.FRC:$src2))),
  6471. (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
  6472. _.FRC:$src2, (i32 timm:$rc)))), 1>;
  6473. }
  6474. }
  // Top-level scalar FMA expansion: SS (fp32) and SD (fp64) forms under
  // HasAVX512, plus the SH (fp16) form under HasFP16 (map 6 encoding).
  6475. multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
  6476. string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  6477. let Predicates = [HasAVX512] in {
  6478. defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
  6479. OpNodeRnd, f32x_info, "SS">,
  6480. EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
  6481. defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
  6482. OpNodeRnd, f64x_info, "SD">,
  6483. EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
  6484. }
  6485. let Predicates = [HasFP16] in {
  6486. defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
  6487. OpNodeRnd, f16x_info, "SH">,
  6488. EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
  6489. }
  6490. }
  // Scalar FMA instantiations; opcodes are (213, 231, 132) per defm.
  6491. defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
  6492. defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
  6493. defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
  6494. defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
  // Selection patterns mapping scalar-in-vector FMA dags — a scalar op whose
  // result is inserted into element 0 of $src1 via (Move ... (scalar_to_vector
  // ...)) — onto the *_Int instruction forms defined by avx512_fma3s_common.
  // The position of the extractelt-from-$src1 operand within the op decides
  // which of the 213/231/132 forms is chosen. Pattern groups, in order:
  // unmasked reg and mem, merge-masked (X86selects_mask with the original
  // element as passthru), zero-masked (passthru is ZeroFP), then the
  // explicit-rounding (RndOp) variants of each.
  6495. multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
  6496. SDNode RndOp, string Prefix,
  6497. string Suffix, SDNode Move,
  6498. X86VectorVTInfo _, PatLeaf ZeroFP,
  6499. Predicate prd = HasAVX512> {
  6500. let Predicates = [prd] in {
  // Unmasked register forms (213 and 231).
  6501. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6502. (Op _.FRC:$src2,
  6503. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6504. _.FRC:$src3))))),
  6505. (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
  6506. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6507. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  6508. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6509. (Op _.FRC:$src2, _.FRC:$src3,
  6510. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6511. (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
  6512. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6513. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  // Unmasked memory forms (213, 132, 231) — distinguished by where the
  // loaded operand sits in the op.
  6514. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6515. (Op _.FRC:$src2,
  6516. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6517. (_.ScalarLdFrag addr:$src3)))))),
  6518. (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
  6519. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6520. addr:$src3)>;
  6521. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6522. (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6523. (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
  6524. (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
  6525. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6526. addr:$src3)>;
  6527. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6528. (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
  6529. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6530. (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
  6531. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6532. addr:$src3)>;
  // Merge-masked forms: the select's false value is element 0 of $src1, so
  // the k-masked (_Intk) instructions' merge semantics apply.
  6533. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6534. (X86selects_mask VK1WM:$mask,
  6535. (MaskedOp _.FRC:$src2,
  6536. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6537. _.FRC:$src3),
  6538. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6539. (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
  6540. VR128X:$src1, VK1WM:$mask,
  6541. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6542. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  6543. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6544. (X86selects_mask VK1WM:$mask,
  6545. (MaskedOp _.FRC:$src2,
  6546. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6547. (_.ScalarLdFrag addr:$src3)),
  6548. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6549. (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
  6550. VR128X:$src1, VK1WM:$mask,
  6551. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  6552. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6553. (X86selects_mask VK1WM:$mask,
  6554. (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6555. (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
  6556. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6557. (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
  6558. VR128X:$src1, VK1WM:$mask,
  6559. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  6560. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6561. (X86selects_mask VK1WM:$mask,
  6562. (MaskedOp _.FRC:$src2, _.FRC:$src3,
  6563. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
  6564. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6565. (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
  6566. VR128X:$src1, VK1WM:$mask,
  6567. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6568. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  6569. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6570. (X86selects_mask VK1WM:$mask,
  6571. (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
  6572. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
  6573. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6574. (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
  6575. VR128X:$src1, VK1WM:$mask,
  6576. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  // Zero-masked forms: the select's false value is the ZeroFP leaf, so the
  // zeroing (_Intkz) instructions apply.
  6577. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6578. (X86selects_mask VK1WM:$mask,
  6579. (MaskedOp _.FRC:$src2,
  6580. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6581. _.FRC:$src3),
  6582. (_.EltVT ZeroFP)))))),
  6583. (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
  6584. VR128X:$src1, VK1WM:$mask,
  6585. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6586. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  6587. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6588. (X86selects_mask VK1WM:$mask,
  6589. (MaskedOp _.FRC:$src2, _.FRC:$src3,
  6590. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
  6591. (_.EltVT ZeroFP)))))),
  6592. (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
  6593. VR128X:$src1, VK1WM:$mask,
  6594. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6595. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
  6596. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6597. (X86selects_mask VK1WM:$mask,
  6598. (MaskedOp _.FRC:$src2,
  6599. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6600. (_.ScalarLdFrag addr:$src3)),
  6601. (_.EltVT ZeroFP)))))),
  6602. (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
  6603. VR128X:$src1, VK1WM:$mask,
  6604. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  6605. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6606. (X86selects_mask VK1WM:$mask,
  6607. (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6608. _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
  6609. (_.EltVT ZeroFP)))))),
  6610. (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
  6611. VR128X:$src1, VK1WM:$mask,
  6612. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  6613. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6614. (X86selects_mask VK1WM:$mask,
  6615. (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
  6616. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
  6617. (_.EltVT ZeroFP)))))),
  6618. (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
  6619. VR128X:$src1, VK1WM:$mask,
  6620. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
  6621. // Patterns with rounding mode.
  6622. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6623. (RndOp _.FRC:$src2,
  6624. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6625. _.FRC:$src3, (i32 timm:$rc)))))),
  6626. (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
  6627. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6628. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6629. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6630. (RndOp _.FRC:$src2, _.FRC:$src3,
  6631. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6632. (i32 timm:$rc)))))),
  6633. (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
  6634. VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6635. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6636. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6637. (X86selects_mask VK1WM:$mask,
  6638. (RndOp _.FRC:$src2,
  6639. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6640. _.FRC:$src3, (i32 timm:$rc)),
  6641. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6642. (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
  6643. VR128X:$src1, VK1WM:$mask,
  6644. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6645. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6646. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6647. (X86selects_mask VK1WM:$mask,
  6648. (RndOp _.FRC:$src2, _.FRC:$src3,
  6649. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6650. (i32 timm:$rc)),
  6651. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
  6652. (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
  6653. VR128X:$src1, VK1WM:$mask,
  6654. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6655. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6656. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6657. (X86selects_mask VK1WM:$mask,
  6658. (RndOp _.FRC:$src2,
  6659. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6660. _.FRC:$src3, (i32 timm:$rc)),
  6661. (_.EltVT ZeroFP)))))),
  6662. (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
  6663. VR128X:$src1, VK1WM:$mask,
  6664. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6665. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6666. def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
  6667. (X86selects_mask VK1WM:$mask,
  6668. (RndOp _.FRC:$src2, _.FRC:$src3,
  6669. (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  6670. (i32 timm:$rc)),
  6671. (_.EltVT ZeroFP)))))),
  6672. (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
  6673. VR128X:$src1, VK1WM:$mask,
  6674. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
  6675. (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  6676. }
  6677. }
  // Instantiate the scalar FMA selection patterns for fp16 (SH, gated on
  // HasFP16), fp32 (SS), and fp64 (SD).
  6678. defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
  6679. X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
  6680. defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
  6681. X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
  6682. defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
  6683. X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
  6684. defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
  6685. X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
  6686. defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
  6687. "SS", X86Movss, v4f32x_info, fp32imm0>;
  6688. defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
  6689. "SS", X86Movss, v4f32x_info, fp32imm0>;
  6690. defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
  6691. "SS", X86Movss, v4f32x_info, fp32imm0>;
  6692. defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
  6693. "SS", X86Movss, v4f32x_info, fp32imm0>;
  6694. defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
  6695. "SD", X86Movsd, v2f64x_info, fp64imm0>;
  6696. defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
  6697. "SD", X86Movsd, v2f64x_info, fp64imm0>;
  6698. defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
  6699. "SD", X86Movsd, v2f64x_info, fp64imm0>;
  6700. defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
  6701. "SD", X86Movsd, v2f64x_info, fp64imm0>;
  6702. //===----------------------------------------------------------------------===//
  6703. // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
  6704. //===----------------------------------------------------------------------===//
  // IFMA (VPMADD52): register, memory, and broadcast-memory forms for one
  // vector type. $src1 is the tied accumulator operand.
  6705. let Constraints = "$src1 = $dst" in {
  6706. multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
  6707. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  6708. // NOTE: The SDNode have the multiply operands first with the add last.
  6709. // This enables commuted load patterns to be autogenerated by tablegen.
  6710. let ExeDomain = _.ExeDomain in {
  6711. defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
  6712. (ins _.RC:$src2, _.RC:$src3),
  6713. OpcodeStr, "$src3, $src2", "$src2, $src3",
  6714. (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
  6715. T8PD, EVEX_4V, Sched<[sched]>;
  6716. defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
  6717. (ins _.RC:$src2, _.MemOp:$src3),
  6718. OpcodeStr, "$src3, $src2", "$src2, $src3",
  6719. (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
  6720. T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  6721. defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
  6722. (ins _.RC:$src2, _.ScalarMemOp:$src3),
  6723. OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
  6724. !strconcat("$src2, ${src3}", _.BroadcastStr ),
  6725. (OpNode _.RC:$src2,
  6726. (_.VT (_.BroadcastLdFrag addr:$src3)),
  6727. _.RC:$src1)>,
  6728. T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  6729. }
  6730. }
  6731. } // Constraints = "$src1 = $dst"
  // Instantiates VPMADD52 across widths: ZMM under HasIFMA, YMM/XMM under
  // HasVLX + HasIFMA.
  6732. multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
  6733. X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  6734. let Predicates = [HasIFMA] in {
  6735. defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
  6736. EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  6737. }
  6738. let Predicates = [HasVLX, HasIFMA] in {
  6739. defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
  6740. EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  6741. defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
  6742. EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  6743. }
  6744. }
  // VPMADD52LUQ/VPMADD52HUQ: multiply 52-bit unsigned integers and accumulate
  // the low/high 52 bits of the product into the 64-bit accumulator.
  6745. defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
  6746. SchedWriteVecIMul, avx512vl_i64_info>,
  6747. VEX_W;
  6748. defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
  6749. SchedWriteVecIMul, avx512vl_i64_info>,
  6750. VEX_W;
  6751. //===----------------------------------------------------------------------===//
  6752. // AVX-512 Scalar convert from sign integer to float/double
  6753. //===----------------------------------------------------------------------===//
  // Scalar integer -> float/double conversion (non-rounding forms):
  //  - isCodeGenOnly rr/rm forms on the scalar FP register class (no patterns;
  //    selected via the Pat<> entries below this multiclass's users).
  //  - rr_Int/rm_Int intrinsic forms on the full vector register class, with
  //    patterns built from OpNode.
  // _Uses/_mayRaiseFPException allow callers to drop the MXCSR dependency for
  // conversions that cannot raise exceptions (e.g. 32-bit int -> f64).
  6754. multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
  6755. RegisterClass SrcRC, X86VectorVTInfo DstVT,
  6756. X86MemOperand x86memop, PatFrag ld_frag, string asm,
  6757. string mem, list<Register> _Uses = [MXCSR],
  6758. bit _mayRaiseFPException = 1> {
  6759. let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
  6760. mayRaiseFPException = _mayRaiseFPException in {
  6761. let hasSideEffects = 0, isCodeGenOnly = 1 in {
  6762. def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
  6763. (ins DstVT.FRC:$src1, SrcRC:$src),
  6764. !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
  6765. EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  6766. let mayLoad = 1 in
  6767. def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
  6768. (ins DstVT.FRC:$src1, x86memop:$src),
  6769. asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
  6770. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  6771. } // hasSideEffects = 0
  6772. def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
  6773. (ins DstVT.RC:$src1, SrcRC:$src2),
  6774. !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
  6775. [(set DstVT.RC:$dst,
  6776. (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
  6777. EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  6778. def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
  6779. (ins DstVT.RC:$src1, x86memop:$src2),
  6780. asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  6781. [(set DstVT.RC:$dst,
  6782. (OpNode (DstVT.VT DstVT.RC:$src1),
  6783. (ld_frag addr:$src2)))]>,
  6784. EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  6785. }
  // AT&T-syntax alias with the size suffix spelled out (e.g. "vcvtsi2ssl").
  6786. def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  6787. (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
  6788. DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
  6789. }
  // Register-only scalar int -> fp conversion with explicit rounding control
  // ({er}, EVEX.b + EVEX_RC), plus the AT&T size-suffixed alias.
  6790. multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
  6791. X86FoldableSchedWrite sched, RegisterClass SrcRC,
  6792. X86VectorVTInfo DstVT, string asm,
  6793. string mem> {
  6794. let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  6795. def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
  6796. (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
  6797. !strconcat(asm,
  6798. "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
  6799. [(set DstVT.RC:$dst,
  6800. (OpNode (DstVT.VT DstVT.RC:$src1),
  6801. SrcRC:$src2,
  6802. (i32 timm:$rc)))]>,
  6803. EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  6804. def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
  6805. (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
  6806. DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
  6807. }
  // Convenience wrapper combining the rounding and non-rounding scalar
  // int -> fp conversion forms under a single name, with VEX_LIG applied.
  6808. multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
  6809. X86FoldableSchedWrite sched,
  6810. RegisterClass SrcRC, X86VectorVTInfo DstVT,
  6811. X86MemOperand x86memop, PatFrag ld_frag,
  6812. string asm, string mem> {
  6813. defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
  6814. avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
  6815. ld_frag, asm, mem>, VEX_LIG;
  6816. }
  // Instantiations of the scalar signed (cvtsi2ss/sd, opcode 0x2A) and
  // unsigned (cvtusi2ss/sd, opcode 0x7B) conversions, plus assembler aliases
  // for the memory _Int forms and the Pat<> entries that select the
  // isCodeGenOnly FRC forms from (any_){s,u}int_to_fp. The 32-bit int -> f64
  // conversions are exact, so VCVTSI2SDZ/VCVTUSI2SDZ are defined without the
  // rounding form and with no MXCSR use (null_frag, empty Uses,
  // mayRaiseFPException = 0).
  6817. let Predicates = [HasAVX512] in {
  6818. defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
  6819. WriteCvtI2SS, GR32,
  6820. v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
  6821. XS, EVEX_CD8<32, CD8VT1>;
  6822. defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
  6823. WriteCvtI2SS, GR64,
  6824. v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
  6825. XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  6826. defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
  6827. v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
  6828. XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  6829. defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
  6830. WriteCvtI2SD, GR64,
  6831. v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
  6832. XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  6833. def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
  6834. (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  6835. def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
  6836. (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  // Select the codegen-only FRC forms for plain scalar sint_to_fp; the tied
  // first operand is unused, hence IMPLICIT_DEF.
  6837. def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
  6838. (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  6839. def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
  6840. (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  6841. def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
  6842. (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  6843. def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
  6844. (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  6845. def : Pat<(f32 (any_sint_to_fp GR32:$src)),
  6846. (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  6847. def : Pat<(f32 (any_sint_to_fp GR64:$src)),
  6848. (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  6849. def : Pat<(f64 (any_sint_to_fp GR32:$src)),
  6850. (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  6851. def : Pat<(f64 (any_sint_to_fp GR64:$src)),
  6852. (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
  // Unsigned variants (AVX-512 only; no VEX-encoded counterpart exists).
  6853. defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
  6854. WriteCvtI2SS, GR32,
  6855. v4f32x_info, i32mem, loadi32,
  6856. "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
  6857. defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
  6858. WriteCvtI2SS, GR64,
  6859. v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
  6860. XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  6861. defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
  6862. i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
  6863. XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  6864. defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
  6865. WriteCvtI2SD, GR64,
  6866. v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
  6867. XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  6868. def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
  6869. (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  6870. def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
  6871. (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  6872. def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
  6873. (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  6874. def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
  6875. (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  6876. def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
  6877. (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  6878. def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
  6879. (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  6880. def : Pat<(f32 (any_uint_to_fp GR32:$src)),
  6881. (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  6882. def : Pat<(f32 (any_uint_to_fp GR64:$src)),
  6883. (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  6884. def : Pat<(f64 (any_uint_to_fp GR32:$src)),
  6885. (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  6886. def : Pat<(f64 (any_uint_to_fp GR64:$src)),
  6887. (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
  6888. }
  6889. //===----------------------------------------------------------------------===//
  6890. // AVX-512 Scalar convert from float/double to integer
  6891. //===----------------------------------------------------------------------===//
  // Scalar float/double -> integer conversion (intrinsic forms): register
  // (rr_Int), static-rounding register (rrb_Int), and memory (rm_Int)
  // variants, plus AT&T-syntax aliases carrying the operand-size suffix
  // (aliasStr, e.g. "{l}"/"{q}").
  6892. multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
  6893. X86VectorVTInfo DstVT, SDNode OpNode,
  6894. SDNode OpNodeRnd,
  6895. X86FoldableSchedWrite sched, string asm,
  6896. string aliasStr, Predicate prd = HasAVX512> {
  6897. let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
  6898. def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
  6899. !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
  6900. [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
  6901. EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  6902. let Uses = [MXCSR] in
  6903. def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
  6904. !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
  6905. [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
  6906. EVEX, VEX_LIG, EVEX_B, EVEX_RC,
  6907. Sched<[sched]>;
  6908. def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
  6909. !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
  6910. [(set DstVT.RC:$dst, (OpNode
  6911. (SrcVT.ScalarIntMemFrags addr:$src)))]>,
  6912. EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  6913. } // Predicates = [prd]
  6914. def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
  6915. (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  6916. def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
  6917. (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  6918. def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
  6919. (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
  6920. SrcVT.IntScalarMemOp:$src), 0, "att">;
  6921. }
  6922. // Convert float/double to signed/unsigned int 32/64
  // Opcode 0x2D = signed (cvt*2si), 0x79 = unsigned (cvt*2usi); VEX.W selects
  // the 64-bit integer destination.
  6923. defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
  6924. X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
  6925. XS, EVEX_CD8<32, CD8VT1>;
  6926. defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
  6927. X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
  6928. XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  6929. defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
  6930. X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
  6931. XS, EVEX_CD8<32, CD8VT1>;
  6932. defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
  6933. X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
  6934. XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  6935. defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
  6936. X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
  6937. XD, EVEX_CD8<64, CD8VT1>;
  6938. defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
  6939. X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
  6940. XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  6941. defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
  6942. X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
  6943. XD, EVEX_CD8<64, CD8VT1>;
  6944. defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
  6945. X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
  6946. XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar FP -> integer conversion operating on FP register classes (FRC)
// rather than vector registers. Both defs are isCodeGenOnly: they exist only
// for ISel patterns (e.g. lrint/llrint below) and are never emitted by the
// assembler/disassembler tables.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
      // Register form.
      def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      // Load-folded form.
      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}
// lrint/llrint lower directly to cvtss2si/cvtsd2si (round using the current
// rounding mode). The 32-bit-result variants match lrint; the VEX_W variants
// match llrint.
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                              lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                                llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                              lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                                llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// On 64-bit targets lrint may also produce an i64 result; map that onto the
// 64-bit instructions as well.
let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions.
// Each pattern folds "movss/movsd(dst, scalar_to_vector(int_to_fp(x)))" into
// the single _Int conversion instruction, which already merges the result
// into the low element of $dst. Covers signed (VCVTSI*) and unsigned
// (VCVTUSI*) sources, GR32/GR64 registers, and folded i32/i64 loads.
let Predicates = [HasAVX512] in {
  // -- signed int -> f32 --
  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

  // -- signed int -> f64 --
  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

  // -- unsigned int -> f32 --
  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

  // -- unsigned int -> f64 --
  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation.
// Emits both the codegen-only FRC forms (matching generic fp_to_int nodes)
// and the intrinsic _Int forms, including an {sae} variant that suppresses
// all exceptions (EVEX_B without a rounding-control immediate), plus
// AT&T size-suffix aliases.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
    // ISel-only forms on FP register classes.
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }

    // Intrinsic forms on vector registers.
    def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // {sae} form: no SIMD_EXC, but still reads MXCSR.
    let Uses = [MXCSR] in
    def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                         !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                         [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
    def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

  // AT&T-syntax aliases with explicit "{l}"/"{q}" size suffixes.
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                  (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                   _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
// Truncating conversions: opcode 0x2C = signed (cvtts2si), 0x78 = unsigned
// (cvtts2usi). XS = f32 source, XD = f64 source, VEX_W = 64-bit destination.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                   "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                     "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                                    "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                                      "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512  Convert form float to double and back
//===----------------------------------------------------------------------===//

// Scalar FP -> FP conversion (e.g. vcvtsd2ss/vcvtss2sd). $src1 supplies the
// upper elements that are passed through to the destination; $src2 holds the
// scalar to convert. Emits maskable _Int forms and codegen-only FRC forms.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (OpNode (_.VT _.RC:$src1),
                                                     (_Src.VT _Src.RC:$src2)))>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (OpNode (_.VT _.RC:$src1),
                                                     (_Src.ScalarIntMemFrags addr:$src2)))>,
                EVEX_4V, VEX_LIG,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern-less FRC forms used by ISel patterns elsewhere.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Scalar Conversion with SAE - suppress all exceptions.
// Register-only {sae} variant of the scalar FP->FP conversion (EVEX_B set,
// no rounding-control immediate).
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                                         (_Src.VT _Src.RC:$src2)))>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
// Scalar Conversion with rounding control (RC).
// Register-only variant taking an explicit $rc rounding-mode operand
// (EVEX_B + EVEX_RC).
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                 EVEX_4V, VEX_LIG, Sched<[sched]>,
                 EVEX_B, EVEX_RC;
}
// Narrowing scalar conversion (e.g. sd2ss, ss2sh): combines the base scalar
// form with the rounding-control variant. Narrowing conversions can round, so
// they get RC rather than SAE.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
// Widening scalar conversion (e.g. ss2sd, sh2ss): combines the base scalar
// form with the {sae} variant. Widening conversions are exact, so they get
// SAE rather than a rounding-control operand.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
// Scalar FP<->FP conversion instructions: f64<->f32 under AVX512, and the
// FP16 variants (f64/f32 <-> f16) under HasFP16.
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>, XD, VEX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                             f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f16x_info, HasFP16>, T_MAP5XD, VEX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f64x_info, HasFP16>, T_MAP5XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                            f16x_info, HasFP16>, T_MAP5PS;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f32x_info, HasFP16>, T_MAP6PS;
// Generic fpextend/fpround on FP register classes lower to the codegen-only
// FRC instruction forms; the passthrough operand is IMPLICIT_DEF since the
// upper elements are don't-care here. Load-folding variants are restricted
// to OptForSize.
def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
      Requires<[HasAVX512]>;

// FP16 widenings/narrowings (HasFP16).
def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
      Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
      Requires<[HasFP16]>;

// Fold movss/movsd of a converted low element into the _Int instruction,
// which already merges into $dst.
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector
                          (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector
                          (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512  Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Generic packed conversion: emits register (rr), memory (rm), and broadcast
// (rmb) forms, each with unmasked / merge-masked / zero-masked variants via
// AVX512_maskable_cvt. OpNode is used for the unmasked pattern and
// MaskOpNode for the masked ones (callers may pass null_frag to disable
// patterns). LdDAG/MaskLdDAG allow callers to override the memory-form DAGs
// (used by avx512_vcvt_fpextend below).
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register form.
    defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _Src.RC:$src),
                                  (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                                  (ins MaskRC:$mask, _Src.RC:$src),
                                  OpcodeStr, "$src", "$src",
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  (vselect_mask MaskRC:$mask,
                                                (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                                _.RC:$src0),
                                  (vselect_mask MaskRC:$mask,
                                                (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                                _.ImmAllZerosV)>,
              EVEX, Sched<[sched]>;
    // Full-vector memory form; Alias carries an optional assembler suffix
    // (e.g. "{x}"/"{y}") for ambiguous-size mnemonics.
    defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins MemOp:$src),
                                  (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                                  (ins MaskRC:$mask, MemOp:$src),
                                  OpcodeStr#Alias, "$src", "$src",
                                  LdDAG,
                                  (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                                  (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
              EVEX, Sched<[sched.Folded]>;
    // Broadcast (element splat) memory form.
    defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _Src.ScalarMemOp:$src),
                                   (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                                   (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                                   OpcodeStr,
                                   "${src}"#Broadcast, "${src}"#Broadcast,
                                   (_.VT (OpNode (_Src.VT
                                                  (_Src.BroadcastLdFrag addr:$src))
                                         )),
                                   (vselect_mask MaskRC:$mask,
                                                 (_.VT
                                                  (MaskOpNode
                                                   (_Src.VT
                                                    (_Src.BroadcastLdFrag addr:$src)))),
                                                 _.RC:$src0),
                                   (vselect_mask MaskRC:$mask,
                                                 (_.VT
                                                  (MaskOpNode
                                                   (_Src.VT
                                                    (_Src.BroadcastLdFrag addr:$src)))),
                                                 _.ImmAllZerosV)>,
               EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions.
// Register-only packed {sae} variant (EVEX_B, no rounding immediate).
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src), OpcodeStr,
                             "{sae}, $src", "$src, {sae}",
                             (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
             EVEX, EVEX_B, Sched<[sched]>;
}
// Conversion with rounding control (RC).
// Register-only packed variant taking an explicit $rc rounding-mode operand
// (EVEX_B + EVEX_RC).
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                             "$rc, $src", "$src, $rc",
                             (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// The LdDAG/MaskLdDAG overrides substitute "extload<SrcVT>" so that a
// widening conversion can fold a narrower-than-vector load.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
// Extend [Float to Double, Half to Float].
// 512-bit form gets the {sae} variant; 128/256-bit forms require HasVLX.
// The Z128 form converts only the low elements, so it uses the strict
// X86vfpext nodes and an f64mem operand instead of the generic fpextend.
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}
// Truncate [Double to Float, Float to Half].
// 512-bit form gets the rounding-control variant. The Z128 form writes only
// the low elements, so its instruction patterns are disabled (null_frag) and
// replaced below by explicit Pats using X86any_vfpround/X86vmfpround.
// AT&T aliases add "x"/"y" suffixes to disambiguate the 128/256-bit sources
// that share a VR128X destination.
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  // AT&T-syntax aliases with explicit "x" (128-bit source) suffix.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  // AT&T-syntax aliases with explicit "y" (256-bit source) suffix.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Packed f64<->f32 conversions.
defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                 PS, EVEX_CD8<32, CD8VH>;
// Extend Half to Double (vcvtph2pd, HasFP16). The 128/256-bit forms consume
// only the low 2/4 halves of a v8f16 source, hence the X86vfpext nodes and
// the narrow f32mem/f64mem memory operands.
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    // Fold a v8f16 extending load directly into the 512-bit memory form.
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}
// Truncate Double to Half (vcvtpd2ph, HasFP16). All source widths produce a
// VR128X destination, so the assembler needs explicit "x"/"y"/"z" suffixed
// aliases to disambiguate the 128/256/512-bit sources. The 128/256-bit
// instruction patterns are disabled with null_frag (handled by patterns
// elsewhere).
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }

  // "x" (128-bit source) aliases: plain, merge-masked, zero-masked, and the
  // {1to2} broadcast variants of each.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;

  // "y" (256-bit source) aliases.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;

  // "z" (512-bit source) aliases.
  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                   VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                   VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                   VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                   VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                   VK8WM:$mask, i64mem:$src), 0, "att">;
}
// FP16 conversion instantiations: single<->half extend/truncate and
// double<->half. All are gated on HasFP16 (directly or inside the multiclass).
defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  // 256-bit (v4f64 -> v8f16) forms: plain, merge-masked (rrk/rmk/rmbk),
  // zero-masked (rrkz/rmkz/rmbkz), for register, load and broadcast sources.
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
                    VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
  // 128-bit (v2f64 -> v8f16) forms, same matrix of masking/source variants.
  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
// Marked mayRaiseFPException = 0: i32 -> f64 widening is flagged as unable to
// raise an FP exception here (presumably because it is exact — every i32 is
// representable in f64).
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          MaskOpNode, sched.ZMM>, EVEX_V512;
  let Predicates = [HasVLX] in {
    // The 128-bit form consumes only the low 64 bits (two i32 elements) of
    // the source, hence the i64mem operand and the explicit
    // scalar_to_vector/bc_v4i32 load patterns, plus the "{1to2}" broadcast.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Doubleword to Float
// The 512-bit form additionally gets a rounding-control (rc) variant via
// avx512_vcvt_fp_rc; the VL forms have no embedded rounding.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions expose SAE (suppress-all-exceptions) on the 512-bit
// form via avx512_vcvt_fp_sae instead of a rounding-control operand.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Doubleword
// Non-truncating: the 512-bit form carries an embedded rounding-control (rc)
// variant rather than the SAE variant of the cvtt* multiclasses.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    // The 128-bit selection patterns are disabled (null_frag); masking
    // patterns for it are provided separately (see the HasVLX pattern block
    // after the instantiations).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }
  // AT&T-syntax aliases carrying the explicit "x"/"y" size suffix for every
  // register/broadcast form, in plain, merge-masked and zero-masked variants.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    // The 128-bit selection patterns are disabled (null_frag); masking
    // patterns are supplied separately in the HasVLX pattern block below.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }
  // AT&T-syntax aliases with explicit "x"/"y" suffixes for the VL forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Quadword
// Requires DQI; VL forms additionally require VLX. 512-bit form gets an
// embedded rounding-control (rc) variant.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Quadword with truncation
// Requires DQI; the 512-bit form gets a SAE variant (truncation has no
// rounding-control operand).
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Quadword to Double
// VL forms are marked NotEVEX2VEXConvertible: they must stay EVEX-encoded
// (no VEX-encoded AVX equivalent exists for these q->pd conversions).
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    // The 128-bit form reads only the low 64 bits (two f32 elements), hence
    // f64mem and the scalar_to_vector/bc_v4f32 load patterns.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    // As in avx512_cvtps2qq: the 128-bit form reads only the low 64 bits of
    // the source, so it uses f64mem with explicit load patterns.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
// Shared multiclass: both conversions narrow the element count, so the
// destination register class is one step smaller than the source
// (_dst.info256 for a 512-bit source, _dst.info128 for 256/128-bit sources).
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }
  // AT&T-syntax aliases with explicit "x"/"y" suffixes for the VL forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}
// Instantiations of the integer<->FP conversion multiclasses defined above,
// covering signed/unsigned dword/qword sources and f16/f32/f64 destinations,
// in both rounding and truncating ("tt") flavors.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
                                 X86any_VSintToFP, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
                                  uint_to_fp, X86VUintToFpRnd,
                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, PD,
                                    EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
                                 sint_to_fp, X86VSintToFpRnd,
                                 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
                                  uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
                                  VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
                                       X86any_VSintToFP, X86VMSintToFP,
                                       X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
                                       SchedWriteCvtDQ2PS, HasFP16>,
                                       T_MAP5PS, EVEX_CD8<32, CD8VF>;
defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
                                        X86any_VUintToFP, X86VMUintToFP,
                                        X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
                                        SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
                                        EVEX_CD8<32, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
                                       X86any_VSintToFP, X86VMSintToFP,
                                       X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
                                       SchedWriteCvtDQ2PS>, VEX_W, PS,
                                       EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
                                        X86any_VUintToFP, X86VMUintToFP,
                                        X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
                                        SchedWriteCvtDQ2PS>, VEX_W, XD,
                                        EVEX_CD8<64, CD8VF>;
// 128-bit VCVT{T}PD2{U}DQ masking patterns under AVX512VL.  The masked
// instruction patterns were declared with null_frag, so masked selection
// happens here through the explicit X86mcvt* nodes, which carry the
// passthru operand ($src0 or all-zeros) and the VK2WM mask directly.
// Each group covers: reg, reg-merge-mask (rrk), reg-zero-mask (rrkz),
// load (rm/rmk/rmkz), and 64-bit broadcast load (rmb/rmbk/rmbkz).
let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  // (Comment previously said X86mcvtp2UInt; the patterns below are the
  // truncating unsigned forms.)
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// With DQI+VLX, fold a 64-bit zero-extending scalar load into the
// 128-bit f32 -> i64 conversions: only the low two f32 lanes are
// consumed, so an 8-byte vzload supplies them.  Masked variants are
// matched through vselect_mask with either a merge passthru ($src0) or
// the all-zeros vector.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
// Same vzload-folding idea for the 128-bit i32 -> f64 conversions
// (signed and unsigned): only the low two i32 lanes are consumed.
let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
  8272. //===----------------------------------------------------------------------===//
  8273. // Half precision conversion instructions
  8274. //===----------------------------------------------------------------------===//
// vcvtph2ps: packed f16 -> f32 widening conversion.  The _split maskable
// helper takes two pattern nodes: the strict-FP-aware X86any_cvtph2ps for
// the unmasked form and plain X86cvtph2ps for the masked selection forms.
// ld_dag lets each instantiation choose its load shape (full-width load
// for the 512/256-bit forms, a 64-bit vzload for the 128-bit form).
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
                                  T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
                                  T8PD, Sched<[sched.Folded]>;
}

// Register-only {sae} (suppress-all-exceptions) variant, used by the
// 512-bit instantiation below.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    (load addr:$src), WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
                                       WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
// vcvtps2ph: packed f32 -> f16 narrowing with an immediate rounding-
// control operand ($src2).  This uses MRMDestReg/MRMDestMem encoding, so
// the f16 result is the ModRM "destination" operand.  The store forms
// (mr/mrk) carry no ISel patterns; store folding is handled by the
// explicit Pat<>s after the instantiations below.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  // Merge-masking form: the passthru is tied to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  // Zero-masking form.
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  // Asm/encoding-only store forms (no patterns).
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
  }
}

// Register-only {sae} variants (unmasked / merge-mask / zero-mask).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in {
  def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             EVEX_B, Sched<[Sched]>;
  let Constraints = "$src0 = $dst" in
  def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                _dest.RC:$src0, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_K;
  def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_KZ;
  }
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  // Fold the conversion + store into the mr form.
  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // The 128-bit form produces only 64 bits of result; match a scalar
  // 64-bit extract+store of the converted value (both f64 and i64 views).
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Asm/encoding-only (empty pattern list): compares the low elements of
// $src1/$src2 with {sae}.  EFLAGS appears in Defs at the instantiation
// sites, not here.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
// {sae} assembler variants of the scalar compares (f32/f64).
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Pattern-bearing scalar compares built from the shared SSE multiclasses:
// FR32X/FR64X register forms select through X86any_fcmp (unordered) and
// X86strict_fcmps (ordered); the isCodeGenOnly variants take full XMM
// operands and exist for the comi/ucomi intrinsics.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                       sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                       sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                       VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                      sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                      sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

// FP16 counterparts (vcomish/vucomish), same structure as above.
let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                      EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Scalar approximate reciprocal / reciprocal-square-root.  The FP16
// rcpsh/rsqrtsh reuse the same SDNodes (X86rcp14s/X86rsqrt14s) as the
// 14-bit-precision f32/f64 forms.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// FP16 forms (note: not inside the MXCSR Uses block below).
defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6PD;
let Uses = [MXCSR] in {
  defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                                 f32x_info>, EVEX_CD8<32, CD8VT1>,
                                 T8PD;
  defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                                 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                                 T8PD;
  defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                   SchedWriteFRsqrt.Scl, f32x_info>,
                                   EVEX_CD8<32, CD8VT1>, T8PD;
  defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                   SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                   EVEX_CD8<64, CD8VT1>, T8PD;
}
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed approximate reciprocal / reciprocal-square-root: reg, load, and
// broadcast-load forms, all maskable.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates all width/type variants for one opcode: rcp14/rsqrt14
// (f32/f64, named with the "14" infix) plus FP16 rcp/rsqrt forms.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximation forms: plain (may raise FP
// exceptions, SIMD_EXC), {sae}, and memory-folded variants.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                  Sched<[sched]>, SIMD_EXC;
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                   (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_B, Sched<[sched]>;
  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Instantiates the f32 (ss) and f64 (sd) scalar variants.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
}

// FP16 scalar variant (sh), gated on HasFP16.
multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Predicates = [HasFP16] in
  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
             EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>;
}

// vgetexp scalar: ss/sd plus the FP16 sh form.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>,
               avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                                SchedWriteFRnd.Scl>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximation: reg, load, and broadcast-load
// forms, all maskable.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT _.RC:$src))>,
                           Sched<[sched]>;
  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                            (OpNode (_.VT
                              (_.BroadcastLdFrag addr:$src)))>,
                            EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// {sae} register-only companion form.
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

// 512-bit ps/pd instantiations (plain + {sae}).
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
// 128/256-bit ps/pd instantiations of a packed unary FP op (no {sae}
// form at these widths).
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

// FP16 ph instantiations at all widths (ZMM gets the {sae} form too).
multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

// AVX512ER packed approximation instructions (512-bit only).
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}

// vgetexp packed: 512-bit ps/pd (+sae), all FP16 widths, and the
// VL 128/256-bit ps/pd forms.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;
// Packed sqrt with explicit embedded rounding control ($rc), reg only.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed sqrt: reg, load, and broadcast-load forms.  The _split helper
// pairs the strict-FP node (any_fsqrt) for the unmasked patterns with
// plain fsqrt for the masked ones.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src), OpcodeStr, "$src", "$src",
                                (_.VT (any_fsqrt _.RC:$src)),
                                (_.VT (fsqrt _.RC:$src))>, EVEX,
                                Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                                (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                                (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.ScalarMemOp:$src), OpcodeStr,
                                 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                                 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                                 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                                 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
  8728. let Uses = [MXCSR], mayRaiseFPException = 1 in
  8729. multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
  8730. X86SchedWriteSizes sched> {
  8731. let Predicates = [HasFP16] in
  8732. defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
  8733. sched.PH.ZMM, v32f16_info>,
  8734. EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  8735. let Predicates = [HasFP16, HasVLX] in {
  8736. defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
  8737. sched.PH.XMM, v8f16x_info>,
  8738. EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  8739. defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
  8740. sched.PH.YMM, v16f16x_info>,
  8741. EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  8742. }
  8743. defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
  8744. sched.PS.ZMM, v16f32_info>,
  8745. EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  8746. defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
  8747. sched.PD.ZMM, v8f64_info>,
  8748. EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  8749. // Define only if AVX512VL feature is present.
  8750. let Predicates = [HasVLX] in {
  8751. defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
  8752. sched.PS.XMM, v4f32x_info>,
  8753. EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
  8754. defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
  8755. sched.PS.YMM, v8f32x_info>,
  8756. EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
  8757. defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
  8758. sched.PD.XMM, v2f64x_info>,
  8759. EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  8760. defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
  8761. sched.PD.YMM, v4f64x_info>,
  8762. EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  8763. }
  8764. }
  8765. let Uses = [MXCSR] in
  8766. multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
  8767. X86SchedWriteSizes sched> {
  8768. let Predicates = [HasFP16] in
  8769. defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
  8770. sched.PH.ZMM, v32f16_info>,
  8771. EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  8772. defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
  8773. sched.PS.ZMM, v16f32_info>,
  8774. EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  8775. defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
  8776. sched.PD.ZMM, v8f64_info>,
  8777. EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  8778. }
  8779. multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
  8780. X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  8781. let ExeDomain = _.ExeDomain, Predicates = [prd] in {
  8782. defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  8783. (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
  8784. "$src2, $src1", "$src1, $src2",
  8785. (X86fsqrts (_.VT _.RC:$src1),
  8786. (_.VT _.RC:$src2))>,
  8787. Sched<[sched]>, SIMD_EXC;
  8788. defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
  8789. (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
  8790. "$src2, $src1", "$src1, $src2",
  8791. (X86fsqrts (_.VT _.RC:$src1),
  8792. (_.ScalarIntMemFrags addr:$src2))>,
  8793. Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  8794. let Uses = [MXCSR] in
  8795. defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  8796. (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
  8797. "$rc, $src2, $src1", "$src1, $src2, $rc",
  8798. (X86fsqrtRnds (_.VT _.RC:$src1),
  8799. (_.VT _.RC:$src2),
  8800. (i32 timm:$rc))>,
  8801. EVEX_B, EVEX_RC, Sched<[sched]>;
  8802. let isCodeGenOnly = 1, hasSideEffects = 0 in {
  8803. def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
  8804. (ins _.FRC:$src1, _.FRC:$src2),
  8805. OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
  8806. Sched<[sched]>, SIMD_EXC;
  8807. let mayLoad = 1 in
  8808. def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
  8809. (ins _.FRC:$src1, _.ScalarMemOp:$src2),
  8810. OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
  8811. Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  8812. }
  8813. }
  8814. let Predicates = [prd] in {
  8815. def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
  8816. (!cast<Instruction>(Name#Zr)
  8817. (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  8818. }
  8819. let Predicates = [prd, OptForSize] in {
  8820. def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
  8821. (!cast<Instruction>(Name#Zm)
  8822. (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  8823. }
  8824. }
  8825. multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
  8826. X86SchedWriteSizes sched> {
  8827. defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
  8828. EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
  8829. defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
  8830. EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  8831. defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
  8832. EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
  8833. }
  8834. defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
  8835. avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
  8836. defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
  8837. multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
  8838. X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  8839. let ExeDomain = _.ExeDomain in {
  8840. defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  8841. (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
  8842. "$src3, $src2, $src1", "$src1, $src2, $src3",
  8843. (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
  8844. (i32 timm:$src3)))>,
  8845. Sched<[sched]>, SIMD_EXC;
  8846. let Uses = [MXCSR] in
  8847. defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
  8848. (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
  8849. "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
  8850. (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
  8851. (i32 timm:$src3)))>, EVEX_B,
  8852. Sched<[sched]>;
  8853. defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
  8854. (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
  8855. OpcodeStr,
  8856. "$src3, $src2, $src1", "$src1, $src2, $src3",
  8857. (_.VT (X86RndScales _.RC:$src1,
  8858. (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
  8859. Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  8860. let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
  8861. def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
  8862. (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
  8863. OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
  8864. []>, Sched<[sched]>, SIMD_EXC;
  8865. let mayLoad = 1 in
  8866. def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
  8867. (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
  8868. OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
  8869. []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  8870. }
  8871. }
  8872. let Predicates = [HasAVX512] in {
  8873. def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
  8874. (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
  8875. _.FRC:$src1, timm:$src2))>;
  8876. }
  8877. let Predicates = [HasAVX512, OptForSize] in {
  8878. def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
  8879. (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
  8880. addr:$src1, timm:$src2))>;
  8881. }
  8882. }
  8883. let Predicates = [HasFP16] in
  8884. defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
  8885. SchedWriteFRnd.Scl, f16x_info>,
  8886. AVX512PSIi8Base, TA, EVEX_4V,
  8887. EVEX_CD8<16, CD8VT1>;
  8888. defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
  8889. SchedWriteFRnd.Scl, f32x_info>,
  8890. AVX512AIi8Base, EVEX_4V, VEX_LIG,
  8891. EVEX_CD8<32, CD8VT1>;
  8892. defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
  8893. SchedWriteFRnd.Scl, f64x_info>,
  8894. VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
  8895. EVEX_CD8<64, CD8VT1>;
  8896. multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
  8897. dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
  8898. dag OutMask, Predicate BasePredicate> {
  8899. let Predicates = [BasePredicate] in {
  8900. def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
  8901. (OpNode (extractelt _.VT:$src2, (iPTR 0))),
  8902. (extractelt _.VT:$dst, (iPTR 0))))),
  8903. (!cast<Instruction>("V"#OpcPrefix#r_Intk)
  8904. _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
  8905. def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
  8906. (OpNode (extractelt _.VT:$src2, (iPTR 0))),
  8907. ZeroFP))),
  8908. (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
  8909. OutMask, _.VT:$src2, _.VT:$src1)>;
  8910. }
  8911. }
  8912. defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
  8913. (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
  8914. fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
  8915. defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
  8916. (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
  8917. fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
  8918. defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
  8919. (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
  8920. fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
  8927. def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
  8928. (vselect_mask node:$mask,
  8929. (trunc node:$src), node:$src0)>;
  8930. def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
  8931. (vselect_mask node:$mask,
  8932. (X86vtruncs node:$src), node:$src0)>;
  8933. def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
  8934. (vselect_mask node:$mask,
  8935. (X86vtruncus node:$src), node:$src0)>;
  8936. multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
  8937. SDPatternOperator MaskNode,
  8938. X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
  8939. X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  8940. let ExeDomain = DestInfo.ExeDomain in {
  8941. def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
  8942. (ins SrcInfo.RC:$src),
  8943. OpcodeStr # "\t{$src, $dst|$dst, $src}",
  8944. [(set DestInfo.RC:$dst,
  8945. (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
  8946. EVEX, Sched<[sched]>;
  8947. let Constraints = "$src0 = $dst" in
  8948. def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
  8949. (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
  8950. OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
  8951. [(set DestInfo.RC:$dst,
  8952. (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
  8953. (DestInfo.VT DestInfo.RC:$src0),
  8954. SrcInfo.KRCWM:$mask))]>,
  8955. EVEX, EVEX_K, Sched<[sched]>;
  8956. def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
  8957. (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
  8958. OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
  8959. [(set DestInfo.RC:$dst,
  8960. (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
  8961. DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
  8962. EVEX, EVEX_KZ, Sched<[sched]>;
  8963. }
  8964. let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
  8965. def mr : AVX512XS8I<opc, MRMDestMem, (outs),
  8966. (ins x86memop:$dst, SrcInfo.RC:$src),
  8967. OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
  8968. EVEX, Sched<[sched.Folded]>;
  8969. def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
  8970. (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
  8971. OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
  8972. EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  8973. }//mayStore = 1, hasSideEffects = 0
  8974. }
  8975. multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
  8976. PatFrag truncFrag, PatFrag mtruncFrag,
  8977. string Name> {
  8978. def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
  8979. (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
  8980. addr:$dst, SrcInfo.RC:$src)>;
  8981. def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
  8982. SrcInfo.KRCWM:$mask),
  8983. (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
  8984. addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
  8985. }
  8986. multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
  8987. SDNode OpNode256, SDNode OpNode512,
  8988. SDPatternOperator MaskNode128,
  8989. SDPatternOperator MaskNode256,
  8990. SDPatternOperator MaskNode512,
  8991. X86SchedWriteWidths sched,
  8992. AVX512VLVectorVTInfo VTSrcInfo,
  8993. X86VectorVTInfo DestInfoZ128,
  8994. X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
  8995. X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
  8996. X86MemOperand x86memopZ, PatFrag truncFrag,
  8997. PatFrag mtruncFrag, Predicate prd = HasAVX512>{
  8998. let Predicates = [HasVLX, prd] in {
  8999. defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
  9000. VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
  9001. avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
  9002. mtruncFrag, NAME>, EVEX_V128;
  9003. defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
  9004. VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
  9005. avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
  9006. mtruncFrag, NAME>, EVEX_V256;
  9007. }
  9008. let Predicates = [prd] in
  9009. defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
  9010. VTSrcInfo.info512, DestInfoZ, x86memopZ>,
  9011. avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
  9012. mtruncFrag, NAME>, EVEX_V512;
  9013. }
  9014. multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
  9015. X86SchedWriteWidths sched, PatFrag StoreNode,
  9016. PatFrag MaskedStoreNode, SDNode InVecNode,
  9017. SDPatternOperator InVecMaskNode> {
  9018. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
  9019. InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
  9020. avx512vl_i64_info, v16i8x_info, v16i8x_info,
  9021. v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
  9022. MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
  9023. }
  9024. multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
  9025. SDPatternOperator MaskNode,
  9026. X86SchedWriteWidths sched, PatFrag StoreNode,
  9027. PatFrag MaskedStoreNode, SDNode InVecNode,
  9028. SDPatternOperator InVecMaskNode> {
  9029. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
  9030. InVecMaskNode, InVecMaskNode, MaskNode, sched,
  9031. avx512vl_i64_info, v8i16x_info, v8i16x_info,
  9032. v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
  9033. MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
  9034. }
  9035. multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
  9036. SDPatternOperator MaskNode,
  9037. X86SchedWriteWidths sched, PatFrag StoreNode,
  9038. PatFrag MaskedStoreNode, SDNode InVecNode,
  9039. SDPatternOperator InVecMaskNode> {
  9040. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
  9041. InVecMaskNode, MaskNode, MaskNode, sched,
  9042. avx512vl_i64_info, v4i32x_info, v4i32x_info,
  9043. v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
  9044. MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
  9045. }
  9046. multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
  9047. SDPatternOperator MaskNode,
  9048. X86SchedWriteWidths sched, PatFrag StoreNode,
  9049. PatFrag MaskedStoreNode, SDNode InVecNode,
  9050. SDPatternOperator InVecMaskNode> {
  9051. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
  9052. InVecMaskNode, InVecMaskNode, MaskNode, sched,
  9053. avx512vl_i32_info, v16i8x_info, v16i8x_info,
  9054. v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
  9055. MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
  9056. }
  9057. multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
  9058. SDPatternOperator MaskNode,
  9059. X86SchedWriteWidths sched, PatFrag StoreNode,
  9060. PatFrag MaskedStoreNode, SDNode InVecNode,
  9061. SDPatternOperator InVecMaskNode> {
  9062. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
  9063. InVecMaskNode, MaskNode, MaskNode, sched,
  9064. avx512vl_i32_info, v8i16x_info, v8i16x_info,
  9065. v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
  9066. MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
  9067. }
  9068. multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
  9069. SDPatternOperator MaskNode,
  9070. X86SchedWriteWidths sched, PatFrag StoreNode,
  9071. PatFrag MaskedStoreNode, SDNode InVecNode,
  9072. SDPatternOperator InVecMaskNode> {
  9073. defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
  9074. InVecMaskNode, MaskNode, MaskNode, sched,
  9075. avx512vl_i16_info, v16i8x_info, v16i8x_info,
  9076. v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
  9077. MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
  9078. }
  9079. defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
  9080. SchedWriteVecTruncate, truncstorevi8,
  9081. masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
  9082. defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
  9083. SchedWriteVecTruncate, truncstore_s_vi8,
  9084. masked_truncstore_s_vi8, X86vtruncs,
  9085. X86vmtruncs>;
  9086. defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
  9087. SchedWriteVecTruncate, truncstore_us_vi8,
  9088. masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
  9089. defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
  9090. SchedWriteVecTruncate, truncstorevi16,
  9091. masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
  9092. defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
  9093. SchedWriteVecTruncate, truncstore_s_vi16,
  9094. masked_truncstore_s_vi16, X86vtruncs,
  9095. X86vmtruncs>;
  9096. defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
  9097. select_truncus, SchedWriteVecTruncate,
  9098. truncstore_us_vi16, masked_truncstore_us_vi16,
  9099. X86vtruncus, X86vmtruncus>;
  9100. defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
  9101. SchedWriteVecTruncate, truncstorevi32,
  9102. masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
  9103. defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
  9104. SchedWriteVecTruncate, truncstore_s_vi32,
  9105. masked_truncstore_s_vi32, X86vtruncs,
  9106. X86vmtruncs>;
  9107. defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
  9108. select_truncus, SchedWriteVecTruncate,
  9109. truncstore_us_vi32, masked_truncstore_us_vi32,
  9110. X86vtruncus, X86vmtruncus>;
  9111. defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
  9112. SchedWriteVecTruncate, truncstorevi8,
  9113. masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
  9114. defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
  9115. SchedWriteVecTruncate, truncstore_s_vi8,
  9116. masked_truncstore_s_vi8, X86vtruncs,
  9117. X86vmtruncs>;
  9118. defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
  9119. select_truncus, SchedWriteVecTruncate,
  9120. truncstore_us_vi8, masked_truncstore_us_vi8,
  9121. X86vtruncus, X86vmtruncus>;
  9122. defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
  9123. SchedWriteVecTruncate, truncstorevi16,
  9124. masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
  9125. defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
  9126. SchedWriteVecTruncate, truncstore_s_vi16,
  9127. masked_truncstore_s_vi16, X86vtruncs,
  9128. X86vmtruncs>;
  9129. defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
  9130. select_truncus, SchedWriteVecTruncate,
  9131. truncstore_us_vi16, masked_truncstore_us_vi16,
  9132. X86vtruncus, X86vmtruncus>;
  9133. defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
  9134. SchedWriteVecTruncate, truncstorevi8,
  9135. masked_truncstorevi8, X86vtrunc,
  9136. X86vmtrunc>;
  9137. defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
  9138. SchedWriteVecTruncate, truncstore_s_vi8,
  9139. masked_truncstore_s_vi8, X86vtruncs,
  9140. X86vmtruncs>;
  9141. defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
  9142. select_truncus, SchedWriteVecTruncate,
  9143. truncstore_us_vi8, masked_truncstore_us_vi8,
  9144. X86vtruncus, X86vmtruncus>;
  9145. let Predicates = [HasAVX512, NoVLX] in {
  9146. def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
  9147. (v8i16 (EXTRACT_SUBREG
  9148. (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
  9149. VR256X:$src, sub_ymm)))), sub_xmm))>;
  9150. def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
  9151. (v4i32 (EXTRACT_SUBREG
  9152. (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
  9153. VR256X:$src, sub_ymm)))), sub_xmm))>;
  9154. }
  9155. let Predicates = [HasBWI, NoVLX] in {
  9156. def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
  9157. (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
  9158. VR256X:$src, sub_ymm))), sub_xmm))>;
  9159. }
  9160. // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
  9161. multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
  9162. X86VectorVTInfo DestInfo,
  9163. X86VectorVTInfo SrcInfo> {
  9164. def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
  9165. DestInfo.RC:$src0,
  9166. SrcInfo.KRCWM:$mask)),
  9167. (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
  9168. SrcInfo.KRCWM:$mask,
  9169. SrcInfo.RC:$src)>;
  9170. def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
  9171. DestInfo.ImmAllZerosV,
  9172. SrcInfo.KRCWM:$mask)),
  9173. (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
  9174. SrcInfo.RC:$src)>;
  9175. }
  9176. let Predicates = [HasVLX] in {
  9177. defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
  9178. defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
  9179. defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
  9180. }
  9181. let Predicates = [HasAVX512] in {
  9182. defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
  9183. defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
  9184. defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
  9185. defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
  9186. defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
  9187. defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
  9188. defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
  9189. defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
  9190. defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
  9191. }
  9192. multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
  9193. X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
  9194. X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  9195. let ExeDomain = DestInfo.ExeDomain in {
  9196. defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
  9197. (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
  9198. (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
  9199. EVEX, Sched<[sched]>;
  9200. defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
  9201. (ins x86memop:$src), OpcodeStr ,"$src", "$src",
  9202. (DestInfo.VT (LdFrag addr:$src))>,
  9203. EVEX, Sched<[sched.Folded]>;
  9204. }
  9205. }
  9206. multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
  9207. SDNode OpNode, SDNode InVecNode, string ExtTy,
  9208. X86SchedWriteWidths sched,
  9209. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  9210. let Predicates = [HasVLX, HasBWI] in {
  9211. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
  9212. v16i8x_info, i64mem, LdFrag, InVecNode>,
  9213. EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
  9214. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
  9215. v16i8x_info, i128mem, LdFrag, OpNode>,
  9216. EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  9217. }
  9218. let Predicates = [HasBWI] in {
  9219. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
  9220. v32i8x_info, i256mem, LdFrag, OpNode>,
  9221. EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  9222. }
  9223. }
  9224. multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
  9225. SDNode OpNode, SDNode InVecNode, string ExtTy,
  9226. X86SchedWriteWidths sched,
  9227. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  9228. let Predicates = [HasVLX, HasAVX512] in {
  9229. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
  9230. v16i8x_info, i32mem, LdFrag, InVecNode>,
  9231. EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
  9232. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
  9233. v16i8x_info, i64mem, LdFrag, InVecNode>,
  9234. EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  9235. }
  9236. let Predicates = [HasAVX512] in {
  9237. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
  9238. v16i8x_info, i128mem, LdFrag, OpNode>,
  9239. EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  9240. }
  9241. }
  9242. multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
  9243. SDNode InVecNode, string ExtTy,
  9244. X86SchedWriteWidths sched,
  9245. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  9246. let Predicates = [HasVLX, HasAVX512] in {
  9247. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
  9248. v16i8x_info, i16mem, LdFrag, InVecNode>,
  9249. EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
  9250. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
  9251. v16i8x_info, i32mem, LdFrag, InVecNode>,
  9252. EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  9253. }
  9254. let Predicates = [HasAVX512] in {
  9255. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
  9256. v16i8x_info, i64mem, LdFrag, InVecNode>,
  9257. EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  9258. }
  9259. }
  9260. multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
  9261. SDNode OpNode, SDNode InVecNode, string ExtTy,
  9262. X86SchedWriteWidths sched,
  9263. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  9264. let Predicates = [HasVLX, HasAVX512] in {
  9265. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
  9266. v8i16x_info, i64mem, LdFrag, InVecNode>,
  9267. EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
  9268. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
  9269. v8i16x_info, i128mem, LdFrag, OpNode>,
  9270. EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  9271. }
  9272. let Predicates = [HasAVX512] in {
  9273. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
  9274. v16i16x_info, i256mem, LdFrag, OpNode>,
  9275. EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  9276. }
  9277. }
  9278. multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
  9279. SDNode OpNode, SDNode InVecNode, string ExtTy,
  9280. X86SchedWriteWidths sched,
  9281. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  9282. let Predicates = [HasVLX, HasAVX512] in {
  9283. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
  9284. v8i16x_info, i32mem, LdFrag, InVecNode>,
  9285. EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
  9286. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
  9287. v8i16x_info, i64mem, LdFrag, InVecNode>,
  9288. EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  9289. }
  9290. let Predicates = [HasAVX512] in {
  9291. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
  9292. v8i16x_info, i128mem, LdFrag, OpNode>,
  9293. EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  9294. }
  9295. }
  9296. multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
  9297. SDNode OpNode, SDNode InVecNode, string ExtTy,
  9298. X86SchedWriteWidths sched,
  9299. PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
  9300. let Predicates = [HasVLX, HasAVX512] in {
  9301. defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
  9302. v4i32x_info, i64mem, LdFrag, InVecNode>,
  9303. EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
  9304. defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
  9305. v4i32x_info, i128mem, LdFrag, OpNode>,
  9306. EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  9307. }
  9308. let Predicates = [HasAVX512] in {
  9309. defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
  9310. v8i32x_info, i256mem, LdFrag, OpNode>,
  9311. EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  9312. }
  9313. }
  9314. defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
  9315. defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
  9316. defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
  9317. defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
  9318. defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
  9319. defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
  9320. defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
  9321. defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
  9322. defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
  9323. defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
  9324. defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
  9325. defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
//
// Select a full-width extend of a whole in-memory vector into the folded-load
// form of the corresponding PMOVSX/PMOVZX instruction. OpcPrefix is
// "VPMOVSX" or "VPMOVZX"; ExtOp is the matching sext/zext node.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
  9356. multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
  9357. SDNode InVecOp> :
  9358. AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  9359. // 128-bit patterns
  9360. let Predicates = [HasVLX, HasBWI] in {
  9361. def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9362. (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  9363. def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
  9364. (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  9365. def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
  9366. (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  9367. }
  9368. let Predicates = [HasVLX] in {
  9369. def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
  9370. (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  9371. def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
  9372. (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  9373. def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
  9374. (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  9375. def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9376. (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  9377. def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
  9378. (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  9379. def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
  9380. (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  9381. def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
  9382. (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  9383. def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
  9384. (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  9385. def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9386. (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  9387. def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
  9388. (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  9389. def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
  9390. (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  9391. }
  9392. let Predicates = [HasVLX] in {
  9393. def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9394. (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  9395. def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
  9396. (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  9397. def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
  9398. (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  9399. def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
  9400. (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  9401. def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
  9402. (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  9403. def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9404. (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  9405. def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
  9406. (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  9407. def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
  9408. (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  9409. }
  9410. // 512-bit patterns
  9411. let Predicates = [HasAVX512] in {
  9412. def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
  9413. (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  9414. def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
  9415. (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  9416. def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
  9417. (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  9418. }
  9419. }
// Instantiate the load-folding patterns for both the sign- and zero-extend
// instruction families.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
// Lower it instead as zero-extend to v16i32 followed by a dword->byte
// truncating move.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  // Masked gather: $dst is tied to $src1 (merge semantics) and the mask
  // register is tied and written back through $mask_wb. No ISel pattern;
  // gathers are selected manually.
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
// Gathers for 64-bit-element data (PD/Q): the "D" forms use dword indices,
// the "Q" forms qword indices, at 512/256/128-bit widths (sub-512 widths
// require VLX).
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                         vx256xmem>, EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                         vy256xmem>, EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                         vx128xmem>, EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                         vx128xmem>, EVEX_V128, VEX_W;
  }
}
// Gathers for 32-bit-element data (PS/D). With qword indices the index
// vector holds half as many lanes as 32-bit data would, so the "Q" forms use
// the data info one width down (e.g. info256 data with a zmm index).
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                         vy256xmem>, EVEX_V256;
    defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                         vy128xmem>, EVEX_V256;
    defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                         vx128xmem>, EVEX_V128;
    // The 128-bit Q form writes only 2 elements, hence the explicit VK2WM.
    defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                         vx64xmem, VK2WM>, EVEX_V128;
  }
}
// FP gathers (vgatherdps/qps/dpd/qpd) and integer gathers (vpgatherdd/qd/dq/qq).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  // Masked scatter: stores $src's elements to $dst's scattered addresses
  // under $mask; the mask register is tied and written back ($mask_wb).
  // No ISel pattern; scatters are selected manually.
  let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}
// Scatters for 64-bit-element data (PD/Q); mirrors avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                          vx256xmem>, EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                          vy256xmem>, EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>, EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                          vx128xmem>, EVEX_V128, VEX_W;
  }
}
// Scatters for 32-bit-element data (PS/D); mirrors avx512_gather_d_ps,
// including the narrower data info for the qword-index forms.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                     EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>, EVEX_V256;
    defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>, EVEX_V256;
    defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>, EVEX_V128;
    // Only 2 elements are stored by the 128-bit Q form, hence VK2WM.
    defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>, EVEX_V128;
  }
}
// FP scatters (vscatterdps/qps/dpd/qpd) and integer scatters (vpscatterdd/qd/dq/qq).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// prefetch
// Masked gather/scatter prefetch (PFI feature): touches the cache lines the
// masked elements would address but loads/stores no data. Marked both
// mayLoad and mayStore so it is treated conservatively by the scheduler.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}
// Gather prefetches, hint 0 (opcode reg fields MRM1m) and hint 1 (MRM2m).
// DPS/DPD use dword indices, QPS/QPD qword indices; EVEX_CD8 tracks the
// index element size.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches, hint 0 (MRM5m) and hint 1 (MRM6m).
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Materialize a mask register as a vector by sign-extending each mask bit to
// a full element (all-ones for set bits, all-zeroes otherwise).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                      EVEX, Sched<[Sched]>;
}
// Instantiate the mask->vector move at all three vector widths; the sub-512
// widths additionally require VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}
// vpmovm2{b,w} require BWI; vpmovm2{d,q} require DQI.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// vpmov*2m: extract each element's sign bit into a mask register; matched as
// the signed compare "0 > x", which is true exactly when the sign bit is set.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the narrow source into an undef 512-bit register via INSERT_SUBREG,
// run the Z-width instruction, and copy the result into the narrow mask
// register class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
// Instantiate vector->mask at all widths; with VLX the narrow instructions
// are used directly, and without VLX the narrow operations are lowered
// through the 512-bit instruction (Z*_Alt patterns).
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
// vpmov{b,w}2m require BWI; vpmov{d,q}2m require DQI.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Lowered via vpmovm2d into a v16i32, then a truncating dword->byte/word move.
let Predicates = [HasDQI, NoBWI] in {
def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
          (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
          (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

// Same idea at 256 bits for v8i16, which additionally needs VLX.
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
          (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  // The register form carries no ISel pattern (null_frag); selection happens
  // through the lowering patterns in compress_by_vec_width_lowering.
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>, AVX5128IBase,
            Sched<[sched]>;

  // Unmasked and masked compressing stores (also selected via patterns).
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
                    (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    []>, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                     []>,
                     EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                     Sched<[sched.Folded]>;
}
// Select X86 compress nodes into the instruction forms defined above:
// masked compressing store, merge-masked register form (rrk), and
// zero-masked register form (rrkz).
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                               addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                               _.KRCWM:$mask, _.RC:$src)>;
}
// Instantiate compress instructions and patterns at 512/256/128-bit widths;
// the sub-512 widths additionally require VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  // Both forms carry no ISel pattern (null_frag); selection happens through
  // the lowering patterns in expand_by_vec_width_lowering.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>, AVX5128IBase,
            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Select X86 expand nodes into the instruction forms above. Expanding loads
// with an undef or all-zero passthru both map to the zero-masked load (rmkz);
// a register passthru maps to the merge-masked load (rmk).
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                               _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  // Register-to-register expand, merge- and zero-masked.
  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                               _.KRCWM:$mask, _.RC:$src)>;
}
// Instantiate expand instructions and patterns at 512/256/128-bit widths;
// the sub-512 widths additionally require VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
//handle instruction  reg_vec1 = op(reg_vec,imm)
//                    op(mem_vec,imm)
//                    op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  // All three forms read MXCSR and may raise FP exceptions.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, i32u8imm:$src2),
                                   OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                                   (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                                   Sched<[sched]>;
  // Full-vector memory source.
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.MemOp:$src1, i32u8imm:$src2),
                                   OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                           (i32 timm:$src2)),
                                   (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                               (i32 timm:$src2))>,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast scalar memory source (EVEX.b).
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                                    "${src1}"#_.BroadcastStr#", $src2",
                                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                            (i32 timm:$src2)),
                                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                                (i32 timm:$src2))>, EVEX_B,
                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE (suppress-all-exceptions) register form; still reads MXCSR but does
// not set mayRaiseFPException.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, i32u8imm:$src2),
                              OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                              "$src1, {sae}, $src2",
                              (OpNode (_.VT _.RC:$src1),
                                      (i32 timm:$src2))>,
                              EVEX_B, Sched<[sched]>;
}
// Instantiate the unary packed-imm forms at all widths; the SAE variant only
// exists at 512 bits. Sub-512 widths require VLX.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
                  SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
                  Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                    op(reg_vec2,mem_vec,imm)
//                    op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // All three forms read MXCSR and may raise FP exceptions.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register/register source.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 timm:$src3))>,
                             Sched<[sched]>;
  // Full-vector memory second source.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                             OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                     (i32 timm:$src3))>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast scalar memory second source (EVEX.b).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                              OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                              "$src1, ${src2}"#_.BroadcastStr#", $src3",
                              (OpNode (_.VT _.RC:$src1),
                                      (_.VT (_.BroadcastLdFrag addr:$src2)),
                                      (i32 timm:$src3))>, EVEX_B,
                              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                    op(reg_vec2,mem_vec,imm)
// Source and destination may have different VT infos (e.g. shuffles whose
// result type differs from the operand type).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT (bitconvert
                                                    (SrcInfo.LdFrag addr:$src2))),
                                       (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                    op(reg_vec2,mem_vec,imm)
//                    op(reg_vec2,broadcast(eltVt),imm)
// Same as avx512_3Op_rm_imm8 with matching src/dst types, plus the broadcast
// memory form (EVEX.b).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                           op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Both forms read MXCSR and may raise FP exceptions.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  // Scalar memory second source, matched through the intrinsic mem frags.
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.ScalarIntMemFrags addr:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE packed form; reads MXCSR but does not set mayRaiseFPException.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE scalar form; reads MXCSR but does not set mayRaiseFPException.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
// Instantiate the packed imm op at all vector widths: 512-bit (with the SAE
// variant) under 'prd', and 128/256-bit additionally gated on HasVLX.
// SAE is only available on the 512-bit form.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}

// Three-operand (two vector sources + imm8) op at all widths, with possibly
// different destination and source element types (e.g. vdbpsadbw, vpalignr).
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                   SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

// Same as above but source and destination share one VT info.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

// Scalar imm op plus its SAE variant (scalar ops use the XMM scheduling class).
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}
// Instantiate a unary packed imm op for all FP element sizes.
// Note: the FP16 (PH) variant reuses the PS opcode byte by design; the
// encodings differ in prefix (TA map) and CD8 scale.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Packed unary imm ops: VREDUCE (DQ), VRNDSCALE, VGETMANT.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

// Packed binary imm ops with SAE: VRANGEPD/VRANGEPS (DQ only).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar imm ops with SAE: VRANGES*, VREDUCES*, VGETMANTS*.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
// VSHUFF/VSHUFI 32x4/64x2: shuffle 128-bit lanes selected by imm8. The
// shuffle node operates on CastInfo's type; the result is bitconverted back
// to the instruction's own VT. EVEX2VEXOvrd names the VEX equivalent
// (VPERM2F128/VPERM2I128) for EVEX->VEX compression of the 256-bit forms.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast-memory form (embedded broadcast, EVEX_B).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Only 512-bit and 256-bit forms exist (the op shuffles whole 128-bit
// lanes, so a 128-bit version would be a no-op). No EVEX->VEX override for
// the 512-bit form (empty string).
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VALIGND/VALIGNQ: concatenate two sources and right-shift by imm8 elements.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;
  // Embedded-broadcast memory form.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr. The immediate counts elements, so converting to a narrower
// element type multiplies it by the element-size ratio (in bytes for the
// vpalignr forms).
def ValignqImm32XForm : SDNodeXForm<timm, [{
  // valignq imm (qwords) -> valignd imm (dwords): x2.
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  // valignq imm (qwords) -> vpalignr imm (bytes): x8.
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  // valignd imm (dwords) -> vpalignr imm (bytes): x4.
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

// Lower a masked align in 'From' element type to the already-selected
// instruction in 'To' element type, rescaling the immediate with ImmXForm.
// Covers merge-masked (rrik/rmik) and zero-masked (rrikz/rmikz) forms, for
// both register and load operands.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

// Extends the above with embedded-broadcast (rmbi/rmbik/rmbikz) patterns:
// a broadcast in the 'To' type feeding an align in the 'From' type.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;

  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

// vdbpsadbw: i8 sources, i16 destination; no VEX equivalent exists.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Unary op reg_vec1 = op(reg_vec2) with reg and memory source forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr,
                            "$src1", "$src1",
                            (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src1), OpcodeStr,
                          "$src1", "$src1",
                          (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                          EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                          Sched<[sched.Folded]>;
  }
}

// Adds the embedded-broadcast memory form (rmb) to avx512_unary_rm.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src1), OpcodeStr,
                            "${src1}"#_.BroadcastStr,
                            "${src1}"#_.BroadcastStr,
                            (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                            EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                            Sched<[sched.Folded]>;
}

// Instantiate avx512_unary_rm at 512-bit under 'prd', plus 128/256-bit
// under HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Same but with the broadcast form (for 32/64-bit elements).
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Dword/qword variants (broadcast supported; VEX_W selects qword).
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// Byte/word variants (no broadcast form for sub-dword elements).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

// All four element sizes; B/W forms require BWI, D/Q only AVX512F.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // Widen the operand into a ZMM register, run the 512-bit op, extract back.
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
// Generic form of the widen/extract lowering above for any unary op.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                      avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit movddup is a broadcast of the low f64; the memory form loads only
// a scalar (CD8VH: half-vector memory granularity).
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

// 512/256-bit forms use the generic unary pattern (X86Movddup); only the
// 128-bit form needs the special broadcast handling above.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

// Select VMOVDDUPZ128 (with masking) for a scalar-f64 broadcast.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// FP unpacks never raise FP exceptions and do not read MXCSR, so clear the
// Uses/mayRaiseFPException defaults the FP binop multiclass would set.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

// Integer unpacks: byte/word forms need BWI, dword/qword only AVX512F.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store-to-memory form for pextrb/pextrw (extracted element truncated to
// the element width before the store).
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    // Disassembly-only alternate encoding (0x15, MRMDestReg) of pextrw.
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

// Dword/qword extract matches the generic 'extractelt' node directly (the
// whole element fits in the GPR, no truncation needed).
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
  10530. multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
  10531. X86VectorVTInfo _, PatFrag LdFrag,
  10532. SDPatternOperator immoperator> {
  10533. def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
  10534. (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
  10535. OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
  10536. [(set _.RC:$dst,
  10537. (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
  10538. EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
  10539. }
// Byte/word element insert (VPINSRB/VPINSRW). The register form takes a
// GR32orGR64 source and matches the target node `OpNode`; the memory form is
// shared via avx512_insert_elt_m. Both are gated on HasBWI.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}
// Dword/qword element insert (VPINSRD/VPINSRQ). Matches the generic
// `insertelt` node with a GR32/GR64 source; gated on HasDQI. Uses `imm`
// (not `timm`) as the index matcher, forwarded to the memory form.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}
// EVEX-encoded element-insert instructions. Note VPINSRD and VPINSRQ share
// opcode 0x22 and are distinguished by VEX_W (REX.W) on the Q form.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
// Insert a v8i1 mask register (bitcast to i8 then any-extended) with pinsrb.
// Without BWI the EVEX VPINSRBZ does not exist, so fall back to the VEX
// encoding (VPINSRBrr).
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
             (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
             timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
             timm:$src3)>;
}

// With BWI, select the EVEX form; a GR8 source is widened to GR32 via
// INSERT_SUBREG since the instruction reads a 32-bit register.
let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                      GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
             (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
             timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
             timm:$src3)>;
}
  10588. // Always select FP16 instructions if available.
  10589. let Predicates = [HasBWI], AddedComplexity = -10 in {
  10590. def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  10591. def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  10592. def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  10593. def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
  10594. }
  10595. //===----------------------------------------------------------------------===//
  10596. // VSHUFPS - VSHUFPD Operations
  10597. //===----------------------------------------------------------------------===//
// VSHUFPS/VSHUFPD: immediate-controlled shuffle over all EVEX vector widths,
// built on the common 3-operand-plus-imm8 helper.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
  10606. //===----------------------------------------------------------------------===//
  10607. // AVX-512 - Byte shift Left/Right
  10608. //===----------------------------------------------------------------------===//
// Whole-register byte shift (VPSLLDQ/VPSRLDQ) by an immediate, register and
// load-folded forms. MRMr/MRMm carry the /r opcode-extension encoding forms
// supplied by the caller.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiate the byte shift at 512 bits under `prd` and at 128/256 bits
// when VLX is additionally available.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}

// Shift left/right use the same opcode; the /7 vs. /3 ModRM.reg extension
// selects the operation.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// VPSADBW: sum of absolute byte differences. Source and destination use
// different VT infos (i8 inputs, i64 accumulator lanes).
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiate VPSADBW at 512 bits under `prd` and at 128/256 bits when VLX
// is additionally available.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
  10679. // Transforms to swizzle an immediate to enable better matching when
  10680. // memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  // Truth-table bit index is (op0<<2)|(op1<<1)|op2; exchanging op0 and op2
  // keeps the indices where both bits agree (0,2,5,7 -> mask 0xa5) and swaps
  // the rest pairwise.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (The mask 0xc3 keeps the truth-table indices where op0 == op1, i.e.
  // bits 0,1,6,7, and the pairs 2/4 and 3/5 — indices 010/100 and 011/101 —
  // are exchanged, which is the op0<->op1 swap.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  // Mask 0x99 keeps indices where op1 == op2 (bits 0,3,4,7); the remaining
  // pairs 1/2 and 5/6 are exchanged.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  // This is a cyclic rotation of the three truth-table selector bits; only
  // indices 0 (000) and 7 (111) are fixed, hence mask 0x81.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  // The inverse rotation of VPTERNLOG231_imm8; indices 0 and 7 stay fixed.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// VPTERNLOG: three-input bit-wise logic selected by an 8-bit truth-table
// immediate. $src1 is tied to $dst, so when the DAG has the tied/passthru
// operand, the load, or the broadcast in a different position, we still
// select the instruction by permuting the truth table with the
// VPTERNLOG*_imm8 transforms defined above.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}
// Instantiate VPTERNLOG at 512 bits under AVX512 and at 128/256 bits when
// VLX is additionally available.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                          _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                             _.info128, NAME>, EVEX_V128;
  defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                             _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
  10896. // Patterns to implement vnot using vpternlog instead of creating all ones
  10897. // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
  10898. // so that the result is only dependent on src0. But we use the same source
  10899. // for all operands to prevent a false dependency.
  10900. // TODO: We should maybe have a more generalized algorithm for folding to
  10901. // vpternlog.
// 512-bit vnot: truth table 0x0F (15) yields ~src0 independent of the other
// two operands, which are set to the same register to avoid a false
// dependency on unrelated values.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
// Without VLX there are no 128/256-bit VPTERNLOG encodings, so widen the
// operand to 512 bits with INSERT_SUBREG, run the 512-bit instruction, and
// extract the original subregister back out.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
// With VLX, use the native 128/256-bit VPTERNLOG encodings directly.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v32i8 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
  10988. //===----------------------------------------------------------------------===//
  10989. // AVX-512 - FixupImm
  10990. //===----------------------------------------------------------------------===//
// VFIXUPIMM (packed): fix up special FP values using a table operand ($src3,
// integer-typed via TblVT) and a 8-bit control immediate. Reads MXCSR and may
// raise FP exceptions. $src1 is tied to $dst.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                      (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                        "$src2, ${src3}"#_.BroadcastStr#", $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                      (i32 timm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
// Extends the packed form with the {sae} (suppress-all-exceptions) variant;
// only defined for the 512-bit instantiation.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}
// VFIXUPIMMSS/SD: scalar fixup with register, {sae}, and memory forms.
// $src3 uses a separate VT info since the table operand is integer-typed.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT _src3VT.RC:$src3),
                                       (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    // NOTE(review): rrib is a register-register form, yet it is scheduled
    // with the folded-load resources (sched.Folded/ReadAfterFold) — looks
    // copied from rmi; confirm against the scheduler models.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          (_src3VT.VT _src3VT.RC:$src3),
                                          (i32 timm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT (scalar_to_vector
                                                    (_src3VT.ScalarLdFrag addr:$src3))),
                                       (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
// Instantiate packed VFIXUPIMM: 512-bit (with {sae}) under AVX512, and
// 128/256-bit (no {sae}) when VLX is additionally available.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
  11097. // Patterns used to select SSE scalar fp arithmetic instructions from
  11098. // either:
  11099. //
  11100. // (1) a scalar fp operation followed by a blend
  11101. //
  11102. // The effect is that the backend no longer emits unnecessary vector
  11103. // insert instructions immediately after SSE scalar fp instructions
  11104. // like addss or mulss.
  11105. //
  11106. // For example, given the following code:
  11107. // __m128 foo(__m128 A, __m128 B) {
  11108. // A[0] += B[0];
  11109. // return A;
  11110. // }
  11111. //
  11112. // Previously we generated:
  11113. // addss %xmm0, %xmm1
  11114. // movss %xmm1, %xmm0
  11115. //
  11116. // We now generate:
  11117. // addss %xmm1, %xmm0
  11118. //
  11119. // (2) a vector packed single/double fp operation followed by a vector insert
  11120. //
  11121. // The effect is that the backend converts the packed fp instruction
  11122. // followed by a vector insert into a single SSE scalar fp instruction.
  11123. //
  11124. // For example, given the following code:
  11125. // __m128 foo(__m128 A, __m128 B) {
  11126. // __m128 C = A + B;
11127. // return (__m128) {C[0], A[1], A[2], A[3]};
  11128. // }
  11129. //
  11130. // Previously we generated:
  11131. // addps %xmm0, %xmm1
  11132. // movss %xmm1, %xmm0
  11133. //
  11134. // We now generate:
  11135. // addss %xmm1, %xmm0
  11136. // TODO: Some canonicalization in lowering would simplify the number of
  11137. // patterns we have to try to match.
  11138. multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
  11139. string OpcPrefix, SDNode MoveNode,
  11140. X86VectorVTInfo _, PatLeaf ZeroFP> {
  11141. let Predicates = [HasAVX512] in {
  11142. // extracted scalar math op with insert via movss
  11143. def : Pat<(MoveNode
  11144. (_.VT VR128X:$dst),
  11145. (_.VT (scalar_to_vector
  11146. (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
  11147. _.FRC:$src)))),
  11148. (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
  11149. (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
  11150. def : Pat<(MoveNode
  11151. (_.VT VR128X:$dst),
  11152. (_.VT (scalar_to_vector
  11153. (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
  11154. (_.ScalarLdFrag addr:$src))))),
  11155. (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
  11156. // extracted masked scalar math op with insert via movss
  11157. def : Pat<(MoveNode (_.VT VR128X:$src1),
  11158. (scalar_to_vector
  11159. (X86selects_mask VK1WM:$mask,
  11160. (MaskedOp (_.EltVT
  11161. (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  11162. _.FRC:$src2),
  11163. _.FRC:$src0))),
  11164. (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
  11165. (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
  11166. VK1WM:$mask, _.VT:$src1,
  11167. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  11168. def : Pat<(MoveNode (_.VT VR128X:$src1),
  11169. (scalar_to_vector
  11170. (X86selects_mask VK1WM:$mask,
  11171. (MaskedOp (_.EltVT
  11172. (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  11173. (_.ScalarLdFrag addr:$src2)),
  11174. _.FRC:$src0))),
  11175. (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
  11176. (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
  11177. VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  11178. // extracted masked scalar math op with insert via movss
  11179. def : Pat<(MoveNode (_.VT VR128X:$src1),
  11180. (scalar_to_vector
  11181. (X86selects_mask VK1WM:$mask,
  11182. (MaskedOp (_.EltVT
  11183. (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  11184. _.FRC:$src2), (_.EltVT ZeroFP)))),
  11185. (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
  11186. VK1WM:$mask, _.VT:$src1,
  11187. (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  11188. def : Pat<(MoveNode (_.VT VR128X:$src1),
  11189. (scalar_to_vector
  11190. (X86selects_mask VK1WM:$mask,
  11191. (MaskedOp (_.EltVT
  11192. (extractelt (_.VT VR128X:$src1), (iPTR 0))),
  11193. (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
  11194. (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  11195. }
  11196. }
// Instantiate the scalar FP math patterns for each arithmetic op over
// f32 (SS), f64 (SD) and f16 (SH). Each instantiation pairs the
// (possibly strict) op node, its plain counterpart for masked forms, the
// instruction mnemonic prefix, the element-move node, the 128-bit vector
// type info, and the matching all-zeros FP immediate for zero-masking.
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
// Patterns selecting the intrinsic ("_Int") form of a scalar unary math
// instruction when the op is applied to element 0 of a vector and the
// result is merged back into $dst via the element-move node.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

// Scalar square root for f32/f64/f16.
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
  11220. //===----------------------------------------------------------------------===//
  11221. // AES instructions
  11222. //===----------------------------------------------------------------------===//
// EVEX-encoded AES. Reuses AESI_binop_rm_int with a width-specific
// intrinsic: 128/256-bit forms are gated on VAES+VLX, the 512-bit form on
// VAES+AVX512F. IntPrefix names the base intrinsic; "_256"/"_512" suffixes
// are appended for the wider forms.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix#"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
  11244. //===----------------------------------------------------------------------===//
  11245. // PCLMUL instructions - Carry less multiplication
  11246. //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply: 512-bit form needs VPCLMULQDQ+AVX512F,
// the 128/256-bit forms need VPCLMULQDQ+VLX.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                        EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
  defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                   int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                        EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Mnemonic aliases (e.g. vpclmullqlqdq) for each width.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
  11261. //===----------------------------------------------------------------------===//
  11262. // VBMI2
  11263. //===----------------------------------------------------------------------===//
// VBMI2 variable funnel shift: register-register and full-width memory
// forms. $src1 is tied to $dst (the instruction reads and overwrites it).
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    // Register form.
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                T8PD, EVEX_4V, Sched<[sched]>;
    // Memory form (third operand loaded).
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                T8PD, EVEX_4V,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// As VBMI2_shift_var_rm, plus an EVEX.b broadcast-from-memory form for the
// element widths that support embedded broadcast (d/q).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              T8PD, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiate the non-broadcast forms across the three vector widths:
// 512-bit requires VBMI2, 128/256-bit additionally require VLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

// Same, but including the broadcast form at each width.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Word forms take their own opcode (wOp) and no broadcast; dword/qword
// forms share dqOp and support embedded broadcast.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

// Immediate-count variants (vpshld/vpshrd with imm8 shift amount).
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
// Concat & Shift (funnel shifts): variable-count and immediate-count forms.
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (byte/word element compress, VBMI2).
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand (byte/word element expand, VBMI2).
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
  11356. //===----------------------------------------------------------------------===//
  11357. // VNNI
  11358. //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate. $src1 is the tied accumulator input; memory
// forms use 32-bit CD8 tuple scaling, and the broadcast form is EVEX.b.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
    // Register form; commutability is forwarded for both operand orders.
    defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                     "$src3, $src2", "$src2, $src3",
                                     (VTI.VT (OpNode VTI.RC:$src1,
                                              VTI.RC:$src2, VTI.RC:$src3)),
                                     IsCommutable, IsCommutable>,
                EVEX_4V, T8PD, Sched<[sched]>;
    // Full-width memory form.
    defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                     "$src3, $src2", "$src2, $src3",
                                     (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                              (VTI.VT (VTI.LdFrag addr:$src3))))>,
                EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast-from-memory form.
    defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                     OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                     "$src2, ${src3}"#VTI.BroadcastStr,
                                     (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                      (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Instantiate VNNI_rmb at the three vector widths: 512-bit needs VNNI,
// 128/256-bit additionally need VLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
// Patterns to match VPDPWSSD from existing instructions/intrinsics:
// fold add(acc, vpmaddwd(a, b)) into a single dot-product accumulate.
// The _su ("single use") fragment guards against duplicating the pmaddwd.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
  11428. //===----------------------------------------------------------------------===//
  11429. // Bit Algorithms
  11430. //===----------------------------------------------------------------------===//
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte/word population count (BITALG), plus the generic unary lowering
// patterns that select the narrowest legal instruction.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// Single-use guard for vpshufbitqmb, used by the masked pattern below.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
// vpshufbitqmb: bit-shuffle producing a mask register result (KRC dest).
// The masked variant uses the single-use fragment to avoid duplication.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit needs BITALG; 128/256-bit additionally need VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
  11473. //===----------------------------------------------------------------------===//
  11474. // GFNI
  11475. //===----------------------------------------------------------------------===//
// GFNI byte multiply in GF(2^8). 512-bit needs GFNI+AVX512F,
// 128/256-bit need GFNI+VLX. Commutable (trailing 1).
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
// GFNI affine transform with imm8. Inherits the reg/mem+imm forms and adds
// an EVEX.b form that broadcasts the 64-bit matrix operand (BcstVTI gives
// the qword broadcast type, bitconverted back to the byte-vector type).
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit needs GFNI+AVX512F; 128/256-bit need GFNI+VLX.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
  11523. //===----------------------------------------------------------------------===//
  11524. // AVX5124FMAPS
  11525. //===----------------------------------------------------------------------===//
// AVX5124FMAPS: assembler-only definitions (empty patterns, hasSideEffects
// cleared, memory-only source operand). All are RMW on $src1 and may raise
// FP exceptions / read MXCSR.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm  : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm  : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;
}
  11549. //===----------------------------------------------------------------------===//
  11550. // AVX5124VNNIW
  11551. //===----------------------------------------------------------------------===//
// AVX5124VNNIW: assembler-only definitions (empty patterns), RMW on $src1.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm  : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Pseudos for spilling/reloading a pair of 16-bit mask registers.
let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
  11571. //===----------------------------------------------------------------------===//
  11572. // VP2INTERSECT
  11573. //===----------------------------------------------------------------------===//
// vp2intersect: compares two vectors and writes a pair of mask registers
// (KRPC destination). Reg, full-width memory, and broadcast forms.
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                      _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                      _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                      _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit needs VP2INTERSECT+AVX512F; 128/256-bit additionally need VLX.
multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
  defm Z      : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
// Two-type binop helper: source and destination vector type infos may
// differ (used below for f32 -> bf16 conversion). Gated on prd at 512-bit,
// prd+VLX at 128/256-bit.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z      : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                        _SrcVTInfo.info512, _DstVTInfo.info512,
                                        _SrcVTInfo.info512, IsCommutable>,
                                        EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

// Convert two packed-single vectors to one packed-bf16 vector (BF16).
let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                       SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                       avx512vl_f32_info, avx512vl_bf16_info,
                       X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
// Truncate Float to BFloat16. The 128-bit form's isel patterns are
// disabled (null_frag) so the masked-conversion patterns defined later can
// be selected instead; the InstAliases provide the explicit x/y suffixed
// asm spellings since the destination is VR128X at both widths.
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    let Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
                               X86cvtneps2bf16, X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
    }
  } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                   f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                   f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Load forms.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // Broadcast-load forms.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
                                      (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  // Direct intrinsic selection for the unmasked 128/256-bit conversions.
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;
}
// vdpbf16ps: bf16 dot-product accumulating into f32. The accumulator (_)
// and the bf16 sources (src_v) use different type infos; $src1 is tied.
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                               EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: a single 32-bit element (one bf16 pair) is broadcast.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, f32mem:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr,", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                                EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

// 512-bit gated on prd; 128/256-bit on prd+VLX.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_bf16_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
  // Move word (r/m16) to packed word. The register form reads a GR32 but
  // only the low 16 bits are meaningful.
  def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                          "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
  def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                       "vmovw\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
                         (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

  // GR16 -> f16 bitcast: widen to GR32 first since VMOVW2SHrr takes GR32.
  def : Pat<(f16 (bitconvert GR16:$src)),
            (f16 (COPY_TO_REGCLASS
                  (VMOVW2SHrr
                   (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                  FR16X))>;
  def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
            (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
  def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
            (VMOVW2SHrr GR32:$src)>;
  // FIXME: We should really find a way to improve these patterns.
  def : Pat<(v8i32 (X86vzmovl
                    (insert_subvector undef,
                                      (v4i32 (scalar_to_vector
                                              (and GR32:$src, 0xffff))),
                                      (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector
                                               (and GR32:$src, 0xffff))),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
  def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
            (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

  // The AVX-512 128-bit vmovw instruction writes zeros in the high bits of
  // the destination, so a zero-extending 16-bit load maps directly to it.
  def : Pat<(v8i16 (X86vzload16 addr:$src)),
            (VMOVWrm addr:$src)>;
  def : Pat<(v16i16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v32i16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

  def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
            (VMOVWrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
            (VMOVWrm addr:$src)>;
  def : Pat<(v8i32 (X86vzmovl
                    (insert_subvector undef,
                                      (v4i32 (scalar_to_vector
                                              (i32 (zextloadi16 addr:$src)))),
                                      (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector
                                               (i32 (zextloadi16 addr:$src)))),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

  // Move word from xmm register to r/m16.
  def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                          "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
  def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
                       (ins i16mem:$dst, VR128X:$src),
                       "vmovw\t{$src, $dst|$dst, $src}",
                       [(store (i16 (extractelt (v8i16 VR128X:$src),
                                                (iPTR 0))), addr:$dst)]>,
                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

  // f16 -> GR16 bitcast: the move produces a GR32; extract the low 16 bits.
  def : Pat<(i16 (bitconvert FR16X:$src)),
            (i16 (EXTRACT_SUBREG
                  (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                  sub_16bit))>;
  def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
            (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

  // Allow "vmovw" to use GR64 (REX.W-encoded forms, no ISel patterns).
  let hasSideEffects = 0 in {
    def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                               "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
    def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                               "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
  }
}
// Convert 16-bit float to i16/u16.
// The 512-bit form also gets a rounding-control (embedded RC) variant via
// avx512_vcvt_fp_rc; 128/256-bit forms require HasVLX.
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert 16-bit float to i16/u16 with truncation.
// Same shape as avx512_cvtph2w, but the 512-bit extra variant is
// suppress-all-exceptions (SAE) rather than rounding-control.
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// fp16 <-> 16-bit integer conversions. Note vcvtph2w/vcvtuw2ph/vcvtw2ph share
// opcode 0x7D and vcvttph2w/vcvttph2uw share 0x7C; they are distinguished by
// their prefix maps (T_MAP5PS / T_MAP5PD / T_MAP5XS / T_MAP5XD).
defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                                X86cvtp2UIntRnd, avx512vl_i16_info,
                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                                X86VUintToFpRnd, avx512vl_f16_info,
                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 avx512vl_i16_info, avx512vl_f16_info,
                                 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
                                  X86cvttp2ui, X86cvttp2uiSAE,
                                  avx512vl_i16_info, avx512vl_f16_info,
                                  SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                               X86cvtp2IntRnd, avx512vl_i16_info,
                               avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                               X86VSintToFpRnd, avx512vl_f16_info,
                               avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5XS, EVEX_CD8<16, CD8VF>;
// Convert Half to Signed/Unsigned Doubleword.
// The 128-bit form consumes only the low half of a v8f16 source, hence the
// explicit "{1to4}" broadcast string and f64mem (64-bit) memory operand.
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Half to Signed/Unsigned Doubleword with truncation.
// Identical to avx512_cvtph2dq except the 512-bit extra variant is SAE
// (avx512_vcvt_fp_sae) instead of rounding-control.
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// fp16 -> 32-bit integer conversions (normal and truncating). CD8VH: the
// memory operand is half the destination vector width.
defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
                                  EVEX_CD8<16, CD8VH>;
defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5XS,
                                   EVEX_CD8<16, CD8VH>;
defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PS,
                                    EVEX_CD8<16, CD8VH>;
// Convert Half to Signed/Unsigned Quadword.
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}
// Convert Half to Signed/Unsigned Quadword with truncation.
// Same as avx512_cvtph2qq but the 512-bit extra variant is SAE.
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}
// fp16 -> 64-bit integer conversions (normal and truncating). CD8VQ: the
// memory operand is a quarter of the destination vector width.
defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VQ>;
defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                  EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5PD,
                                   EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
                                    EVEX_CD8<16, CD8VQ>;
// Convert Signed/Unsigned Quadword to Half.
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
  // 512 memory forms of these instructions in Asm Parser. They have the same
  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
  // due to the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // VL instruction patterns are disabled (null_frag); selection is done via
    // the explicit X86VM{S,U}intToFP patterns below this multiclass's users.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  // AT&T-syntax aliases for the suffixed mnemonics, covering plain, masked
  // ({k}), zero-masked ({k}{z}) and broadcast operand forms at each width.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}
// i64/u64 -> fp16 conversions; VEX_W selects the 64-bit element form.
defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
                                 EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
                                  EVEX_CD8<64, CD8VF>;
// Convert scalar half to signed/unsigned int 32/64 (rounding and truncating
// forms). "{l}"/"{q}" are the AT&T operand-size suffixes; VEX_W marks the
// 64-bit destination variants.
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                                        X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                                        T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                                          X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                                          T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                                         X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                                         T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                                           T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
let Predicates = [HasFP16] in {
  // Scalar signed/unsigned int (32/64) -> fp16 conversions.
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
                                          T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Unsuffixed AT&T aliases default to the 32-bit memory form.
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  // Scalar int->f16 selection patterns; the pass-through operand is undefined.
  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions. Folding the X86Movsh into
  // the _Int conversion keeps the upper destination elements from $dst.
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]
let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag (see avx512_cvtqq2ph), so
  // every rr/rm/rmb form and its masked/zero-masked variants are matched here.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Complex FMA (reg/mem/broadcast forms). The destination both accumulates
// ($src1 = $dst) and must not alias the multiplicand sources (@earlyclobber),
// matching the ISA requirement for VF[C]MADDCPH.
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
           (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
}
} // Constraints = "@earlyclobber $dst, $src1 = $dst"
// Embedded rounding-control (static RC) register form of the complex FMA,
// with the same accumulate/earlyclobber constraints as avx512_cfmaop_rm.
multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
           EVEX_4V, EVEX_B, EVEX_RC;
}
// Instantiate the packed complex FMA at all three EVEX widths. The VT infos
// are f32-based: each f32 lane holds one complex fp16 (real, imag) pair.
multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}
// Packed complex multiply at all three EVEX widths, built on the generic
// avx512_fp_packed helper with an "@earlyclobber $dst" constraint (dst may
// not alias the sources) and no commuting of the masked form (trailing 0).
multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}
// Complex FP16 multiply/FMA instructions. They read MXCSR; the conjugating
// (vfc*) variants are not commutable. Plain vs. conjugate pairs share an
// opcode and differ only in prefix map (T_MAP6XS vs. T_MAP6XD).
let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                                        T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                                         T_MAP6XD, EVEX_CD8<32, CD8VF>;
  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}
// Scalar ("sh") complex FMA: register (r), memory (m), and embedded-rounding
// register (rb) forms. $src1 is tied to $dst (accumulator read/written in
// place) and $dst is additionally earlyclobber'd so it cannot alias
// $src2/$src3. Operands are typed as v4f32, with the memory form folding a
// 32-bit scalar load (ssmem / sse_load_f32).
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    // Register-register form; IsCommutable allows swapping $src2/$src3.
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                        Sched<[WriteFMAX]>;
    // Register-memory form: $src3 comes from memory, never commutable.
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // Embedded rounding-control form (EVEX_B + EVEX_RC, extra $rc operand
    // lowered as an i32 timm).
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src3, $src2", "$src2, $src3, $rc",
                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}
// Scalar complex binary op (used below for the multiplies): rr, rm, and
// rrb (embedded rounding) forms built on AVX512_maskable with X86selects
// mask semantics. Every form passes "@earlyclobber $dst". Only the rr form
// honors IsCommutable; the rm and rrb forms pass 0, 0, 0 (never commutable).
// Mask info is f32x_info while patterns operate on v4f32 values.
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    // Register-register form.
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                        IsCommutable, IsCommutable, IsCommutable,
                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    // Register-memory form: $src2 folded as a 32-bit scalar load.
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // Embedded rounding-control form (EVEX_B + EVEX_RC, extra $rc operand).
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}
// Scalar complex FP16 instruction definitions; all read MXCSR. As with the
// packed forms above each opcode is shared between a T_MAP6XS variant
// (commutable) and a T_MAP6XD "vfc..." variant (not commutable). All use
// the one-element 32-bit disp8 tuple (EVEX_CD8<32, CD8VT1>) at 128-bit
// vector length.
// NOTE(review): the multiply forms carry VEX_LIG but the FMA forms do not;
// confirm that asymmetry is intentional.
let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                     T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                   T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}